diff --git "a/training.log" "b/training.log"
new file mode 100644--- /dev/null
+++ "b/training.log"
@@ -0,0 +1,37700 @@
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
+  warnings.warn(
+[2023-04-14 08:32:38,890] [WARNING] [runner.py:190:fetch_hostfile] Unable to find hostfile, will proceed with training with local resources only.
+[2023-04-14 08:32:39,797] [INFO] [runner.py:540:main] cmd = /home/minutiae/.conda/envs/py39/bin/python -u -m deepspeed.launcher.launch --world_info=eyJsb2NhbGhvc3QiOiBbMCwgMSwgMiwgMywgNCwgNSwgNiwgN119 --master_addr=127.0.0.1 --master_port=12346 --enable_each_rank_log=None main.py --data_path Dahoas/rm-static Dahoas/full-hh-rlhf Dahoas/synthetic-instruct-gptj-pairwise yitingxie/rlhf-reward-datasets openai/webgpt_comparisons stanfordnlp/SHP --data_split 2,4,4 --actor_model_name_or_path /lus/grand/projects/BNN-Scale/chatgpt/hf_runs/DeepSpeedExamples/applications/DeepSpeed-Chat/output/actor-models/1.3b --critic_model_name_or_path /lus/grand/projects/BNN-Scale/chatgpt/hf_runs/DeepSpeedExamples/applications/DeepSpeed-Chat/output/reward-models/350m --num_padding_at_beginning 1 --per_device_train_batch_size 4 --per_device_mini_train_batch_size 4 --generation_batch_numbers 1 --ppo_epochs 1 --max_answer_seq_len 256 --max_prompt_seq_len 256 --actor_learning_rate 9.65e-6 --critic_learning_rate 5e-6 --actor_weight_decay 0.1 --critic_weight_decay 0.1 --num_train_epochs 1 --lr_scheduler_type cosine --gradient_accumulation_steps 1 --num_warmup_steps 100 --deepspeed --seed 1234 --enable_hybrid_engine --actor_zero_stage 2 --critic_zero_stage 2 --output_dir /lus/grand/projects/BNN-Scale/chatgpt/hf_runs/DeepSpeedExamples/applications/DeepSpeed-Chat/output/step3-models/1.3b
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
+  warnings.warn(
+[2023-04-14 08:32:50,431] [INFO] [launch.py:229:main] WORLD INFO DICT: {'localhost': [0, 1, 2, 3, 4, 5, 6, 7]}
+[2023-04-14 08:32:50,884] [INFO] [launch.py:235:main] nnodes=1, num_local_procs=8, node_rank=0
+[2023-04-14 08:32:50,884] [INFO] [launch.py:246:main] global_rank_mapping=defaultdict(<class 'list'>, {'localhost': [0, 1, 2, 3, 4, 5, 6, 7]})
+[2023-04-14 08:32:50,884] [INFO] [launch.py:247:main] dist_world_size=8
+[2023-04-14 08:32:50,884] [INFO] [launch.py:249:main] Setting CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
+  warnings.warn(
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
+  warnings.warn(
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
+  warnings.warn(
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
+  warnings.warn(
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
+  warnings.warn(
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
+  warnings.warn(
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
+  warnings.warn(
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
+  warnings.warn(
+[2023-04-14 08:35:14,863] [INFO] [comm.py:586:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl
+Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+  0%|          | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 64.08it/s]
+100%|██████████| 2/2 [00:00<00:00, 690.82it/s]
+Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 571.39it/s]
+Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 716.85it/s]
+Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 662.45it/s]
+Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 746.78it/s]
+Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 679.02it/s]
+Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 639.18it/s]
+Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+  0%|          | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+  0%|          | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+100%|██████████| 2/2 [00:00<00:00,  6.89it/s]100%|██████████| 2/2 [00:00<00:00,  6.89it/s]
+100%|██████████| 2/2 [00:00<00:00, 156.58it/s]
+  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 26.93it/s]
+Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 666.77it/s]
+Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 23.94it/s]
+Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+  0%|          | 0/2 [00:00<?, ?it/s] 50%|█████     | 1/2 [00:00<00:00,  1.18it/s]100%|██████████| 2/2 [00:00<00:00,  2.21it/s]
+Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+  0%|          | 0/2 [00:00<?, ?it/s] 50%|█████     | 1/2 [00:00<00:00,  7.78it/s]  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:03<00:00,  2.20s/it]100%|██████████| 2/2 [00:04<00:00,  2.01s/it]
+ 50%|█████     | 1/2 [00:02<00:02,  2.33s/it]100%|██████████| 2/2 [00:02<00:00,  1.08s/it]100%|██████████| 2/2 [00:02<00:00,  1.27s/it]
+Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+  0%|          | 0/1 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+100%|██████████| 1/1 [00:00<00:00, 15.08it/s]
+  0%|          | 0/1 [00:00<?, ?it/s]100%|██████████| 1/1 [00:00<00:00, 681.67it/s]
+Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+  0%|          | 0/1 [00:00<?, ?it/s]100%|██████████| 1/1 [00:00<00:00, 15.83it/s]
+Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+  0%|          | 0/1 [00:00<?, ?it/s]100%|██████████| 1/1 [00:00<00:00, 564.36it/s]
+Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+  0%|          | 0/1 [00:00<?, ?it/s]100%|██████████| 1/1 [00:00<00:00, 683.11it/s]
+Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+  0%|          | 0/1 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+100%|██████████| 1/1 [00:07<00:00,  7.49s/it]100%|██████████| 1/1 [00:07<00:00,  7.66s/it]
+  0%|          | 0/1 [00:00<?, ?it/s]100%|██████████| 1/1 [00:00<00:00, 475.71it/s]
+Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+  0%|          | 0/1 [00:00<?, ?it/s]100%|██████████| 1/1 [00:00<00:00,  3.12it/s]100%|██████████| 1/1 [00:00<00:00,  3.12it/s]
+Found cached dataset parquet (/grand/projects/BNN-Scale/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+  0%|          | 0/2 [00:00<?, ?it/s] 50%|█████     | 1/2 [00:00<00:00,  5.79it/s]100%|██████████| 2/2 [00:00<00:00,  2.84it/s]100%|██████████| 2/2 [00:00<00:00,  3.08it/s]
+Found cached dataset parquet (/grand/projects/BNN-Scale/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+Found cached dataset parquet (/grand/projects/BNN-Scale/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+  0%|          | 0/2 [00:00<?, ?it/s]  0%|          | 0/2 [00:00<?, ?it/s] 50%|█████     | 1/2 [00:00<00:00,  1.81it/s]100%|██████████| 2/2 [00:00<00:00,  3.11it/s]
+100%|██████████| 2/2 [00:00<00:00, 21.36it/s]
+Found cached dataset parquet (/grand/projects/BNN-Scale/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+  0%|          | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+ 50%|█████     | 1/2 [00:05<00:05,  5.17s/it]100%|██████████| 2/2 [00:05<00:00,  2.35s/it]100%|██████████| 2/2 [00:05<00:00,  2.78s/it]
+  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 223.68it/s]
+Found cached dataset parquet (/grand/projects/BNN-Scale/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 751.26it/s]
+Found cached dataset parquet (/grand/projects/BNN-Scale/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+  0%|          | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+100%|██████████| 2/2 [00:00<00:00, 22.35it/s]
+  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 756.48it/s]
+Found cached dataset webgpt_comparisons (/grand/projects/BNN-Scale/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
+  0%|          | 0/1 [00:00<?, ?it/s]Found cached dataset webgpt_comparisons (/grand/projects/BNN-Scale/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
+  0%|          | 0/1 [00:00<?, ?it/s]100%|██████████| 1/1 [00:00<00:00, 67.81it/s]
+100%|██████████| 1/1 [00:00<00:00, 278.17it/s]
+Found cached dataset webgpt_comparisons (/grand/projects/BNN-Scale/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
+  0%|          | 0/1 [00:00<?, ?it/s]100%|██████████| 1/1 [00:00<00:00, 293.12it/s]
+Found cached dataset webgpt_comparisons (/grand/projects/BNN-Scale/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
+  0%|          | 0/1 [00:00<?, ?it/s]100%|██████████| 1/1 [00:00<00:00, 276.56it/s]
+Found cached dataset webgpt_comparisons (/grand/projects/BNN-Scale/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
+  0%|          | 0/1 [00:00<?, ?it/s]100%|██████████| 1/1 [00:00<00:00, 169.24it/s]
+Found cached dataset webgpt_comparisons (/grand/projects/BNN-Scale/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
+  0%|          | 0/1 [00:00<?, ?it/s]Found cached dataset webgpt_comparisons (/grand/projects/BNN-Scale/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
+100%|██████████| 1/1 [00:00<00:00, 289.58it/s]
+  0%|          | 0/1 [00:00<?, ?it/s]100%|██████████| 1/1 [00:00<00:00, 289.08it/s]
+Found cached dataset webgpt_comparisons (/grand/projects/BNN-Scale/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
+  0%|          | 0/1 [00:00<?, ?it/s]100%|██████████| 1/1 [00:08<00:00,  8.44s/it]100%|██████████| 1/1 [00:09<00:00,  9.61s/it]
+Found cached dataset json (/grand/projects/BNN-Scale/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
+  0%|          | 0/3 [00:00<?, ?it/s]Found cached dataset json (/grand/projects/BNN-Scale/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
+ 33%|███▎      | 1/3 [00:15<00:30, 15.43s/it]  0%|          | 0/3 [00:00<?, ?it/s] 67%|██████▋   | 2/3 [00:20<00:09,  9.35s/it]100%|██████████| 3/3 [00:23<00:00,  6.47s/it]100%|██████████| 3/3 [00:23<00:00,  7.85s/it]
+ 33%|███▎      | 1/3 [00:03<00:06,  3.12s/it] 67%|██████▋   | 2/3 [00:03<00:01,  1.36s/it]100%|██████████| 3/3 [00:03<00:00,  1.08s/it]
+Found cached dataset json (/grand/projects/BNN-Scale/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
+  0%|          | 0/3 [00:00<?, ?it/s]Found cached dataset json (/grand/projects/BNN-Scale/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
+  0%|          | 0/3 [00:00<?, ?it/s] 33%|███▎      | 1/3 [00:09<00:19,  9.58s/it]Found cached dataset json (/grand/projects/BNN-Scale/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
+ 33%|███▎      | 1/3 [00:00<00:00,  8.48it/s]100%|██████████| 3/3 [00:00<00:00, 18.45it/s]
+100%|██████████| 3/3 [00:09<00:00,  3.22s/it]
+  0%|          | 0/3 [00:00<?, ?it/s]100%|██████████| 3/3 [00:00<00:00, 241.71it/s]
+Found cached dataset json (/grand/projects/BNN-Scale/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
+Found cached dataset json (/grand/projects/BNN-Scale/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
+  0%|          | 0/3 [00:00<?, ?it/s]  0%|          | 0/3 [00:00<?, ?it/s] 33%|███▎      | 1/3 [00:05<00:10,  5.20s/it] 33%|███▎      | 1/3 [00:02<00:05,  2.93s/it] 67%|██████▋   | 2/3 [00:05<00:02,  2.29s/it] 67%|██████▋   | 2/3 [00:03<00:01,  1.35s/it]100%|██████████| 3/3 [00:03<00:00,  1.22it/s]100%|██████████| 3/3 [00:05<00:00,  1.33s/it]100%|██████████| 3/3 [00:03<00:00,  1.12s/it]100%|██████████| 3/3 [00:05<00:00,  1.88s/it]
+
+Found cached dataset json (/grand/projects/BNN-Scale/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
+  0%|          | 0/3 [00:00<?, ?it/s]100%|██████████| 3/3 [00:00<00:00, 261.87it/s]
+************************[start] Initializing Actor Model [start] *************************
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+	- Avoid using `tokenizers` before the fork if possible
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combinationInstalled CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Detected CUDA files, patching ldflags
+Emitting ninja build file /home/minutiae/.cache/torch_extensions/py39_cu113/fused_adam/build.ninja...
+Building extension module fused_adam...
+Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ninja: no work to do.
+Loading extension module fused_adam...
+Time to load fused_adam op: 4.954506158828735 seconds
+Loading extension module fused_adam...
+Time to load fused_adam op: 4.809650182723999 seconds
+Loading extension module fused_adam...
+Loading extension module fused_adam...
+Loading extension module fused_adam...
+Loading extension module fused_adam...
+Loading extension module fused_adam...
+Time to load fused_adam op: 5.013200044631958 seconds
+Time to load fused_adam op: 5.013401508331299 seconds
+Time to load fused_adam op: 5.013464689254761 seconds
+Time to load fused_adam op: 5.013665199279785 seconds
+Time to load fused_adam op: 5.014423608779907 seconds
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Detected CUDA files, patching ldflags
+Emitting ninja build file /home/minutiae/.cache/torch_extensions/py39_cu113/fused_adam/build.ninja...
+Building extension module fused_adam...
+Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ninja: no work to do.
+Loading extension module fused_adam...
+Time to load fused_adam op: 7.266183376312256 seconds
+[2023-04-14 08:44:02,458] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed info: version=0.9.0, git-hash=unknown, git-branch=unknown
+[2023-04-14 08:44:08,781] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Flops Profiler Enabled: False
+[2023-04-14 08:44:13,170] [INFO] [logging.py:96:log_dist] [Rank 0] Removing param_group that has no 'params' in the client Optimizer
+[2023-04-14 08:44:13,170] [INFO] [logging.py:96:log_dist] [Rank 0] Using client Optimizer as basic optimizer
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+[2023-04-14 08:44:13,193] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Basic Optimizer = FusedAdam
+[2023-04-14 08:44:13,193] [INFO] [utils.py:51:is_zero_supported_optimizer] Checking ZeRO support for optimizer=FusedAdam type=<class 'deepspeed.ops.adam.fused_adam.FusedAdam'>
+[2023-04-14 08:44:13,193] [INFO] [logging.py:96:log_dist] [Rank 0] Creating torch.float16 ZeRO stage 2 optimizer
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+[2023-04-14 08:44:13,703] [INFO] [stage_1_and_2.py:133:__init__] Reduce bucket size 500,000,000
+[2023-04-14 08:44:13,704] [INFO] [stage_1_and_2.py:134:__init__] Allgather bucket size 500,000,000
+[2023-04-14 08:44:13,704] [INFO] [stage_1_and_2.py:135:__init__] CPU Offload: False
+[2023-04-14 08:44:13,704] [INFO] [stage_1_and_2.py:136:__init__] Round robin gradient partitioning: False
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Emitting ninja build file /home/minutiae/.cache/torch_extensions/py39_cu113/utils/build.ninja...
+Building extension module utils...
+Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ninja: no work to do.
+Loading extension module utils...
+Time to load utils op: 7.119100570678711 seconds
+Loading extension module utils...
+Time to load utils op: 6.615035772323608 seconds
+Loading extension module utils...
+Loading extension module utils...
+Time to load utils op: 7.214683532714844 seconds
+Loading extension module utils...
+Loading extension module utils...
+Loading extension module utils...
+Loading extension module utils...
+Time to load utils op: 7.2167649269104 seconds
+Time to load utils op: 7.216015338897705 seconds
+Time to load utils op: 7.218137264251709 seconds
+Time to load utils op: 7.217472076416016 seconds
+Time to load utils op: 7.218402147293091 seconds
+Rank: 0 partition count [8, 8] and sizes[(164401920, False), (67840, False)] 
+Rank: 1 partition count [8, 8] and sizes[(164401920, False), (67840, False)] 
+Rank: 4 partition count [8, 8] and sizes[(164401920, False), (67840, False)] 
+Rank: 6 partition count [8, 8] and sizes[(164401920, False), (67840, False)] 
+Rank: 3 partition count [8, 8] and sizes[(164401920, False), (67840, False)] 
+Rank: 2 partition count [8, 8] and sizes[(164401920, False), (67840, False)] 
+Rank: 7 partition count [8, 8] and sizes[(164401920, False), (67840, False)] 
+Rank: 5 partition count [8, 8] and sizes[(164401920, False), (67840, False)] 
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Time to load utils op: 0.0018334388732910156 seconds
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Time to load utils op: 0.0009965896606445312 seconds
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Time to load utils op: 0.001102447509765625 seconds
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Time to load utils op: 0.0010309219360351562 seconds
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Time to load utils op: 0.0010993480682373047 seconds
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Time to load utils op: 0.0011150836944580078 seconds
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Time to load utils op: 0.0014064311981201172 seconds
+[2023-04-14 08:44:34,254] [INFO] [utils.py:785:see_memory_usage] Before initializing optimizer states
+[2023-04-14 08:44:34,255] [INFO] [utils.py:786:see_memory_usage] MA 3.06 GB         Max_MA 3.06 GB         CA 3.07 GB         Max_CA 3 GB 
+[2023-04-14 08:44:34,255] [INFO] [utils.py:793:see_memory_usage] CPU Virtual Memory:  used = 111.88 GB, percent = 11.1%
+[2023-04-14 08:44:34,948] [INFO] [utils.py:785:see_memory_usage] After initializing optimizer states
+[2023-04-14 08:44:34,949] [INFO] [utils.py:786:see_memory_usage] MA 4.29 GB         Max_MA 4.91 GB         CA 4.91 GB         Max_CA 5 GB 
+[2023-04-14 08:44:34,949] [INFO] [utils.py:793:see_memory_usage] CPU Virtual Memory:  used = 111.87 GB, percent = 11.1%
+[2023-04-14 08:44:34,949] [INFO] [stage_1_and_2.py:489:__init__] optimizer state initialized
+[2023-04-14 08:44:35,656] [INFO] [utils.py:785:see_memory_usage] After initializing ZeRO optimizer
+[2023-04-14 08:44:35,657] [INFO] [utils.py:786:see_memory_usage] MA 4.29 GB         Max_MA 4.29 GB         CA 4.91 GB         Max_CA 5 GB 
+[2023-04-14 08:44:35,657] [INFO] [utils.py:793:see_memory_usage] CPU Virtual Memory:  used = 111.85 GB, percent = 11.1%
+[2023-04-14 08:44:35,659] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Final Optimizer = FusedAdam
+[2023-04-14 08:44:35,659] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed using client LR scheduler
+[2023-04-14 08:44:35,659] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed LR Scheduler = <torch.optim.lr_scheduler.LambdaLR object at 0x7f0590761a30>
+[2023-04-14 08:44:35,659] [INFO] [logging.py:96:log_dist] [Rank 0] step=0, skipped=0, lr=[0.0, 0.0], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 08:44:35,660] [INFO] [config.py:953:print] DeepSpeedEngine configuration:
+[2023-04-14 08:44:35,660] [INFO] [config.py:957:print]   activation_checkpointing_config  {
+    "partition_activations": false, 
+    "contiguous_memory_optimization": false, 
+    "cpu_checkpointing": false, 
+    "number_checkpoints": null, 
+    "synchronize_checkpoint_boundary": false, 
+    "profile": false
+}
+[2023-04-14 08:44:35,660] [INFO] [config.py:957:print]   aio_config ................... {'block_size': 1048576, 'queue_depth': 8, 'thread_count': 1, 'single_submit': False, 'overlap_events': True}
+[2023-04-14 08:44:35,660] [INFO] [config.py:957:print]   amp_enabled .................. False
+[2023-04-14 08:44:35,660] [INFO] [config.py:957:print]   amp_params ................... False
+[2023-04-14 08:44:35,660] [INFO] [config.py:957:print]   autotuning_config ............ {
+    "enabled": false, 
+    "start_step": null, 
+    "end_step": null, 
+    "metric_path": null, 
+    "arg_mappings": null, 
+    "metric": "throughput", 
+    "model_info": null, 
+    "results_dir": "autotuning_results", 
+    "exps_dir": "autotuning_exps", 
+    "overwrite": true, 
+    "fast": true, 
+    "start_profile_step": 3, 
+    "end_profile_step": 5, 
+    "tuner_type": "gridsearch", 
+    "tuner_early_stopping": 5, 
+    "tuner_num_trials": 50, 
+    "model_info_path": null, 
+    "mp_size": 1, 
+    "max_train_batch_size": null, 
+    "min_train_batch_size": 1, 
+    "max_train_micro_batch_size_per_gpu": 1.024000e+03, 
+    "min_train_micro_batch_size_per_gpu": 1, 
+    "num_tuning_micro_batch_sizes": 3
+}
+[2023-04-14 08:44:35,660] [INFO] [config.py:957:print]   bfloat16_enabled ............. False
+[2023-04-14 08:44:35,660] [INFO] [config.py:957:print]   checkpoint_parallel_write_pipeline  False
+[2023-04-14 08:44:35,660] [INFO] [config.py:957:print]   checkpoint_tag_validation_enabled  True
+[2023-04-14 08:44:35,660] [INFO] [config.py:957:print]   checkpoint_tag_validation_fail  False
+[2023-04-14 08:44:35,660] [INFO] [config.py:957:print]   comms_config ................. <deepspeed.comm.config.DeepSpeedCommsConfig object at 0x7f04cfff4fd0>
+[2023-04-14 08:44:35,660] [INFO] [config.py:957:print]   communication_data_type ...... None
+[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   compression_config ........... {'weight_quantization': {'shared_parameters': {'enabled': False, 'quantizer_kernel': False, 'schedule_offset': 0, 'quantize_groups': 1, 'quantize_verbose': False, 'quantization_type': 'symmetric', 'quantize_weight_in_forward': False, 'rounding': 'nearest', 'fp16_mixed_quantize': False, 'quantize_change_ratio': 0.001}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {'enabled': False, 'quantization_type': 'symmetric', 'range_calibration': 'dynamic', 'schedule_offset': 1000}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {'enabled': False, 'method': 'topk', 'schedule_offset': 1000}, 'different_groups': {}}, 'channel_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'layer_reduction': {'enabled': False}}
+[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   curriculum_enabled_legacy .... False
+[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   curriculum_params_legacy ..... False
+[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   data_efficiency_config ....... {'enabled': False, 'seed': 1234, 'data_sampling': {'enabled': False, 'num_epochs': 1000, 'num_workers': 0, 'curriculum_learning': {'enabled': False}}, 'data_routing': {'enabled': False, 'random_ltd': {'enabled': False, 'layer_token_lr_schedule': {'enabled': False}}}}
+[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   data_efficiency_enabled ...... False
+[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   dataloader_drop_last ......... False
+[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   disable_allgather ............ False
+[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   dump_state ................... False
+[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   dynamic_loss_scale_args ...... {'init_scale': 65536, 'scale_window': 100, 'delayed_shift': 2, 'min_scale': 1}
+[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   eigenvalue_enabled ........... False
+[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   eigenvalue_gas_boundary_resolution  1
+[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   eigenvalue_layer_name ........ bert.encoder.layer
+[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   eigenvalue_layer_num ......... 0
+[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   eigenvalue_max_iter .......... 100
+[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   eigenvalue_stability ......... 1e-06
+[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   eigenvalue_tol ............... 0.01
+[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   eigenvalue_verbose ........... False
+[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   elasticity_enabled ........... False
+[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   flops_profiler_config ........ {
+    "enabled": false, 
+    "profile_step": 1, 
+    "module_depth": -1, 
+    "top_modules": 1, 
+    "detailed": true, 
+    "output_file": null
+}
+[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   fp16_auto_cast ............... False
+[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   fp16_enabled ................. True
+[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   fp16_master_weights_and_gradients  False
+[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   global_rank .................. 0
+[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   grad_accum_dtype ............. None
+[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   gradient_accumulation_steps .. 1
+[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   gradient_clipping ............ 1.0
+[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   gradient_predivide_factor .... 1.0
+[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   hybrid_engine ................ enabled=True max_out_tokens=512 inference_tp_size=1 release_inference_cache=False pin_parameters=True tp_gather_partition_size=8
+[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   initial_dynamic_scale ........ 65536
+[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   load_universal_checkpoint .... False
+[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   loss_scale ................... 0
+[2023-04-14 08:44:35,661] [INFO] [config.py:957:print]   memory_breakdown ............. False
+[2023-04-14 08:44:35,662] [INFO] [config.py:957:print]   monitor_config ............... tensorboard=TensorBoardConfig(enabled=False, output_path='', job_name='DeepSpeedJobName') wandb=WandbConfig(enabled=False, group=None, team=None, project='deepspeed') csv_monitor=CSVConfig(enabled=False, output_path='', job_name='DeepSpeedJobName') enabled=False
+[2023-04-14 08:44:35,662] [INFO] [config.py:957:print]   nebula_config ................ {
+    "enabled": false, 
+    "persistent_storage_path": null, 
+    "persistent_time_interval": 100, 
+    "num_of_version_in_retention": 2, 
+    "enable_nebula_load": true, 
+    "load_path": null
+}
+[2023-04-14 08:44:35,662] [INFO] [config.py:957:print]   optimizer_legacy_fusion ...... False
+[2023-04-14 08:44:35,662] [INFO] [config.py:957:print]   optimizer_name ............... None
+[2023-04-14 08:44:35,662] [INFO] [config.py:957:print]   optimizer_params ............. None
+[2023-04-14 08:44:35,662] [INFO] [config.py:957:print]   pipeline ..................... {'stages': 'auto', 'partition': 'best', 'seed_layers': False, 'activation_checkpoint_interval': 0}
+[2023-04-14 08:44:35,662] [INFO] [config.py:957:print]   pld_enabled .................. False
+[2023-04-14 08:44:35,662] [INFO] [config.py:957:print]   pld_params ................... False
+[2023-04-14 08:44:35,662] [INFO] [config.py:957:print]   prescale_gradients ........... False
+[2023-04-14 08:44:35,662] [INFO] [config.py:957:print]   scheduler_name ............... None
+[2023-04-14 08:44:35,662] [INFO] [config.py:957:print]   scheduler_params ............. None
+[2023-04-14 08:44:35,662] [INFO] [config.py:957:print]   sparse_attention ............. None
+[2023-04-14 08:44:35,662] [INFO] [config.py:957:print]   sparse_gradients_enabled ..... False
+[2023-04-14 08:44:35,662] [INFO] [config.py:957:print]   steps_per_print .............. 10
+[2023-04-14 08:44:35,662] [INFO] [config.py:957:print]   train_batch_size ............. 32
+[2023-04-14 08:44:35,662] [INFO] [config.py:957:print]   train_micro_batch_size_per_gpu  4
+[2023-04-14 08:44:35,662] [INFO] [config.py:957:print]   use_node_local_storage ....... False
+[2023-04-14 08:44:35,662] [INFO] [config.py:957:print]   wall_clock_breakdown ......... False
+[2023-04-14 08:44:35,662] [INFO] [config.py:957:print]   world_size ................... 8
+[2023-04-14 08:44:35,662] [INFO] [config.py:957:print]   zero_allow_untested_optimizer  False
+[2023-04-14 08:44:35,662] [INFO] [config.py:957:print]   zero_config .................. stage=2 contiguous_gradients=True reduce_scatter=True reduce_bucket_size=500,000,000 allgather_partitions=True allgather_bucket_size=500,000,000 overlap_comm=False load_from_fp32_weights=True elastic_checkpoint=False offload_param=DeepSpeedZeroOffloadParamConfig(device='none', nvme_path=None, buffer_count=5, buffer_size=100,000,000, max_in_cpu=1,000,000,000, pin_memory=False) offload_optimizer=DeepSpeedZeroOffloadOptimizerConfig(device='none', nvme_path=None, buffer_count=4, pin_memory=False, pipeline=False, pipeline_read=False, pipeline_write=False, fast_init=False) sub_group_size=1,000,000,000 cpu_offload_param=None cpu_offload_use_pin_memory=None cpu_offload=None prefetch_bucket_size=30000000 param_persistence_threshold=10000 model_persistence_threshold=sys.maxsize max_live_parameters=30000000 max_reuse_distance=1,000,000,000 gather_16bit_weights_on_model_save=False stage3_gather_fp16_weights_on_model_save=False ignore_unused_parameters=True legacy_stage1=False round_robin_gradients=False memory_efficient_linear=False
+[2023-04-14 08:44:35,662] [INFO] [config.py:957:print]   zero_enabled ................. True
+[2023-04-14 08:44:35,662] [INFO] [config.py:957:print]   zero_force_ds_cpu_optimizer .. True
+[2023-04-14 08:44:35,662] [INFO] [config.py:957:print]   zero_optimization_stage ...... 2
+[2023-04-14 08:44:35,662] [INFO] [config.py:943:print_user_config]   json = {
+    "train_batch_size": 32, 
+    "train_micro_batch_size_per_gpu": 4, 
+    "steps_per_print": 10, 
+    "zero_optimization": {
+        "stage": 2, 
+        "offload_param": {
+            "device": "none"
+        }, 
+        "offload_optimizer": {
+            "device": "none"
+        }, 
+        "stage3_param_persistence_threshold": 1.000000e+04, 
+        "stage3_max_live_parameters": 3.000000e+07, 
+        "stage3_prefetch_bucket_size": 3.000000e+07, 
+        "memory_efficient_linear": false
+    }, 
+    "fp16": {
+        "enabled": true, 
+        "loss_scale_window": 100
+    }, 
+    "gradient_clipping": 1.0, 
+    "prescale_gradients": false, 
+    "wall_clock_breakdown": false, 
+    "hybrid_engine": {
+        "enabled": true, 
+        "inference_tp_size": 1, 
+        "release_inference_cache": false, 
+        "pin_parameters": true, 
+        "tp_gather_partition_size": 8
+    }
+}
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Time to load utils op: 0.0014083385467529297 seconds
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combinationUsing /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Detected CUDA files, patching ldflags
+Emitting ninja build file /home/minutiae/.cache/torch_extensions/py39_cu113/transformer_inference/build.ninja...
+Building extension module transformer_inference...
+Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+[1/9] /usr/local/cuda-11.4/bin/nvcc  -ccbin /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/minutiae/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/dequantize.cu -o dequantize.cuda.o 
+[2/9] /usr/local/cuda-11.4/bin/nvcc  -ccbin /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/minutiae/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/gelu.cu -o gelu.cuda.o 
+[3/9] /usr/local/cuda-11.4/bin/nvcc  -ccbin /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/minutiae/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/relu.cu -o relu.cuda.o 
+[4/9] /usr/local/cuda-11.4/bin/nvcc  -ccbin /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/minutiae/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/apply_rotary_pos_emb.cu -o apply_rotary_pos_emb.cuda.o 
+[5/9] /usr/local/cuda-11.4/bin/nvcc  -ccbin /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/minutiae/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/transform.cu -o transform.cuda.o 
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/transform.cu(57): warning: variable "lane" was declared but never referenced
+
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/transform.cu(94): warning: variable "half_dim" was declared but never referenced
+
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/transform.cu(111): warning: variable "vals_half" was declared but never referenced
+
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/transform.cu(112): warning: variable "output_half" was declared but never referenced
+
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/transform.cu(129): warning: variable "lane" was declared but never referenced
+
+[6/9] /usr/local/cuda-11.4/bin/nvcc  -ccbin /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/minutiae/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/softmax.cu -o softmax.cuda.o 
+[7/9] /usr/local/cuda-11.4/bin/nvcc  -ccbin /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/minutiae/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu -o layer_norm.cuda.o 
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
+          detected during:
+            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=1, maxThreads=256]" 
+(166): here
+            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
+(192): here
+
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
+          detected during:
+            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=1, maxThreads=256]" 
+(166): here
+            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
+(192): here
+
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
+          detected during:
+            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=2, maxThreads=256]" 
+(168): here
+            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
+(192): here
+
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
+          detected during:
+            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=2, maxThreads=256]" 
+(168): here
+            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
+(192): here
+
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
+          detected during:
+            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=4, maxThreads=256]" 
+(170): here
+            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
+(192): here
+
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
+          detected during:
+            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=4, maxThreads=256]" 
+(170): here
+            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
+(192): here
+
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
+          detected during:
+            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=8, maxThreads=256]" 
+(172): here
+            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
+(192): here
+
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
+          detected during:
+            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=8, maxThreads=256]" 
+(172): here
+            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
+(192): here
+
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
+          detected during:
+            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=16, maxThreads=256]" 
+(174): here
+            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
+(192): here
+
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
+          detected during:
+            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=16, maxThreads=256]" 
+(174): here
+            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
+(192): here
+
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
+          detected during:
+            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=2, threadsPerGroup=256, maxThreads=256]" 
+(179): here
+            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
+(192): here
+
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
+          detected during:
+            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=2, threadsPerGroup=256, maxThreads=256]" 
+(179): here
+            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
+(192): here
+
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
+          detected during:
+            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=4, threadsPerGroup=256, maxThreads=256]" 
+(182): here
+            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
+(192): here
+
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
+          detected during:
+            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=4, threadsPerGroup=256, maxThreads=256]" 
+(182): here
+            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
+(192): here
+
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
+          detected during:
+            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=6, threadsPerGroup=256, maxThreads=256]" 
+(185): here
+            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
+(192): here
+
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
+          detected during:
+            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=6, threadsPerGroup=256, maxThreads=256]" 
+(185): here
+            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
+(192): here
+
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
+          detected during:
+            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=8, threadsPerGroup=256, maxThreads=256]" 
+(188): here
+            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
+(192): here
+
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
+          detected during:
+            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=8, threadsPerGroup=256, maxThreads=256]" 
+(188): here
+            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
+(192): here
+
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
+          detected during:
+            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=1, maxThreads=256]" 
+(166): here
+            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
+(200): here
+
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
+          detected during:
+            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=1, maxThreads=256]" 
+(166): here
+            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
+(200): here
+
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
+          detected during:
+            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=2, maxThreads=256]" 
+(168): here
+            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
+(200): here
+
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
+          detected during:
+            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=2, maxThreads=256]" 
+(168): here
+            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
+(200): here
+
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
+          detected during:
+            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=4, maxThreads=256]" 
+(170): here
+            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
+(200): here
+
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
+          detected during:
+            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=4, maxThreads=256]" 
+(170): here
+            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
+(200): here
+
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
+          detected during:
+            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=8, maxThreads=256]" 
+(172): here
+            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
+(200): here
+
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
+          detected during:
+            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=8, maxThreads=256]" 
+(172): here
+            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
+(200): here
+
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
+          detected during:
+            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=16, maxThreads=256]" 
+(174): here
+            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
+(200): here
+
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
+          detected during:
+            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=16, maxThreads=256]" 
+(174): here
+            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
+(200): here
+
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
+          detected during:
+            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=4, threadsPerGroup=256, maxThreads=256]" 
+(179): here
+            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
+(200): here
+
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
+          detected during:
+            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=4, threadsPerGroup=256, maxThreads=256]" 
+(179): here
+            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
+(200): here
+
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
+          detected during:
+            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=8, threadsPerGroup=256, maxThreads=256]" 
+(182): here
+            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
+(200): here
+
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
+          detected during:
+            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=8, threadsPerGroup=256, maxThreads=256]" 
+(182): here
+            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
+(200): here
+
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
+          detected during:
+            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=12, threadsPerGroup=256, maxThreads=256]" 
+(185): here
+            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
+(200): here
+
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
+          detected during:
+            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=12, threadsPerGroup=256, maxThreads=256]" 
+(185): here
+            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
+(200): here
+
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
+          detected during:
+            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=16, threadsPerGroup=256, maxThreads=256]" 
+(188): here
+            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
+(200): here
+
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
+          detected during:
+            instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=16, threadsPerGroup=256, maxThreads=256]" 
+(188): here
+            instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
+(200): here
+
+[8/9] /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicxx -MMD -MF pt_binding.o.d -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/minutiae/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -fPIC -std=c++14 -O3 -std=c++14 -g -Wno-reorder -c /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp -o pt_binding.o 
+In file included from /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:10:
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes/inference_context.h: In member function ‘void InferenceContext::GenWorkSpace(const unsigned int&, const unsigned int&, const size_t&, const size_t&, const size_t&, const unsigned int&, const bool&, const size_t&, const unsigned int&, unsigned int, unsigned int)’:
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes/inference_context.h:139:52: warning: format ‘%d’ expects argument of type ‘int’, but argument 2 has type ‘size_t’ {aka ‘long unsigned int’} [-Wformat=]
+  139 |                 "Allocatable workspace available (%d tokens) is less than minimum requested "
+      |                                                   ~^
+      |                                                    |
+      |                                                    int
+      |                                                   %ld
+  140 |                 "workspace (%d tokens)\n",
+  141 |                 _max_seq_len,
+      |                 ~~~~~~~~~~~~                        
+      |                 |
+      |                 size_t {aka long unsigned int}
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp: In instantiation of ‘std::vector<at::Tensor> ds_softmax_context(at::Tensor&, at::Tensor&, int, bool, bool, int, float, bool, bool, int, bool, unsigned int, unsigned int, at::Tensor&) [with T = float]’:
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:1750:99:   required from here
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:536:50: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
+  536 |                                      {hidden_dim * InferenceContext::Instance().GetMaxTokenLenght(),
+      |                                       ~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:536:50: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:537:41: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
+  537 |                                       k * InferenceContext::Instance().GetMaxTokenLenght(),
+      |                                       ~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:537:41: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:545:38: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
+  545 |                          {hidden_dim * InferenceContext::Instance().GetMaxTokenLenght(),
+      |                           ~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:545:38: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:546:29: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
+  546 |                           k * InferenceContext::Instance().GetMaxTokenLenght(),
+      |                           ~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:546:29: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp: In instantiation of ‘std::vector<at::Tensor> ds_softmax_context(at::Tensor&, at::Tensor&, int, bool, bool, int, float, bool, bool, int, bool, unsigned int, unsigned int, at::Tensor&) [with T = __half]’:
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:1753:49:   required from here
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:536:50: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
+  536 |                                      {hidden_dim * InferenceContext::Instance().GetMaxTokenLenght(),
+      |                                       ~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:536:50: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:537:41: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
+  537 |                                       k * InferenceContext::Instance().GetMaxTokenLenght(),
+      |                                       ~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:537:41: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:545:38: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
+  545 |                          {hidden_dim * InferenceContext::Instance().GetMaxTokenLenght(),
+      |                           ~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:545:38: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:546:29: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
+  546 |                           k * InferenceContext::Instance().GetMaxTokenLenght(),
+      |                           ~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:546:29: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
+[9/9] /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicxx pt_binding.o gelu.cuda.o relu.cuda.o layer_norm.cuda.o softmax.cuda.o dequantize.cuda.o apply_rotary_pos_emb.cuda.o transform.cuda.o -shared -lcurand -L/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/lib -lc10 -lc10_cuda -ltorch_cpu -ltorch_cuda_cu -ltorch_cuda_cpp -ltorch -ltorch_python -L/usr/local/cuda-11.4/lib64 -lcudart -o transformer_inference.so
+Loading extension module transformer_inference...
+Loading extension module transformer_inference...
+Time to load transformer_inference op: 33.57252836227417 seconds
+Time to load transformer_inference op: 33.4696843624115 seconds
+Loading extension module transformer_inference...
+Time to load transformer_inference op: 33.380457639694214 seconds
+Loading extension module transformer_inference...
+Loading extension module transformer_inference...
+Time to load transformer_inference op: 33.478936433792114 seconds
+Time to load transformer_inference op: 33.23845458030701 seconds
+Loading extension module transformer_inference...
+Time to load transformer_inference op: 32.77969670295715 seconds
+[2023-04-14 08:45:10,904] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed-Inference config: {'layer_id': 0, 'hidden_size': 2048, 'intermediate_size': 8192, 'heads': 32, 'num_hidden_layers': -1, 'fp16': True, 'pre_layer_norm': True, 'local_rank': -1, 'stochastic_mode': False, 'epsilon': 1e-05, 'mp_size': 1, 'q_int8': False, 'scale_attention': True, 'triangular_masking': True, 'local_attention': False, 'window_size': 1, 'rotary_dim': -1, 'rotate_half': False, 'rotate_every_two': True, 'return_tuple': True, 'mlp_after_attn': True, 'mlp_act_func_type': <ActivationFuncType.ReLU: 2>, 'specialized_mode': False, 'training_mp_size': 1, 'bigscience_bloom': False, 'max_out_tokens': 512, 'min_out_tokens': 512, 'scale_attn_by_inverse_layer_idx': False, 'enable_qkv_quantization': False, 'use_mup': False, 'return_single_tuple': False, 'set_empty_params': True, 'transposed_mode': True}
+Loading extension module transformer_inference...
+Time to load transformer_inference op: 33.499565839767456 seconds
+Loading extension module transformer_inference...
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Time to load transformer_inference op: 33.51667404174805 seconds
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module transformer_inference, skipping build step...
+Loading extension module transformer_inference...
+Time to load transformer_inference op: 0.17304539680480957 seconds
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module transformer_inference, skipping build step...
+Loading extension module transformer_inference...
+Time to load transformer_inference op: 0.2198350429534912 seconds
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module transformer_inference, skipping build step...
+Loading extension module transformer_inference...
+Time to load transformer_inference op: 0.23125028610229492 seconds
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module transformer_inference, skipping build step...
+Loading extension module transformer_inference...
+Time to load transformer_inference op: 0.23965048789978027 seconds
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module transformer_inference, skipping build step...
+Loading extension module transformer_inference...
+Time to load transformer_inference op: 0.24375414848327637 seconds
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module transformer_inference, skipping build step...
+Loading extension module transformer_inference...
+Time to load transformer_inference op: 0.25126194953918457 seconds
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module transformer_inference, skipping build step...
+Loading extension module transformer_inference...
+Time to load transformer_inference op: 0.31780099868774414 seconds
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module transformer_inference, skipping build step...
+Loading extension module transformer_inference...
+Time to load transformer_inference op: 0.1689906120300293 seconds
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+No modifications detected for re-loaded extension module transformer_inference, skipping build step...
+Loading extension module transformer_inference...
+Time to load transformer_inference op: 0.33678388595581055 seconds
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module transformer_inference, skipping build step...
+Loading extension module transformer_inference...
+Time to load transformer_inference op: 0.2203667163848877 seconds
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module transformer_inference, skipping build step...
+Loading extension module transformer_inference...
+Time to load transformer_inference op: 0.2368457317352295 seconds
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module transformer_inference, skipping build step...
+Loading extension module transformer_inference...
+Time to load transformer_inference op: 0.2534804344177246 seconds
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module transformer_inference, skipping build step...
+Loading extension module transformer_inference...
+Time to load transformer_inference op: 0.2447066307067871 seconds
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module transformer_inference, skipping build step...
+Loading extension module transformer_inference...
+Time to load transformer_inference op: 0.27658915519714355 seconds
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module transformer_inference, skipping build step...
+Loading extension module transformer_inference...
+Time to load transformer_inference op: 0.3282308578491211 seconds
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module transformer_inference, skipping build step...
+Loading extension module transformer_inference...
+Time to load transformer_inference op: 0.8675589561462402 seconds
+*****************[end] Initialized Actor Model [end] (duration: 119.75s)******************
+*************************[start] Initializing Ref Model [start] **************************
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Time to load utils op: 1.891409158706665 seconds
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Time to load utils op: 0.8250091075897217 seconds
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Time to load utils op: 0.0019402503967285156 seconds
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Time to load utils op: 0.0012993812561035156 seconds
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Time to load utils op: 0.0013997554779052734 seconds
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Time to load utils op: 0.0013012886047363281 seconds
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Time to load utils op: 0.0018343925476074219 seconds
+[2023-04-14 08:45:57,529] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed info: version=0.9.0, git-hash=unknown, git-branch=unknown
+[2023-04-14 08:46:12,698] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Flops Profiler Enabled: False
+[2023-04-14 08:46:13,190] [INFO] [config.py:953:print] DeepSpeedEngine configuration:
+[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   activation_checkpointing_config  {
+    "partition_activations": false, 
+    "contiguous_memory_optimization": false, 
+    "cpu_checkpointing": false, 
+    "number_checkpoints": null, 
+    "synchronize_checkpoint_boundary": false, 
+    "profile": false
+}
+[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   aio_config ................... {'block_size': 1048576, 'queue_depth': 8, 'thread_count': 1, 'single_submit': False, 'overlap_events': True}
+[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   amp_enabled .................. False
+[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   amp_params ................... False
+[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   autotuning_config ............ {
+    "enabled": false, 
+    "start_step": null, 
+    "end_step": null, 
+    "metric_path": null, 
+    "arg_mappings": null, 
+    "metric": "throughput", 
+    "model_info": null, 
+    "results_dir": "autotuning_results", 
+    "exps_dir": "autotuning_exps", 
+    "overwrite": true, 
+    "fast": true, 
+    "start_profile_step": 3, 
+    "end_profile_step": 5, 
+    "tuner_type": "gridsearch", 
+    "tuner_early_stopping": 5, 
+    "tuner_num_trials": 50, 
+    "model_info_path": null, 
+    "mp_size": 1, 
+    "max_train_batch_size": null, 
+    "min_train_batch_size": 1, 
+    "max_train_micro_batch_size_per_gpu": 1.024000e+03, 
+    "min_train_micro_batch_size_per_gpu": 1, 
+    "num_tuning_micro_batch_sizes": 3
+}
+[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   bfloat16_enabled ............. False
+[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   checkpoint_parallel_write_pipeline  False
+[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   checkpoint_tag_validation_enabled  True
+[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   checkpoint_tag_validation_fail  False
+[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   comms_config ................. <deepspeed.comm.config.DeepSpeedCommsConfig object at 0x7f0511539820>
+[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   communication_data_type ...... None
+[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   compression_config ........... {'weight_quantization': {'shared_parameters': {'enabled': False, 'quantizer_kernel': False, 'schedule_offset': 0, 'quantize_groups': 1, 'quantize_verbose': False, 'quantization_type': 'symmetric', 'quantize_weight_in_forward': False, 'rounding': 'nearest', 'fp16_mixed_quantize': False, 'quantize_change_ratio': 0.001}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {'enabled': False, 'quantization_type': 'symmetric', 'range_calibration': 'dynamic', 'schedule_offset': 1000}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {'enabled': False, 'method': 'topk', 'schedule_offset': 1000}, 'different_groups': {}}, 'channel_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'layer_reduction': {'enabled': False}}
+[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   curriculum_enabled_legacy .... False
+[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   curriculum_params_legacy ..... False
+[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   data_efficiency_config ....... {'enabled': False, 'seed': 1234, 'data_sampling': {'enabled': False, 'num_epochs': 1000, 'num_workers': 0, 'curriculum_learning': {'enabled': False}}, 'data_routing': {'enabled': False, 'random_ltd': {'enabled': False, 'layer_token_lr_schedule': {'enabled': False}}}}
+[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   data_efficiency_enabled ...... False
+[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   dataloader_drop_last ......... False
+[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   disable_allgather ............ False
+[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   dump_state ................... False
+[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   dynamic_loss_scale_args ...... None
+[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   eigenvalue_enabled ........... False
+[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   eigenvalue_gas_boundary_resolution  1
+[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   eigenvalue_layer_name ........ bert.encoder.layer
+[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   eigenvalue_layer_num ......... 0
+[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   eigenvalue_max_iter .......... 100
+[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   eigenvalue_stability ......... 1e-06
+[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   eigenvalue_tol ............... 0.01
+[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   eigenvalue_verbose ........... False
+[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   elasticity_enabled ........... False
+[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   flops_profiler_config ........ {
+    "enabled": false, 
+    "profile_step": 1, 
+    "module_depth": -1, 
+    "top_modules": 1, 
+    "detailed": true, 
+    "output_file": null
+}
+[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   fp16_auto_cast ............... False
+[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   fp16_enabled ................. True
+[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   fp16_master_weights_and_gradients  False
+[2023-04-14 08:46:13,191] [INFO] [config.py:957:print]   global_rank .................. 0
+[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   grad_accum_dtype ............. None
+[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   gradient_accumulation_steps .. 1
+[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   gradient_clipping ............ 1.0
+[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   gradient_predivide_factor .... 1.0
+[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   hybrid_engine ................ enabled=False max_out_tokens=512 inference_tp_size=1 release_inference_cache=False pin_parameters=True tp_gather_partition_size=8
+[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   initial_dynamic_scale ........ 65536
+[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   load_universal_checkpoint .... False
+[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   loss_scale ................... 0
+[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   memory_breakdown ............. False
+[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   monitor_config ............... tensorboard=TensorBoardConfig(enabled=False, output_path='', job_name='DeepSpeedJobName') wandb=WandbConfig(enabled=False, group=None, team=None, project='deepspeed') csv_monitor=CSVConfig(enabled=False, output_path='', job_name='DeepSpeedJobName') enabled=False
+[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   nebula_config ................ {
+    "enabled": false, 
+    "persistent_storage_path": null, 
+    "persistent_time_interval": 100, 
+    "num_of_version_in_retention": 2, 
+    "enable_nebula_load": true, 
+    "load_path": null
+}
+[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   optimizer_legacy_fusion ...... False
+[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   optimizer_name ............... None
+[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   optimizer_params ............. None
+[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   pipeline ..................... {'stages': 'auto', 'partition': 'best', 'seed_layers': False, 'activation_checkpoint_interval': 0}
+[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   pld_enabled .................. False
+[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   pld_params ................... False
+[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   prescale_gradients ........... False
+[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   scheduler_name ............... None
+[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   scheduler_params ............. None
+[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   sparse_attention ............. None
+[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   sparse_gradients_enabled ..... False
+[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   steps_per_print .............. 10
+[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   train_batch_size ............. 32
+[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   train_micro_batch_size_per_gpu  4
+[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   use_node_local_storage ....... False
+[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   wall_clock_breakdown ......... False
+[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   world_size ................... 8
+[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   zero_allow_untested_optimizer  False
+[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   zero_config .................. stage=0 contiguous_gradients=True reduce_scatter=True reduce_bucket_size=500,000,000 allgather_partitions=True allgather_bucket_size=500,000,000 overlap_comm=False load_from_fp32_weights=True elastic_checkpoint=False offload_param=DeepSpeedZeroOffloadParamConfig(device='none', nvme_path=None, buffer_count=5, buffer_size=100,000,000, max_in_cpu=1,000,000,000, pin_memory=False) offload_optimizer=None sub_group_size=1,000,000,000 cpu_offload_param=None cpu_offload_use_pin_memory=None cpu_offload=None prefetch_bucket_size=50,000,000 param_persistence_threshold=10000 model_persistence_threshold=sys.maxsize max_live_parameters=1,000,000,000 max_reuse_distance=1,000,000,000 gather_16bit_weights_on_model_save=False stage3_gather_fp16_weights_on_model_save=False ignore_unused_parameters=True legacy_stage1=False round_robin_gradients=False memory_efficient_linear=False
+[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   zero_enabled ................. False
+[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   zero_force_ds_cpu_optimizer .. True
+[2023-04-14 08:46:13,192] [INFO] [config.py:957:print]   zero_optimization_stage ...... 0
+[2023-04-14 08:46:13,192] [INFO] [config.py:943:print_user_config]   json = {
+    "train_batch_size": 32, 
+    "train_micro_batch_size_per_gpu": 4, 
+    "steps_per_print": 10, 
+    "zero_optimization": {
+        "stage": 0, 
+        "stage3_param_persistence_threshold": 1.000000e+04, 
+        "offload_param": {
+            "device": "none"
+        }, 
+        "memory_efficient_linear": false
+    }, 
+    "fp16": {
+        "enabled": true
+    }, 
+    "gradient_clipping": 1.0, 
+    "prescale_gradients": false, 
+    "wall_clock_breakdown": false
+}
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Time to load utils op: 0.0015451908111572266 seconds
+*******************[end] Initialized Ref Model [end] (duration: 59.70s)*******************
+************************[start] Initializing Critic Model [start] ************************
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module fused_adam, skipping build step...
+Loading extension module fused_adam...
+Time to load fused_adam op: 0.0030541419982910156 seconds
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module fused_adam, skipping build step...
+Loading extension module fused_adam...
+Time to load fused_adam op: 0.0025348663330078125 seconds
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module fused_adam, skipping build step...
+Loading extension module fused_adam...
+Time to load fused_adam op: 0.0019736289978027344 seconds
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module fused_adam, skipping build step...
+Loading extension module fused_adam...
+Time to load fused_adam op: 0.0029854774475097656 seconds
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module fused_adam, skipping build step...
+Loading extension module fused_adam...
+Time to load fused_adam op: 0.0022940635681152344 seconds
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module fused_adam, skipping build step...
+Loading extension module fused_adam...
+Time to load fused_adam op: 0.0029518604278564453 seconds
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module fused_adam, skipping build step...
+Loading extension module fused_adam...
+Time to load fused_adam op: 0.002866029739379883 seconds
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Time to load utils op: 0.0012390613555908203 seconds
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Time to load utils op: 0.003458738327026367 seconds
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module fused_adam, skipping build step...
+Loading extension module fused_adam...
+Time to load fused_adam op: 0.002884387969970703 seconds
+[2023-04-14 08:46:30,739] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed info: version=0.9.0, git-hash=unknown, git-branch=unknown
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Time to load utils op: 0.5726070404052734 seconds
+[2023-04-14 08:46:34,252] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Flops Profiler Enabled: False
+Rank: 6 partition count [8, 8] and sizes[(41365824, False), (33792, False)] 
+Rank: 7 partition count [8, 8] and sizes[(41365824, False), (33792, False)] 
+Rank: 1 partition count [8, 8] and sizes[(41365824, False), (33792, False)] 
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Time to load utils op: 0.0016791820526123047 seconds
+[2023-04-14 08:46:36,743] [INFO] [logging.py:96:log_dist] [Rank 0] Removing param_group that has no 'params' in the client Optimizer
+[2023-04-14 08:46:36,743] [INFO] [logging.py:96:log_dist] [Rank 0] Using client Optimizer as basic optimizer
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Time to load utils op: 0.002676725387573242 seconds
+[2023-04-14 08:46:36,762] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Basic Optimizer = FusedAdam
+[2023-04-14 08:46:36,762] [INFO] [utils.py:51:is_zero_supported_optimizer] Checking ZeRO support for optimizer=FusedAdam type=<class 'deepspeed.ops.adam.fused_adam.FusedAdam'>
+[2023-04-14 08:46:36,763] [INFO] [logging.py:96:log_dist] [Rank 0] Creating torch.float16 ZeRO stage 2 optimizer
+[2023-04-14 08:46:36,763] [INFO] [stage_1_and_2.py:133:__init__] Reduce bucket size 500,000,000
+[2023-04-14 08:46:36,763] [INFO] [stage_1_and_2.py:134:__init__] Allgather bucket size 500,000,000
+[2023-04-14 08:46:36,763] [INFO] [stage_1_and_2.py:135:__init__] CPU Offload: False
+[2023-04-14 08:46:36,763] [INFO] [stage_1_and_2.py:136:__init__] Round robin gradient partitioning: False
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Time to load utils op: 0.0006117820739746094 seconds
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Time to load utils op: 0.002028226852416992 seconds
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Time to load utils op: 0.0013179779052734375 seconds
+Rank: 0 partition count [8, 8] and sizes[(41365824, False), (33792, False)] 
+Rank: 3 partition count [8, 8] and sizes[(41365824, False), (33792, False)] 
+Rank: 4 partition count [8, 8] and sizes[(41365824, False), (33792, False)] 
+Rank: 5 partition count [8, 8] and sizes[(41365824, False), (33792, False)] 
+Rank: 2 partition count [8, 8] and sizes[(41365824, False), (33792, False)] 
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...Time to load utils op: 0.0013089179992675781 seconds
+
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Time to load utils op: 0.0011861324310302734 seconds
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Time to load utils op: 0.0008509159088134766 secondsUsing /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Time to load utils op: 0.0007336139678955078 seconds
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...Time to load utils op: 0.0008528232574462891 seconds
+
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Time to load utils op: 0.0011703968048095703 seconds
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Time to load utils op: 0.0012271404266357422 seconds
+[2023-04-14 08:46:42,781] [INFO] [utils.py:785:see_memory_usage] Before initializing optimizer states
+[2023-04-14 08:46:42,782] [INFO] [utils.py:786:see_memory_usage] MA 8.1 GB         Max_MA 8.1 GB         CA 8.29 GB         Max_CA 8 GB 
+[2023-04-14 08:46:42,782] [INFO] [utils.py:793:see_memory_usage] CPU Virtual Memory:  used = 125.79 GB, percent = 12.5%
+[2023-04-14 08:46:43,491] [INFO] [utils.py:785:see_memory_usage] After initializing optimizer states
+[2023-04-14 08:46:43,491] [INFO] [utils.py:786:see_memory_usage] MA 8.41 GB         Max_MA 8.56 GB         CA 8.75 GB         Max_CA 9 GB 
+[2023-04-14 08:46:43,492] [INFO] [utils.py:793:see_memory_usage] CPU Virtual Memory:  used = 127.29 GB, percent = 12.6%
+[2023-04-14 08:46:43,492] [INFO] [stage_1_and_2.py:489:__init__] optimizer state initialized
+[2023-04-14 08:46:44,198] [INFO] [utils.py:785:see_memory_usage] After initializing ZeRO optimizer
+[2023-04-14 08:46:44,198] [INFO] [utils.py:786:see_memory_usage] MA 8.41 GB         Max_MA 8.41 GB         CA 8.75 GB         Max_CA 9 GB 
+[2023-04-14 08:46:44,199] [INFO] [utils.py:793:see_memory_usage] CPU Virtual Memory:  used = 129.3 GB, percent = 12.8%
+[2023-04-14 08:46:44,200] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Final Optimizer = FusedAdam
+[2023-04-14 08:46:44,200] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed using client LR scheduler
+[2023-04-14 08:46:44,200] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed LR Scheduler = <torch.optim.lr_scheduler.LambdaLR object at 0x7f05908fb7c0>
+[2023-04-14 08:46:44,200] [INFO] [logging.py:96:log_dist] [Rank 0] step=0, skipped=0, lr=[0.0, 0.0], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 08:46:44,201] [INFO] [config.py:953:print] DeepSpeedEngine configuration:
+[2023-04-14 08:46:44,201] [INFO] [config.py:957:print]   activation_checkpointing_config  {
+    "partition_activations": false, 
+    "contiguous_memory_optimization": false, 
+    "cpu_checkpointing": false, 
+    "number_checkpoints": null, 
+    "synchronize_checkpoint_boundary": false, 
+    "profile": false
+}
+[2023-04-14 08:46:44,201] [INFO] [config.py:957:print]   aio_config ................... {'block_size': 1048576, 'queue_depth': 8, 'thread_count': 1, 'single_submit': False, 'overlap_events': True}
+[2023-04-14 08:46:44,201] [INFO] [config.py:957:print]   amp_enabled .................. False
+[2023-04-14 08:46:44,201] [INFO] [config.py:957:print]   amp_params ................... False
+[2023-04-14 08:46:44,201] [INFO] [config.py:957:print]   autotuning_config ............ {
+    "enabled": false, 
+    "start_step": null, 
+    "end_step": null, 
+    "metric_path": null, 
+    "arg_mappings": null, 
+    "metric": "throughput", 
+    "model_info": null, 
+    "results_dir": "autotuning_results", 
+    "exps_dir": "autotuning_exps", 
+    "overwrite": true, 
+    "fast": true, 
+    "start_profile_step": 3, 
+    "end_profile_step": 5, 
+    "tuner_type": "gridsearch", 
+    "tuner_early_stopping": 5, 
+    "tuner_num_trials": 50, 
+    "model_info_path": null, 
+    "mp_size": 1, 
+    "max_train_batch_size": null, 
+    "min_train_batch_size": 1, 
+    "max_train_micro_batch_size_per_gpu": 1.024000e+03, 
+    "min_train_micro_batch_size_per_gpu": 1, 
+    "num_tuning_micro_batch_sizes": 3
+}
+[2023-04-14 08:46:44,208] [INFO] [config.py:957:print]   bfloat16_enabled ............. False
+[2023-04-14 08:46:44,208] [INFO] [config.py:957:print]   checkpoint_parallel_write_pipeline  False
+[2023-04-14 08:46:44,208] [INFO] [config.py:957:print]   checkpoint_tag_validation_enabled  True
+[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   checkpoint_tag_validation_fail  False
+[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   comms_config ................. <deepspeed.comm.config.DeepSpeedCommsConfig object at 0x7f04d00c9ee0>
+[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   communication_data_type ...... None
+[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   compression_config ........... {'weight_quantization': {'shared_parameters': {'enabled': False, 'quantizer_kernel': False, 'schedule_offset': 0, 'quantize_groups': 1, 'quantize_verbose': False, 'quantization_type': 'symmetric', 'quantize_weight_in_forward': False, 'rounding': 'nearest', 'fp16_mixed_quantize': False, 'quantize_change_ratio': 0.001}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {'enabled': False, 'quantization_type': 'symmetric', 'range_calibration': 'dynamic', 'schedule_offset': 1000}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {'enabled': False, 'method': 'topk', 'schedule_offset': 1000}, 'different_groups': {}}, 'channel_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'layer_reduction': {'enabled': False}}
+[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   curriculum_enabled_legacy .... False
+[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   curriculum_params_legacy ..... False
+[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   data_efficiency_config ....... {'enabled': False, 'seed': 1234, 'data_sampling': {'enabled': False, 'num_epochs': 1000, 'num_workers': 0, 'curriculum_learning': {'enabled': False}}, 'data_routing': {'enabled': False, 'random_ltd': {'enabled': False, 'layer_token_lr_schedule': {'enabled': False}}}}
+[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   data_efficiency_enabled ...... False
+[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   dataloader_drop_last ......... False
+[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   disable_allgather ............ False
+[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   dump_state ................... False
+[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   dynamic_loss_scale_args ...... {'init_scale': 65536, 'scale_window': 100, 'delayed_shift': 2, 'min_scale': 1}
+[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   eigenvalue_enabled ........... False
+[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   eigenvalue_gas_boundary_resolution  1
+[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   eigenvalue_layer_name ........ bert.encoder.layer
+[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   eigenvalue_layer_num ......... 0
+[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   eigenvalue_max_iter .......... 100
+[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   eigenvalue_stability ......... 1e-06
+[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   eigenvalue_tol ............... 0.01
+[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   eigenvalue_verbose ........... False
+[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   elasticity_enabled ........... False
+[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   flops_profiler_config ........ {
+    "enabled": false, 
+    "profile_step": 1, 
+    "module_depth": -1, 
+    "top_modules": 1, 
+    "detailed": true, 
+    "output_file": null
+}
+[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   fp16_auto_cast ............... False
+[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   fp16_enabled ................. True
+[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   fp16_master_weights_and_gradients  False
+[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   global_rank .................. 0
+[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   grad_accum_dtype ............. None
+[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   gradient_accumulation_steps .. 1
+[2023-04-14 08:46:44,209] [INFO] [config.py:957:print]   gradient_clipping ............ 1.0
+[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   gradient_predivide_factor .... 1.0
+[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   hybrid_engine ................ enabled=False max_out_tokens=512 inference_tp_size=1 release_inference_cache=False pin_parameters=True tp_gather_partition_size=8
+[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   initial_dynamic_scale ........ 65536
+[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   load_universal_checkpoint .... False
+[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   loss_scale ................... 0
+[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   memory_breakdown ............. False
+[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   monitor_config ............... tensorboard=TensorBoardConfig(enabled=False, output_path='', job_name='DeepSpeedJobName') wandb=WandbConfig(enabled=False, group=None, team=None, project='deepspeed') csv_monitor=CSVConfig(enabled=False, output_path='', job_name='DeepSpeedJobName') enabled=False
+[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   nebula_config ................ {
+    "enabled": false, 
+    "persistent_storage_path": null, 
+    "persistent_time_interval": 100, 
+    "num_of_version_in_retention": 2, 
+    "enable_nebula_load": true, 
+    "load_path": null
+}
+[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   optimizer_legacy_fusion ...... False
+[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   optimizer_name ............... None
+[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   optimizer_params ............. None
+[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   pipeline ..................... {'stages': 'auto', 'partition': 'best', 'seed_layers': False, 'activation_checkpoint_interval': 0}
+[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   pld_enabled .................. False
+[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   pld_params ................... False
+[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   prescale_gradients ........... False
+[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   scheduler_name ............... None
+[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   scheduler_params ............. None
+[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   sparse_attention ............. None
+[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   sparse_gradients_enabled ..... False
+[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   steps_per_print .............. 10
+[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   train_batch_size ............. 32
+[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   train_micro_batch_size_per_gpu  4
+[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   use_node_local_storage ....... False
+[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   wall_clock_breakdown ......... False
+[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   world_size ................... 8
+[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   zero_allow_untested_optimizer  False
+[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   zero_config .................. stage=2 contiguous_gradients=True reduce_scatter=True reduce_bucket_size=500,000,000 allgather_partitions=True allgather_bucket_size=500,000,000 overlap_comm=False load_from_fp32_weights=True elastic_checkpoint=False offload_param=DeepSpeedZeroOffloadParamConfig(device='none', nvme_path=None, buffer_count=5, buffer_size=100,000,000, max_in_cpu=1,000,000,000, pin_memory=False) offload_optimizer=DeepSpeedZeroOffloadOptimizerConfig(device='none', nvme_path=None, buffer_count=4, pin_memory=False, pipeline=False, pipeline_read=False, pipeline_write=False, fast_init=False) sub_group_size=1,000,000,000 cpu_offload_param=None cpu_offload_use_pin_memory=None cpu_offload=None prefetch_bucket_size=30000000 param_persistence_threshold=10000 model_persistence_threshold=sys.maxsize max_live_parameters=30000000 max_reuse_distance=1,000,000,000 gather_16bit_weights_on_model_save=False stage3_gather_fp16_weights_on_model_save=False ignore_unused_parameters=True legacy_stage1=False round_robin_gradients=False memory_efficient_linear=False
+[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   zero_enabled ................. True
+[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   zero_force_ds_cpu_optimizer .. True
+[2023-04-14 08:46:44,210] [INFO] [config.py:957:print]   zero_optimization_stage ...... 2
+[2023-04-14 08:46:44,210] [INFO] [config.py:943:print_user_config]   json = {
+    "train_batch_size": 32, 
+    "train_micro_batch_size_per_gpu": 4, 
+    "steps_per_print": 10, 
+    "zero_optimization": {
+        "stage": 2, 
+        "offload_param": {
+            "device": "none"
+        }, 
+        "offload_optimizer": {
+            "device": "none"
+        }, 
+        "stage3_param_persistence_threshold": 1.000000e+04, 
+        "stage3_max_live_parameters": 3.000000e+07, 
+        "stage3_prefetch_bucket_size": 3.000000e+07, 
+        "memory_efficient_linear": false
+    }, 
+    "fp16": {
+        "enabled": true, 
+        "loss_scale_window": 100
+    }, 
+    "gradient_clipping": 1.0, 
+    "prescale_gradients": false, 
+    "wall_clock_breakdown": false, 
+    "hybrid_engine": {
+        "enabled": false, 
+        "inference_tp_size": 1, 
+        "release_inference_cache": false, 
+        "pin_parameters": true, 
+        "tp_gather_partition_size": 8
+    }
+}
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Time to load utils op: 0.0017056465148925781 seconds
+*****************[end] Initialized Critic Model [end] (duration: 31.02s)******************
+************************[start] Initializing Reward Model [start] ************************
+[2023-04-14 08:47:02,078] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed info: version=0.9.0, git-hash=unknown, git-branch=unknown
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Time to load utils op: 1.1240429878234863 seconds
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Time to load utils op: 0.42214012145996094 seconds
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Time to load utils op: 0.002139568328857422 seconds
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Time to load utils op: 0.0012676715850830078 seconds
+[2023-04-14 08:47:21,293] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Flops Profiler Enabled: False
+[2023-04-14 08:47:21,295] [INFO] [config.py:953:print] DeepSpeedEngine configuration:
+[2023-04-14 08:47:21,295] [INFO] [config.py:957:print]   activation_checkpointing_config  {
+    "partition_activations": false, 
+    "contiguous_memory_optimization": false, 
+    "cpu_checkpointing": false, 
+    "number_checkpoints": null, 
+    "synchronize_checkpoint_boundary": false, 
+    "profile": false
+}
+[2023-04-14 08:47:21,295] [INFO] [config.py:957:print]   aio_config ................... {'block_size': 1048576, 'queue_depth': 8, 'thread_count': 1, 'single_submit': False, 'overlap_events': True}
+[2023-04-14 08:47:21,295] [INFO] [config.py:957:print]   amp_enabled .................. False
+[2023-04-14 08:47:21,295] [INFO] [config.py:957:print]   amp_params ................... False
+[2023-04-14 08:47:21,295] [INFO] [config.py:957:print]   autotuning_config ............ {
+    "enabled": false, 
+    "start_step": null, 
+    "end_step": null, 
+    "metric_path": null, 
+    "arg_mappings": null, 
+    "metric": "throughput", 
+    "model_info": null, 
+    "results_dir": "autotuning_results", 
+    "exps_dir": "autotuning_exps", 
+    "overwrite": true, 
+    "fast": true, 
+    "start_profile_step": 3, 
+    "end_profile_step": 5, 
+    "tuner_type": "gridsearch", 
+    "tuner_early_stopping": 5, 
+    "tuner_num_trials": 50, 
+    "model_info_path": null, 
+    "mp_size": 1, 
+    "max_train_batch_size": null, 
+    "min_train_batch_size": 1, 
+    "max_train_micro_batch_size_per_gpu": 1.024000e+03, 
+    "min_train_micro_batch_size_per_gpu": 1, 
+    "num_tuning_micro_batch_sizes": 3
+}
+[2023-04-14 08:47:21,295] [INFO] [config.py:957:print]   bfloat16_enabled ............. False
+[2023-04-14 08:47:21,295] [INFO] [config.py:957:print]   checkpoint_parallel_write_pipeline  False
+[2023-04-14 08:47:21,295] [INFO] [config.py:957:print]   checkpoint_tag_validation_enabled  True
+[2023-04-14 08:47:21,295] [INFO] [config.py:957:print]   checkpoint_tag_validation_fail  False
+[2023-04-14 08:47:21,295] [INFO] [config.py:957:print]   comms_config ................. <deepspeed.comm.config.DeepSpeedCommsConfig object at 0x7f0501546370>
+[2023-04-14 08:47:21,295] [INFO] [config.py:957:print]   communication_data_type ...... None
+[2023-04-14 08:47:21,295] [INFO] [config.py:957:print]   compression_config ........... {'weight_quantization': {'shared_parameters': {'enabled': False, 'quantizer_kernel': False, 'schedule_offset': 0, 'quantize_groups': 1, 'quantize_verbose': False, 'quantization_type': 'symmetric', 'quantize_weight_in_forward': False, 'rounding': 'nearest', 'fp16_mixed_quantize': False, 'quantize_change_ratio': 0.001}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {'enabled': False, 'quantization_type': 'symmetric', 'range_calibration': 'dynamic', 'schedule_offset': 1000}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {'enabled': False, 'method': 'topk', 'schedule_offset': 1000}, 'different_groups': {}}, 'channel_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'layer_reduction': {'enabled': False}}
+[2023-04-14 08:47:21,295] [INFO] [config.py:957:print]   curriculum_enabled_legacy .... False
+[2023-04-14 08:47:21,295] [INFO] [config.py:957:print]   curriculum_params_legacy ..... False
+[2023-04-14 08:47:21,295] [INFO] [config.py:957:print]   data_efficiency_config ....... {'enabled': False, 'seed': 1234, 'data_sampling': {'enabled': False, 'num_epochs': 1000, 'num_workers': 0, 'curriculum_learning': {'enabled': False}}, 'data_routing': {'enabled': False, 'random_ltd': {'enabled': False, 'layer_token_lr_schedule': {'enabled': False}}}}
+[2023-04-14 08:47:21,295] [INFO] [config.py:957:print]   data_efficiency_enabled ...... False
+[2023-04-14 08:47:21,295] [INFO] [config.py:957:print]   dataloader_drop_last ......... False
+[2023-04-14 08:47:21,295] [INFO] [config.py:957:print]   disable_allgather ............ False
+[2023-04-14 08:47:21,295] [INFO] [config.py:957:print]   dump_state ................... False
+[2023-04-14 08:47:21,295] [INFO] [config.py:957:print]   dynamic_loss_scale_args ...... None
+[2023-04-14 08:47:21,295] [INFO] [config.py:957:print]   eigenvalue_enabled ........... False
+[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   eigenvalue_gas_boundary_resolution  1
+[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   eigenvalue_layer_name ........ bert.encoder.layer
+[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   eigenvalue_layer_num ......... 0
+[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   eigenvalue_max_iter .......... 100
+[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   eigenvalue_stability ......... 1e-06
+[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   eigenvalue_tol ............... 0.01
+[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   eigenvalue_verbose ........... False
+[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   elasticity_enabled ........... False
+[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   flops_profiler_config ........ {
+    "enabled": false, 
+    "profile_step": 1, 
+    "module_depth": -1, 
+    "top_modules": 1, 
+    "detailed": true, 
+    "output_file": null
+}
+[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   fp16_auto_cast ............... False
+[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   fp16_enabled ................. True
+[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   fp16_master_weights_and_gradients  False
+[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   global_rank .................. 0
+[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   grad_accum_dtype ............. None
+[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   gradient_accumulation_steps .. 1
+[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   gradient_clipping ............ 1.0
+[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   gradient_predivide_factor .... 1.0
+[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   hybrid_engine ................ enabled=False max_out_tokens=512 inference_tp_size=1 release_inference_cache=False pin_parameters=True tp_gather_partition_size=8
+[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   initial_dynamic_scale ........ 65536
+[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   load_universal_checkpoint .... False
+[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   loss_scale ................... 0
+[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   memory_breakdown ............. False
+[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   monitor_config ............... tensorboard=TensorBoardConfig(enabled=False, output_path='', job_name='DeepSpeedJobName') wandb=WandbConfig(enabled=False, group=None, team=None, project='deepspeed') csv_monitor=CSVConfig(enabled=False, output_path='', job_name='DeepSpeedJobName') enabled=False
+[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   nebula_config ................ {
+    "enabled": false, 
+    "persistent_storage_path": null, 
+    "persistent_time_interval": 100, 
+    "num_of_version_in_retention": 2, 
+    "enable_nebula_load": true, 
+    "load_path": null
+}
+[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   optimizer_legacy_fusion ...... False
+[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   optimizer_name ............... None
+[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   optimizer_params ............. None
+[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   pipeline ..................... {'stages': 'auto', 'partition': 'best', 'seed_layers': False, 'activation_checkpoint_interval': 0}
+[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   pld_enabled .................. False
+[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   pld_params ................... False
+[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   prescale_gradients ........... False
+[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   scheduler_name ............... None
+[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   scheduler_params ............. None
+[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   sparse_attention ............. None
+[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   sparse_gradients_enabled ..... False
+[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   steps_per_print .............. 10
+[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   train_batch_size ............. 32
+[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   train_micro_batch_size_per_gpu  4
+[2023-04-14 08:47:21,296] [INFO] [config.py:957:print]   use_node_local_storage ....... False
+[2023-04-14 08:47:21,297] [INFO] [config.py:957:print]   wall_clock_breakdown ......... False
+[2023-04-14 08:47:21,297] [INFO] [config.py:957:print]   world_size ................... 8
+[2023-04-14 08:47:21,297] [INFO] [config.py:957:print]   zero_allow_untested_optimizer  False
+[2023-04-14 08:47:21,297] [INFO] [config.py:957:print]   zero_config .................. stage=0 contiguous_gradients=True reduce_scatter=True reduce_bucket_size=500,000,000 allgather_partitions=True allgather_bucket_size=500,000,000 overlap_comm=False load_from_fp32_weights=True elastic_checkpoint=False offload_param=DeepSpeedZeroOffloadParamConfig(device='none', nvme_path=None, buffer_count=5, buffer_size=100,000,000, max_in_cpu=1,000,000,000, pin_memory=False) offload_optimizer=None sub_group_size=1,000,000,000 cpu_offload_param=None cpu_offload_use_pin_memory=None cpu_offload=None prefetch_bucket_size=50,000,000 param_persistence_threshold=10000 model_persistence_threshold=sys.maxsize max_live_parameters=1,000,000,000 max_reuse_distance=1,000,000,000 gather_16bit_weights_on_model_save=False stage3_gather_fp16_weights_on_model_save=False ignore_unused_parameters=True legacy_stage1=False round_robin_gradients=False memory_efficient_linear=False
+[2023-04-14 08:47:21,297] [INFO] [config.py:957:print]   zero_enabled ................. False
+[2023-04-14 08:47:21,297] [INFO] [config.py:957:print]   zero_force_ds_cpu_optimizer .. True
+[2023-04-14 08:47:21,297] [INFO] [config.py:957:print]   zero_optimization_stage ...... 0
+[2023-04-14 08:47:21,297] [INFO] [config.py:943:print_user_config]   json = {
+    "train_batch_size": 32, 
+    "train_micro_batch_size_per_gpu": 4, 
+    "steps_per_print": 10, 
+    "zero_optimization": {
+        "stage": 0, 
+        "stage3_param_persistence_threshold": 1.000000e+04, 
+        "offload_param": {
+            "device": "none"
+        }, 
+        "memory_efficient_linear": false
+    }, 
+    "fp16": {
+        "enabled": true
+    }, 
+    "gradient_clipping": 1.0, 
+    "prescale_gradients": false, 
+    "wall_clock_breakdown": false
+}
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Time to load utils op: 0.0011258125305175781 seconds
+*****************[end] Initialized Reward Model [end] (duration: 37.09s)******************
+***** Running training *****
+Beginning of Epoch 1/1, Total Generation Batches 8260
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Time to load utils op: 0.009740114212036133 seconds
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Time to load utils op: 0.001196146011352539 seconds
+Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+No modifications detected for re-loaded extension module utils, skipping build step...
+Loading extension module utils...
+Time to load utils op: 0.001905679702758789 seconds
+------------------------------------------------------
+Free memory : 28.318542 (GigaBytes)  
+Total memory: 39.586121 (GigaBytes)  
+Requested memory: 0.515625 (GigaBytes) 
+Setting maximum total tokens (input + output) to 512 
+WorkSpace: 0x7efdf6000000 
+------------------------------------------------------
+[2023-04-14 08:47:24,971] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+[2023-04-14 08:47:25,055] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 0|ppo_ep: 1|act_loss: 0.08221435546875|cri_loss: 0.051910400390625|unsuper_loss: 0.0
+average reward score: 0.74267578125
+-------------------------------------------------------------------------------------
+|E2E latency=3.40s |Gather latency=0.00s (0.00%) |Generate time=2.68s (79.04%) |Training time=0.62s (18.39%) |Others=0.09 (2.57%)|CurSamplesPerSec=9.42 |AvgSamplesPerSec=9.42
+[2023-04-14 08:47:27,082] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+[2023-04-14 08:47:27,167] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 1|ppo_ep: 1|act_loss: -0.0277862548828125|cri_loss: -0.0082244873046875|unsuper_loss: 0.0
+average reward score: 1.08203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.64s (77.71%) |Training time=0.38s (18.12%) |Others=0.09 (4.17%)|CurSamplesPerSec=15.15 |AvgSamplesPerSec=11.62
+[2023-04-14 08:47:29,187] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
+[2023-04-14 08:47:29,272] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
+epoch: 0|step: 2|ppo_ep: 1|act_loss: 0.009063720703125|cri_loss: 0.0110931396484375|unsuper_loss: 0.0
+average reward score: 0.4833984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.60s (76.06%) |Training time=0.41s (19.68%) |Others=0.09 (4.26%)|CurSamplesPerSec=15.20 |AvgSamplesPerSec=12.61
+[2023-04-14 08:47:31,435] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192
+epoch: 0|step: 3|ppo_ep: 1|act_loss: 0.374755859375|cri_loss: 0.230712890625|unsuper_loss: 0.0
+average reward score: 0.4599609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.85%) |Training time=0.45s (20.99%) |Others=0.09 (4.17%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=13.09
+epoch: 0|step: 4|ppo_ep: 1|act_loss: 0.25146484375|cri_loss: 0.15087890625|unsuper_loss: 0.0
+average reward score: 0.5869140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.82%) |Training time=0.44s (20.62%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=13.41
+epoch: 0|step: 5|ppo_ep: 1|act_loss: -0.0999755859375|cri_loss: -0.04248046875|unsuper_loss: 0.0
+average reward score: 1.0810546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.79s (75.75%) |Training time=0.47s (20.04%) |Others=0.10 (4.21%)|CurSamplesPerSec=13.54 |AvgSamplesPerSec=13.43
+epoch: 0|step: 6|ppo_ep: 1|act_loss: -0.05633544921875|cri_loss: -0.0228271484375|unsuper_loss: 0.0
+average reward score: 1.267578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.19%) |Training time=0.43s (20.08%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=13.64
+epoch: 0|step: 7|ppo_ep: 1|act_loss: 0.14599609375|cri_loss: 0.0970458984375|unsuper_loss: 0.0
+average reward score: 0.63232421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.83%) |Training time=0.44s (20.56%) |Others=0.10 (4.61%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=13.79
+epoch: 0|step: 8|ppo_ep: 1|act_loss: -0.1309814453125|cri_loss: -0.061859130859375|unsuper_loss: 0.0
+average reward score: 1.03125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.23%) |Training time=0.45s (21.08%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=13.91
+[2023-04-14 08:47:44,382] [INFO] [logging.py:96:log_dist] [Rank 0] step=10, skipped=3, lr=[6.755000000000001e-07, 6.755000000000001e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 08:47:44,400] [INFO] [timer.py:199:stop] epoch=0/micro_step=10/global_step=10, RunningAvgSamplesPerSec=113.50591974216755, CurrSamplesPerSec=110.00055566710786, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 08:47:44,493] [INFO] [logging.py:96:log_dist] [Rank 0] step=10, skipped=4, lr=[3.0000000000000004e-07, 3.0000000000000004e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 9|ppo_ep: 1|act_loss: 0.1016845703125|cri_loss: 0.0560302734375|unsuper_loss: 0.0
+average reward score: 0.6484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.16%) |Training time=0.45s (21.26%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.01
+epoch: 0|step: 10|ppo_ep: 1|act_loss: -0.150390625|cri_loss: -0.06866455078125|unsuper_loss: 0.0
+average reward score: 0.9638671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.93%) |Training time=0.46s (21.39%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.09
+epoch: 0|step: 11|ppo_ep: 1|act_loss: -0.00390625|cri_loss: 0.007171630859375|unsuper_loss: 0.0
+average reward score: 0.94287109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.15%) |Training time=0.45s (21.25%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.16
+epoch: 0|step: 12|ppo_ep: 1|act_loss: -0.043121337890625|cri_loss: -0.01629638671875|unsuper_loss: 0.0
+average reward score: 1.5361328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.16%) |Training time=0.45s (21.16%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.22
+epoch: 0|step: 13|ppo_ep: 1|act_loss: 0.06817626953125|cri_loss: 0.04638671875|unsuper_loss: 0.0
+average reward score: 1.15234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.95%) |Training time=0.46s (21.47%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.27
+epoch: 0|step: 14|ppo_ep: 1|act_loss: -0.041015625|cri_loss: -0.0144195556640625|unsuper_loss: 0.0
+average reward score: 1.4755859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.02%) |Training time=0.46s (21.29%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.31
+epoch: 0|step: 15|ppo_ep: 1|act_loss: -0.01495361328125|cri_loss: -0.003643035888671875|unsuper_loss: 0.0
+average reward score: 1.29296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.40%) |Training time=0.48s (22.04%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.33
+epoch: 0|step: 16|ppo_ep: 1|act_loss: 0.165283203125|cri_loss: 0.108154296875|unsuper_loss: 0.0
+average reward score: 1.4296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.28%) |Training time=0.48s (21.95%) |Others=0.10 (4.77%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.36
+epoch: 0|step: 17|ppo_ep: 1|act_loss: -0.022979736328125|cri_loss: -0.0068511962890625|unsuper_loss: 0.0
+average reward score: 1.6962890625
+-------------------------------------------------------------------------------------
+|E2E latency=3.46s |Gather latency=0.00s (0.00%) |Generate time=1.59s (46.04%) |Training time=0.48s (13.84%) |Others=1.39 (40.12%)|CurSamplesPerSec=9.25 |AvgSamplesPerSec=13.93
+epoch: 0|step: 18|ppo_ep: 1|act_loss: -0.06494140625|cri_loss: -0.02099609375|unsuper_loss: 0.0
+average reward score: 1.404296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.66%) |Training time=0.45s (20.69%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=13.98
+[2023-04-14 08:48:07,384] [INFO] [logging.py:96:log_dist] [Rank 0] step=20, skipped=3, lr=[1.6405000000000002e-06, 1.6405000000000002e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 08:48:07,402] [INFO] [timer.py:199:stop] epoch=0/micro_step=20/global_step=20, RunningAvgSamplesPerSec=109.13579418396058, CurrSamplesPerSec=98.81244018293347, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 08:48:07,495] [INFO] [logging.py:96:log_dist] [Rank 0] step=20, skipped=4, lr=[8.000000000000001e-07, 8.000000000000001e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 19|ppo_ep: 1|act_loss: 0.1060791015625|cri_loss: 0.056182861328125|unsuper_loss: 0.0
+average reward score: 1.443359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.88%) |Training time=0.49s (20.82%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=13.96
+[2023-04-14 08:48:09,536] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192
+epoch: 0|step: 20|ppo_ep: 1|act_loss: 0.0240478515625|cri_loss: 0.0189361572265625|unsuper_loss: 0.0
+average reward score: 1.62109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.13%) |Training time=0.45s (21.13%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.01
+epoch: 0|step: 21|ppo_ep: 1|act_loss: -0.0139617919921875|cri_loss: 0.00112152099609375|unsuper_loss: 0.0
+average reward score: 1.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.64%) |Training time=0.48s (21.81%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.04
+epoch: 0|step: 22|ppo_ep: 1|act_loss: 0.06494140625|cri_loss: 0.0443115234375|unsuper_loss: 0.0
+average reward score: 1.67578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.87s |Gather latency=0.00s (0.00%) |Generate time=1.58s (55.15%) |Training time=0.47s (16.54%) |Others=0.81 (28.32%)|CurSamplesPerSec=11.15 |AvgSamplesPerSec=13.88
+epoch: 0|step: 23|ppo_ep: 1|act_loss: 0.06280517578125|cri_loss: 0.04296875|unsuper_loss: 0.0
+average reward score: 1.5107421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.40%) |Training time=0.48s (22.04%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=13.91
+epoch: 0|step: 24|ppo_ep: 1|act_loss: 0.11566162109375|cri_loss: 0.0645751953125|unsuper_loss: 0.0
+average reward score: 1.412109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.68s (74.14%) |Training time=0.49s (21.43%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.13 |AvgSamplesPerSec=13.92
+epoch: 0|step: 25|ppo_ep: 1|act_loss: 0.04132080078125|cri_loss: 0.030853271484375|unsuper_loss: 0.0
+average reward score: 2.07421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.28%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=13.95
+epoch: 0|step: 26|ppo_ep: 1|act_loss: 0.0771484375|cri_loss: 0.043548583984375|unsuper_loss: 0.0
+average reward score: 2.001953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.28%) |Training time=0.48s (22.04%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=13.98
+epoch: 0|step: 27|ppo_ep: 1|act_loss: 0.052703857421875|cri_loss: 0.030303955078125|unsuper_loss: 0.0
+average reward score: 1.6484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.17%) |Training time=0.48s (22.14%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.01
+epoch: 0|step: 28|ppo_ep: 1|act_loss: -0.023284912109375|cri_loss: -0.00701904296875|unsuper_loss: 0.0
+average reward score: 1.5791015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.55%) |Training time=0.47s (21.82%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.04
+[2023-04-14 08:48:29,822] [INFO] [logging.py:96:log_dist] [Rank 0] step=30, skipped=4, lr=[2.5090000000000005e-06, 2.5090000000000005e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 08:48:29,840] [INFO] [timer.py:199:stop] epoch=0/micro_step=30/global_step=30, RunningAvgSamplesPerSec=106.47952620942749, CurrSamplesPerSec=99.61674735645987, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 08:48:29,933] [INFO] [logging.py:96:log_dist] [Rank 0] step=30, skipped=4, lr=[1.3e-06, 1.3e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 29|ppo_ep: 1|act_loss: -0.11663818359375|cri_loss: -0.05499267578125|unsuper_loss: 0.0
+average reward score: 1.8896484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.10%) |Training time=0.48s (22.37%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.06
+epoch: 0|step: 30|ppo_ep: 1|act_loss: -0.067626953125|cri_loss: -0.02947998046875|unsuper_loss: 0.0
+average reward score: 1.865234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.19%) |Training time=0.48s (22.18%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.08
+epoch: 0|step: 31|ppo_ep: 1|act_loss: -0.127685546875|cri_loss: -0.048828125|unsuper_loss: 0.0
+average reward score: 1.484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.27%) |Training time=0.48s (22.18%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.11
+epoch: 0|step: 32|ppo_ep: 1|act_loss: 0.05084228515625|cri_loss: 0.0467529296875|unsuper_loss: 0.0
+average reward score: 1.5107421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.46s (21.43%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.13
+epoch: 0|step: 33|ppo_ep: 1|act_loss: -0.0036468505859375|cri_loss: 0.000675201416015625|unsuper_loss: 0.0
+average reward score: 1.853515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.07%) |Training time=0.46s (21.35%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.15
+epoch: 0|step: 34|ppo_ep: 1|act_loss: 0.03912353515625|cri_loss: 0.0289306640625|unsuper_loss: 0.0
+average reward score: 2.162109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.04%) |Training time=0.48s (20.58%) |Others=0.10 (4.38%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.14
+epoch: 0|step: 35|ppo_ep: 1|act_loss: 0.00030517578125|cri_loss: 0.0166015625|unsuper_loss: 0.0
+average reward score: 2.197265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.23%) |Training time=0.48s (22.20%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.15
+epoch: 0|step: 36|ppo_ep: 1|act_loss: 0.02532958984375|cri_loss: 0.0178680419921875|unsuper_loss: 0.0
+average reward score: 2.248046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.47s (21.90%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.17
+epoch: 0|step: 37|ppo_ep: 1|act_loss: 0.0909423828125|cri_loss: 0.1270751953125|unsuper_loss: 0.0
+average reward score: 2.359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.48s (22.07%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.19
+epoch: 0|step: 38|ppo_ep: 1|act_loss: 0.1304931640625|cri_loss: 0.0841064453125|unsuper_loss: 0.0
+average reward score: 2.462890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.79%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.20
+[2023-04-14 08:48:51,574] [INFO] [logging.py:96:log_dist] [Rank 0] step=40, skipped=4, lr=[3.474e-06, 3.474e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 08:48:51,593] [INFO] [timer.py:199:stop] epoch=0/micro_step=40/global_step=40, RunningAvgSamplesPerSec=105.52605461942416, CurrSamplesPerSec=102.21813606772618, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 08:48:51,686] [INFO] [logging.py:96:log_dist] [Rank 0] step=40, skipped=4, lr=[1.8000000000000001e-06, 1.8000000000000001e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 39|ppo_ep: 1|act_loss: 0.02679443359375|cri_loss: 0.0269775390625|unsuper_loss: 0.0
+average reward score: 2.24609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.30%) |Training time=0.48s (22.05%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.22
+epoch: 0|step: 40|ppo_ep: 1|act_loss: 0.11529541015625|cri_loss: 0.06890869140625|unsuper_loss: 0.0
+average reward score: 2.169921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.29%) |Training time=0.48s (22.07%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.23
+epoch: 0|step: 41|ppo_ep: 1|act_loss: 0.003875732421875|cri_loss: 0.0196533203125|unsuper_loss: 0.0
+average reward score: 2.478515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.48s (22.01%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.25
+epoch: 0|step: 42|ppo_ep: 1|act_loss: 0.06878662109375|cri_loss: 0.043701171875|unsuper_loss: 0.0
+average reward score: 2.244140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.45%) |Training time=0.47s (21.89%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.26
+epoch: 0|step: 43|ppo_ep: 1|act_loss: 0.039947509765625|cri_loss: 0.03765869140625|unsuper_loss: 0.0
+average reward score: 2.173828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.42s |Gather latency=0.00s (0.00%) |Generate time=1.58s (65.32%) |Training time=0.47s (19.58%) |Others=0.37 (15.10%)|CurSamplesPerSec=13.20 |AvgSamplesPerSec=14.23
+epoch: 0|step: 44|ppo_ep: 1|act_loss: -0.0477294921875|cri_loss: -0.0135498046875|unsuper_loss: 0.0
+average reward score: 2.2265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.42%) |Training time=0.47s (21.88%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.25
+epoch: 0|step: 45|ppo_ep: 1|act_loss: 0.006374359130859375|cri_loss: 0.0103607177734375|unsuper_loss: 0.0
+average reward score: 2.35546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.43%) |Training time=0.47s (22.02%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.26
+epoch: 0|step: 46|ppo_ep: 1|act_loss: -0.04522705078125|cri_loss: -0.01043701171875|unsuper_loss: 0.0
+average reward score: 2.5859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.21%) |Training time=0.48s (22.17%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.27
+epoch: 0|step: 47|ppo_ep: 1|act_loss: -0.21044921875|cri_loss: -0.07177734375|unsuper_loss: 0.0
+average reward score: 1.8818359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.62%) |Training time=0.47s (21.80%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.28
+epoch: 0|step: 48|ppo_ep: 1|act_loss: -0.071044921875|cri_loss: -0.0281982421875|unsuper_loss: 0.0
+average reward score: 1.77734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.25%) |Training time=0.44s (20.25%) |Others=0.12 (5.50%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.29
+[2023-04-14 08:49:13,586] [INFO] [logging.py:96:log_dist] [Rank 0] step=50, skipped=4, lr=[4.439e-06, 4.439e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 08:49:13,604] [INFO] [timer.py:199:stop] epoch=0/micro_step=50/global_step=50, RunningAvgSamplesPerSec=105.16244786155492, CurrSamplesPerSec=102.06640283983927, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 08:49:13,697] [INFO] [logging.py:96:log_dist] [Rank 0] step=50, skipped=4, lr=[2.3000000000000004e-06, 2.3000000000000004e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 49|ppo_ep: 1|act_loss: -0.0042724609375|cri_loss: 0.0188140869140625|unsuper_loss: 0.0
+average reward score: 2.279296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.25%) |Training time=0.48s (20.51%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.79 |AvgSamplesPerSec=14.28
+epoch: 0|step: 50|ppo_ep: 1|act_loss: 0.0303955078125|cri_loss: 0.0217437744140625|unsuper_loss: 0.0
+average reward score: 1.8505859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.13%) |Training time=0.48s (22.24%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.29
+epoch: 0|step: 51|ppo_ep: 1|act_loss: 0.0499267578125|cri_loss: 0.031097412109375|unsuper_loss: 0.0
+average reward score: 1.7294921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.35%) |Training time=0.47s (21.99%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.30
+epoch: 0|step: 52|ppo_ep: 1|act_loss: 0.080322265625|cri_loss: 0.04913330078125|unsuper_loss: 0.0
+average reward score: 2.24609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.98%) |Training time=0.49s (22.40%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.31
+epoch: 0|step: 53|ppo_ep: 1|act_loss: -0.0433349609375|cri_loss: -0.0122833251953125|unsuper_loss: 0.0
+average reward score: 2.27734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.68s (74.28%) |Training time=0.48s (21.29%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.18 |AvgSamplesPerSec=14.31
+epoch: 0|step: 54|ppo_ep: 1|act_loss: -0.0277557373046875|cri_loss: -0.0034637451171875|unsuper_loss: 0.0
+average reward score: 2.482421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.48%) |Training time=0.47s (21.87%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.32
+epoch: 0|step: 55|ppo_ep: 1|act_loss: -0.006175994873046875|cri_loss: 0.000583648681640625|unsuper_loss: 0.0
+average reward score: 2.197265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.10%) |Training time=0.48s (22.33%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.33
+epoch: 0|step: 56|ppo_ep: 1|act_loss: 0.05126953125|cri_loss: 0.0340576171875|unsuper_loss: 0.0
+average reward score: 1.505859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.53%) |Training time=0.47s (21.83%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.33
+epoch: 0|step: 57|ppo_ep: 1|act_loss: 0.0333251953125|cri_loss: 0.02850341796875|unsuper_loss: 0.0
+average reward score: 1.888671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.16%) |Training time=0.48s (22.19%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.34
+epoch: 0|step: 58|ppo_ep: 1|act_loss: 0.0157318115234375|cri_loss: 0.017181396484375|unsuper_loss: 0.0
+average reward score: 2.6796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.23%) |Training time=0.48s (22.10%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.35
+[2023-04-14 08:49:35,261] [INFO] [logging.py:96:log_dist] [Rank 0] step=60, skipped=4, lr=[5.404000000000001e-06, 5.404000000000001e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 08:49:35,280] [INFO] [timer.py:199:stop] epoch=0/micro_step=60/global_step=60, RunningAvgSamplesPerSec=104.51481421567632, CurrSamplesPerSec=102.19540958042981, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 08:49:35,373] [INFO] [logging.py:96:log_dist] [Rank 0] step=60, skipped=4, lr=[2.8000000000000003e-06, 2.8000000000000003e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 59|ppo_ep: 1|act_loss: 0.1995849609375|cri_loss: 0.1263427734375|unsuper_loss: 0.0
+average reward score: 2.279296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.29%) |Training time=0.48s (22.07%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.36
+epoch: 0|step: 60|ppo_ep: 1|act_loss: 0.0227508544921875|cri_loss: 0.0230255126953125|unsuper_loss: 0.0
+average reward score: 2.01953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.24%) |Training time=0.48s (22.09%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.37
+epoch: 0|step: 61|ppo_ep: 1|act_loss: -0.019378662109375|cri_loss: 0.0001220703125|unsuper_loss: 0.0
+average reward score: 2.421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.40%) |Training time=0.47s (21.88%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.37
+epoch: 0|step: 62|ppo_ep: 1|act_loss: 0.0308380126953125|cri_loss: 0.02142333984375|unsuper_loss: 0.0
+average reward score: 2.513671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.23%) |Training time=0.48s (22.13%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.38
+epoch: 0|step: 63|ppo_ep: 1|act_loss: 0.01335906982421875|cri_loss: 0.0110626220703125|unsuper_loss: 0.0
+average reward score: 2.2421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.14%) |Training time=0.46s (21.16%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.39
+epoch: 0|step: 64|ppo_ep: 1|act_loss: 0.07464599609375|cri_loss: 0.04296875|unsuper_loss: 0.0
+average reward score: 2.30859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.76s |Gather latency=0.00s (0.00%) |Generate time=1.75s (63.68%) |Training time=0.46s (16.86%) |Others=0.54 (19.46%)|CurSamplesPerSec=11.61 |AvgSamplesPerSec=14.34
+epoch: 0|step: 65|ppo_ep: 1|act_loss: -0.02947998046875|cri_loss: 0.002777099609375|unsuper_loss: 0.0
+average reward score: 2.33984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.47s (21.80%) |Others=0.10 (4.82%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.34
+epoch: 0|step: 66|ppo_ep: 1|act_loss: 0.08624267578125|cri_loss: 0.05859375|unsuper_loss: 0.0
+average reward score: 2.13671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.07%) |Training time=0.48s (22.27%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.35
+epoch: 0|step: 67|ppo_ep: 1|act_loss: 0.03778076171875|cri_loss: 0.02117919921875|unsuper_loss: 0.0
+average reward score: 1.986328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.43%) |Training time=0.47s (21.91%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.36
+epoch: 0|step: 68|ppo_ep: 1|act_loss: 0.0061492919921875|cri_loss: 0.01113128662109375|unsuper_loss: 0.0
+average reward score: 2.46875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.36%) |Training time=0.48s (22.02%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.36
+[2023-04-14 08:49:57,426] [INFO] [logging.py:96:log_dist] [Rank 0] step=70, skipped=4, lr=[6.369000000000001e-06, 6.369000000000001e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 08:49:57,445] [INFO] [timer.py:199:stop] epoch=0/micro_step=70/global_step=70, RunningAvgSamplesPerSec=104.34243082596494, CurrSamplesPerSec=101.61912549440034, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 08:49:57,538] [INFO] [logging.py:96:log_dist] [Rank 0] step=70, skipped=4, lr=[3.3000000000000006e-06, 3.3000000000000006e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 69|ppo_ep: 1|act_loss: 0.04266357421875|cri_loss: 0.057037353515625|unsuper_loss: 0.0
+average reward score: 2.65234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.48s (22.18%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.37
+epoch: 0|step: 70|ppo_ep: 1|act_loss: -0.05322265625|cri_loss: 0.0018310546875|unsuper_loss: 0.0
+average reward score: 2.640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.09%) |Training time=0.48s (22.26%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.38
+epoch: 0|step: 71|ppo_ep: 1|act_loss: -0.072509765625|cri_loss: -0.027374267578125|unsuper_loss: 0.0
+average reward score: 2.64453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.35%) |Training time=0.47s (22.01%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.38
+epoch: 0|step: 72|ppo_ep: 1|act_loss: 0.0211181640625|cri_loss: 0.0151519775390625|unsuper_loss: 0.0
+average reward score: 1.9931640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.33%) |Training time=0.47s (22.03%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.39
+epoch: 0|step: 73|ppo_ep: 1|act_loss: -0.056396484375|cri_loss: -0.0245513916015625|unsuper_loss: 0.0
+average reward score: 2.205078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.36%) |Training time=0.47s (22.00%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.39
+epoch: 0|step: 74|ppo_ep: 1|act_loss: -0.10888671875|cri_loss: -0.047149658203125|unsuper_loss: 0.0
+average reward score: 2.083984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.63%) |Training time=0.47s (21.71%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.40
+epoch: 0|step: 75|ppo_ep: 1|act_loss: -0.0101776123046875|cri_loss: 2.288818359375e-05|unsuper_loss: 0.0
+average reward score: 2.140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.23%) |Training time=0.48s (22.16%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41
+epoch: 0|step: 76|ppo_ep: 1|act_loss: 0.1962890625|cri_loss: 0.11920166015625|unsuper_loss: 0.0
+average reward score: 2.0234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.60s (69.30%) |Training time=0.47s (20.44%) |Others=0.24 (10.26%)|CurSamplesPerSec=13.87 |AvgSamplesPerSec=14.40
+epoch: 0|step: 77|ppo_ep: 1|act_loss: 0.2744140625|cri_loss: 0.161865234375|unsuper_loss: 0.0
+average reward score: 2.255859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.50%) |Training time=0.47s (21.85%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.40
+epoch: 0|step: 78|ppo_ep: 1|act_loss: 0.05914306640625|cri_loss: 0.03167724609375|unsuper_loss: 0.0
+average reward score: 1.486328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.15%) |Training time=0.49s (22.20%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.41
+[2023-04-14 08:50:19,193] [INFO] [logging.py:96:log_dist] [Rank 0] step=80, skipped=4, lr=[7.3340000000000004e-06, 7.3340000000000004e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 08:50:19,212] [INFO] [timer.py:199:stop] epoch=0/micro_step=80/global_step=80, RunningAvgSamplesPerSec=104.23033362572767, CurrSamplesPerSec=106.49301658358083, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 08:50:19,304] [INFO] [logging.py:96:log_dist] [Rank 0] step=80, skipped=4, lr=[3.8000000000000005e-06, 3.8000000000000005e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 79|ppo_ep: 1|act_loss: 0.04840087890625|cri_loss: 0.0286712646484375|unsuper_loss: 0.0
+average reward score: 2.51953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.46s (21.51%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.41
+epoch: 0|step: 80|ppo_ep: 1|act_loss: -0.1134033203125|cri_loss: -0.039398193359375|unsuper_loss: 0.0
+average reward score: 2.19140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.75%) |Training time=0.46s (21.58%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.42
+epoch: 0|step: 81|ppo_ep: 1|act_loss: -0.018402099609375|cri_loss: -0.006984710693359375|unsuper_loss: 0.0
+average reward score: 1.7470703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.72%) |Training time=0.47s (21.67%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
+epoch: 0|step: 82|ppo_ep: 1|act_loss: 0.0022106170654296875|cri_loss: 0.00585174560546875|unsuper_loss: 0.0
+average reward score: 2.59375
+-------------------------------------------------------------------------------------
+|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.61s (72.33%) |Training time=0.48s (21.58%) |Others=0.14 (6.08%)|CurSamplesPerSec=14.34 |AvgSamplesPerSec=14.42
+epoch: 0|step: 83|ppo_ep: 1|act_loss: -0.027191162109375|cri_loss: -0.01220703125|unsuper_loss: 0.0
+average reward score: 2.3984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.68%) |Training time=0.47s (21.72%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.43
+epoch: 0|step: 84|ppo_ep: 1|act_loss: 0.021575927734375|cri_loss: 0.0121307373046875|unsuper_loss: 0.0
+average reward score: 2.166015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.68%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43
+epoch: 0|step: 85|ppo_ep: 1|act_loss: -0.00457763671875|cri_loss: 0.0264892578125|unsuper_loss: 0.0
+average reward score: 2.62890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.74%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.43
+epoch: 0|step: 86|ppo_ep: 1|act_loss: 0.05242919921875|cri_loss: 0.030517578125|unsuper_loss: 0.0
+average reward score: 2.1796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.61%) |Training time=0.47s (21.71%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.44
+epoch: 0|step: 87|ppo_ep: 1|act_loss: -0.063720703125|cri_loss: -0.02862548828125|unsuper_loss: 0.0
+average reward score: 2.689453125
+-------------------------------------------------------------------------------------
+|E2E latency=3.10s |Gather latency=0.00s (0.00%) |Generate time=1.59s (51.29%) |Training time=0.47s (15.14%) |Others=1.04 (33.57%)|CurSamplesPerSec=10.32 |AvgSamplesPerSec=14.37
+epoch: 0|step: 88|ppo_ep: 1|act_loss: 0.04443359375|cri_loss: 0.0270843505859375|unsuper_loss: 0.0
+average reward score: 2.146484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.49%) |Training time=0.47s (21.85%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.38
+[2023-04-14 08:50:41,764] [INFO] [logging.py:96:log_dist] [Rank 0] step=90, skipped=4, lr=[8.299000000000001e-06, 8.299000000000001e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 08:50:41,783] [INFO] [timer.py:199:stop] epoch=0/micro_step=90/global_step=90, RunningAvgSamplesPerSec=104.24126059063528, CurrSamplesPerSec=104.1532635552079, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 08:50:41,875] [INFO] [logging.py:96:log_dist] [Rank 0] step=90, skipped=4, lr=[4.3e-06, 4.3e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 89|ppo_ep: 1|act_loss: 0.021820068359375|cri_loss: 0.01277923583984375|unsuper_loss: 0.0
+average reward score: 2.080078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.67%) |Training time=0.47s (21.79%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.38
+epoch: 0|step: 90|ppo_ep: 1|act_loss: 0.04132080078125|cri_loss: 0.025177001953125|unsuper_loss: 0.0
+average reward score: 2.509765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.80%) |Training time=0.49s (22.61%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.39
+epoch: 0|step: 91|ppo_ep: 1|act_loss: -0.03411865234375|cri_loss: -0.014068603515625|unsuper_loss: 0.0
+average reward score: 2.0859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.56%) |Training time=0.50s (22.97%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.39
+epoch: 0|step: 92|ppo_ep: 1|act_loss: 0.0838623046875|cri_loss: 0.04376220703125|unsuper_loss: 0.0
+average reward score: 1.935546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.47%) |Training time=0.50s (22.94%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.39
+epoch: 0|step: 93|ppo_ep: 1|act_loss: 0.052032470703125|cri_loss: 0.028717041015625|unsuper_loss: 0.0
+average reward score: 1.8779296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.63s (69.96%) |Training time=0.59s (25.52%) |Others=0.11 (4.52%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.38
+epoch: 0|step: 94|ppo_ep: 1|act_loss: 0.02972412109375|cri_loss: 0.0212554931640625|unsuper_loss: 0.0
+average reward score: 2.41796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.25%) |Training time=0.51s (23.21%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.39
+epoch: 0|step: 95|ppo_ep: 1|act_loss: -0.09625244140625|cri_loss: -0.038818359375|unsuper_loss: 0.0
+average reward score: 2.189453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.46%) |Training time=0.50s (23.04%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.39
+epoch: 0|step: 96|ppo_ep: 1|act_loss: -0.003040313720703125|cri_loss: 0.001705169677734375|unsuper_loss: 0.0
+average reward score: 2.337890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.65%) |Training time=0.50s (22.75%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.39
+epoch: 0|step: 97|ppo_ep: 1|act_loss: 0.00817108154296875|cri_loss: 0.006500244140625|unsuper_loss: 0.0
+average reward score: 1.982421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.65%) |Training time=0.50s (22.78%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.39
+epoch: 0|step: 98|ppo_ep: 1|act_loss: 0.0372314453125|cri_loss: 0.0206298828125|unsuper_loss: 0.0
+average reward score: 2.24609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.80s |Gather latency=0.00s (0.00%) |Generate time=1.58s (56.59%) |Training time=0.50s (17.98%) |Others=0.71 (25.43%)|CurSamplesPerSec=11.43 |AvgSamplesPerSec=14.36
+[2023-04-14 08:51:04,421] [INFO] [logging.py:96:log_dist] [Rank 0] step=100, skipped=4, lr=[9.264e-06, 9.264e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 08:51:04,439] [INFO] [timer.py:199:stop] epoch=0/micro_step=100/global_step=100, RunningAvgSamplesPerSec=102.92779009185206, CurrSamplesPerSec=92.65775833740642, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 08:51:04,532] [INFO] [logging.py:96:log_dist] [Rank 0] step=100, skipped=4, lr=[4.800000000000001e-06, 4.800000000000001e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 99|ppo_ep: 1|act_loss: 0.050079345703125|cri_loss: 0.0273284912109375|unsuper_loss: 0.0
+average reward score: 2.841796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.36%) |Training time=0.51s (23.17%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.36
+epoch: 0|step: 100|ppo_ep: 1|act_loss: -0.035552978515625|cri_loss: -0.0124969482421875|unsuper_loss: 0.0
+average reward score: 2.423828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.53%) |Training time=0.50s (22.88%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.36
+epoch: 0|step: 101|ppo_ep: 1|act_loss: 0.02716064453125|cri_loss: 0.019012451171875|unsuper_loss: 0.0
+average reward score: 2.265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.32%) |Training time=0.51s (23.11%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.36
+epoch: 0|step: 102|ppo_ep: 1|act_loss: 0.03790283203125|cri_loss: 0.020172119140625|unsuper_loss: 0.0
+average reward score: 1.931640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.43%) |Training time=0.50s (23.01%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.37
+epoch: 0|step: 103|ppo_ep: 1|act_loss: -0.005924224853515625|cri_loss: -0.000888824462890625|unsuper_loss: 0.0
+average reward score: 2.119140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.53%) |Training time=0.51s (23.04%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.37
+epoch: 0|step: 104|ppo_ep: 1|act_loss: -0.02667236328125|cri_loss: 0.0159912109375|unsuper_loss: 0.0
+average reward score: 2.685546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.51%) |Training time=0.50s (22.91%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.37
+epoch: 0|step: 105|ppo_ep: 1|act_loss: 0.0069122314453125|cri_loss: 0.00909423828125|unsuper_loss: 0.0
+average reward score: 3.01171875
+-------------------------------------------------------------------------------------
+|E2E latency=3.44s |Gather latency=0.00s (0.00%) |Generate time=1.58s (46.01%) |Training time=0.50s (14.67%) |Others=1.35 (39.32%)|CurSamplesPerSec=9.30 |AvgSamplesPerSec=14.30
+epoch: 0|step: 106|ppo_ep: 1|act_loss: -0.02008056640625|cri_loss: -0.00409698486328125|unsuper_loss: 0.0
+average reward score: 4.37109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.17%) |Training time=0.51s (23.28%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.30
+epoch: 0|step: 107|ppo_ep: 1|act_loss: 0.0693359375|cri_loss: 0.03973388671875|unsuper_loss: 0.0
+average reward score: 2.705078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.77s (74.59%) |Training time=0.50s (21.21%) |Others=0.10 (4.20%)|CurSamplesPerSec=13.49 |AvgSamplesPerSec=14.29
+epoch: 0|step: 108|ppo_ep: 1|act_loss: 0.0755615234375|cri_loss: 0.046112060546875|unsuper_loss: 0.0
+average reward score: 3.279296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.24%) |Training time=0.51s (23.12%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.29
+[2023-04-14 08:51:27,825] [INFO] [logging.py:96:log_dist] [Rank 0] step=110, skipped=4, lr=[9.649987126724682e-06, 9.649987126724682e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 08:51:27,843] [INFO] [timer.py:199:stop] epoch=0/micro_step=110/global_step=110, RunningAvgSamplesPerSec=101.84287411563196, CurrSamplesPerSec=84.43894398574668, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 08:51:27,936] [INFO] [logging.py:96:log_dist] [Rank 0] step=110, skipped=4, lr=[4.999993329909162e-06, 4.999993329909162e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 109|ppo_ep: 1|act_loss: 0.0775146484375|cri_loss: 0.044769287109375|unsuper_loss: 0.0
+average reward score: 3.353515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.60s (71.41%) |Training time=0.54s (24.22%) |Others=0.10 (4.36%)|CurSamplesPerSec=14.31 |AvgSamplesPerSec=14.29
+epoch: 0|step: 110|ppo_ep: 1|act_loss: 0.1475830078125|cri_loss: 0.1015625|unsuper_loss: 0.0
+average reward score: 3.5390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.62s (71.42%) |Training time=0.53s (23.33%) |Others=0.12 (5.25%)|CurSamplesPerSec=14.11 |AvgSamplesPerSec=14.29
+epoch: 0|step: 111|ppo_ep: 1|act_loss: -0.0105133056640625|cri_loss: -0.00122833251953125|unsuper_loss: 0.0
+average reward score: 3.443359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.65s (72.47%) |Training time=0.53s (23.06%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.02 |AvgSamplesPerSec=14.29
+epoch: 0|step: 112|ppo_ep: 1|act_loss: -0.0516357421875|cri_loss: -0.0228424072265625|unsuper_loss: 0.0
+average reward score: 3.765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.88%) |Training time=0.52s (23.59%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.29
+epoch: 0|step: 113|ppo_ep: 1|act_loss: -0.0172882080078125|cri_loss: 0.0036468505859375|unsuper_loss: 0.0
+average reward score: 3.423828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.68%) |Training time=0.53s (23.89%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.46 |AvgSamplesPerSec=14.29
+epoch: 0|step: 114|ppo_ep: 1|act_loss: 0.12249755859375|cri_loss: 0.0792236328125|unsuper_loss: 0.0
+average reward score: 2.9453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.01%) |Training time=0.52s (23.45%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.29
+epoch: 0|step: 115|ppo_ep: 1|act_loss: 0.03033447265625|cri_loss: 0.023590087890625|unsuper_loss: 0.0
+average reward score: 3.22265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.05%) |Training time=0.52s (23.51%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.46 |AvgSamplesPerSec=14.30
+epoch: 0|step: 116|ppo_ep: 1|act_loss: 0.004199981689453125|cri_loss: 0.00466156005859375|unsuper_loss: 0.0
+average reward score: 3.578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.08s |Gather latency=0.00s (0.00%) |Generate time=1.59s (76.49%) |Training time=0.39s (18.63%) |Others=0.10 (4.88%)|CurSamplesPerSec=15.41 |AvgSamplesPerSec=14.30
+epoch: 0|step: 117|ppo_ep: 1|act_loss: 0.0142974853515625|cri_loss: 0.00982666015625|unsuper_loss: 0.0
+average reward score: 3.892578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.63%) |Training time=0.50s (22.89%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.31
+epoch: 0|step: 118|ppo_ep: 1|act_loss: 0.09619140625|cri_loss: 0.05419921875|unsuper_loss: 0.0
+average reward score: 3.646484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.78%) |Training time=0.50s (22.64%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.31
+[2023-04-14 08:51:50,104] [INFO] [logging.py:96:log_dist] [Rank 0] step=120, skipped=4, lr=[9.649908456957608e-06, 9.649908456957608e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 08:51:50,123] [INFO] [timer.py:199:stop] epoch=0/micro_step=120/global_step=120, RunningAvgSamplesPerSec=101.11741645963838, CurrSamplesPerSec=93.20669082868693, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 08:51:50,215] [INFO] [logging.py:96:log_dist] [Rank 0] step=120, skipped=4, lr=[4.999952568371817e-06, 4.999952568371817e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 119|ppo_ep: 1|act_loss: 0.18896484375|cri_loss: 0.116455078125|unsuper_loss: 0.0
+average reward score: 3.484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.48%) |Training time=0.51s (23.06%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.31
+epoch: 0|step: 120|ppo_ep: 1|act_loss: -0.066162109375|cri_loss: -0.029693603515625|unsuper_loss: 0.0
+average reward score: 3.103515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.44%) |Training time=0.50s (23.01%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.31
+epoch: 0|step: 121|ppo_ep: 1|act_loss: -0.02349853515625|cri_loss: -0.00815582275390625|unsuper_loss: 0.0
+average reward score: 3.55859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.30%) |Training time=0.51s (23.15%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.32
+epoch: 0|step: 122|ppo_ep: 1|act_loss: 0.03350830078125|cri_loss: 0.0233154296875|unsuper_loss: 0.0
+average reward score: 3.19921875
+-------------------------------------------------------------------------------------
+|E2E latency=3.18s |Gather latency=0.00s (0.00%) |Generate time=1.75s (55.13%) |Training time=0.52s (16.37%) |Others=0.91 (28.50%)|CurSamplesPerSec=10.07 |AvgSamplesPerSec=14.27
+epoch: 0|step: 123|ppo_ep: 1|act_loss: -0.057373046875|cri_loss: -0.022796630859375|unsuper_loss: 0.0
+average reward score: 3.578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.49%) |Training time=0.50s (22.68%) |Others=0.11 (4.82%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.27
+epoch: 0|step: 124|ppo_ep: 1|act_loss: 0.162109375|cri_loss: 0.1400146484375|unsuper_loss: 0.0
+average reward score: 4.0703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.00%) |Training time=0.49s (22.40%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.27
+epoch: 0|step: 125|ppo_ep: 1|act_loss: 0.07275390625|cri_loss: 0.042236328125|unsuper_loss: 0.0
+average reward score: 4.015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.89%) |Training time=0.49s (22.46%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.28
+epoch: 0|step: 126|ppo_ep: 1|act_loss: -0.01470184326171875|cri_loss: -0.003173828125|unsuper_loss: 0.0
+average reward score: 3.93359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.01%) |Training time=0.49s (22.36%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.28
+epoch: 0|step: 127|ppo_ep: 1|act_loss: 0.09210205078125|cri_loss: 0.05419921875|unsuper_loss: 0.0
+average reward score: 3.919921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.68%) |Training time=0.50s (22.76%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.28
+epoch: 0|step: 128|ppo_ep: 1|act_loss: 0.0026092529296875|cri_loss: 0.01064300537109375|unsuper_loss: 0.0
+average reward score: 3.953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.71%) |Training time=0.49s (22.66%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.29
+[2023-04-14 08:52:12,913] [INFO] [logging.py:96:log_dist] [Rank 0] step=130, skipped=4, lr=[9.649758270407744e-06, 9.649758270407744e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 08:52:12,931] [INFO] [timer.py:199:stop] epoch=0/micro_step=130/global_step=130, RunningAvgSamplesPerSec=100.66527352211149, CurrSamplesPerSec=96.38733686515383, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 08:52:13,024] [INFO] [logging.py:96:log_dist] [Rank 0] step=130, skipped=4, lr=[4.999874751506603e-06, 4.999874751506603e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 129|ppo_ep: 1|act_loss: 0.0672607421875|cri_loss: 0.040771484375|unsuper_loss: 0.0
+average reward score: 4.2265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.90%) |Training time=0.49s (22.53%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.29
+epoch: 0|step: 130|ppo_ep: 1|act_loss: -0.0712890625|cri_loss: -0.033447265625|unsuper_loss: 0.0
+average reward score: 4.2265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.99%) |Training time=0.49s (22.39%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.29
+epoch: 0|step: 131|ppo_ep: 1|act_loss: -0.0435791015625|cri_loss: -0.015869140625|unsuper_loss: 0.0
+average reward score: 4.5234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.73%) |Training time=0.50s (22.74%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.30
+epoch: 0|step: 132|ppo_ep: 1|act_loss: 0.0189971923828125|cri_loss: 0.0142822265625|unsuper_loss: 0.0
+average reward score: 3.80859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.03%) |Training time=0.49s (22.35%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.30
+epoch: 0|step: 133|ppo_ep: 1|act_loss: 0.0258636474609375|cri_loss: 0.017120361328125|unsuper_loss: 0.0
+average reward score: 4.05078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.54%) |Training time=0.50s (22.89%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.30
+epoch: 0|step: 134|ppo_ep: 1|act_loss: -0.128662109375|cri_loss: -0.00958251953125|unsuper_loss: 0.0
+average reward score: 4.80078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.77%) |Training time=0.49s (22.54%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.30
+epoch: 0|step: 135|ppo_ep: 1|act_loss: -0.119384765625|cri_loss: -0.05499267578125|unsuper_loss: 0.0
+average reward score: 3.833984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.76%) |Training time=0.49s (22.65%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.31
+epoch: 0|step: 136|ppo_ep: 1|act_loss: -0.12493896484375|cri_loss: -0.05194091796875|unsuper_loss: 0.0
+average reward score: 3.76171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.74%) |Training time=0.49s (22.60%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.31
+epoch: 0|step: 137|ppo_ep: 1|act_loss: 0.04376220703125|cri_loss: 0.0254669189453125|unsuper_loss: 0.0
+average reward score: 4.1484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.81%) |Training time=0.49s (22.38%) |Others=0.10 (4.81%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.31
+epoch: 0|step: 138|ppo_ep: 1|act_loss: -0.00145721435546875|cri_loss: 0.002796173095703125|unsuper_loss: 0.0
+average reward score: 4.17578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.38s |Gather latency=0.00s (0.00%) |Generate time=1.65s (69.33%) |Training time=0.61s (25.81%) |Others=0.12 (4.85%)|CurSamplesPerSec=13.46 |AvgSamplesPerSec=14.31
+[2023-04-14 08:52:35,041] [INFO] [logging.py:96:log_dist] [Rank 0] step=140, skipped=4, lr=[9.649536569301218e-06, 9.649536569301218e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 08:52:35,060] [INFO] [timer.py:199:stop] epoch=0/micro_step=140/global_step=140, RunningAvgSamplesPerSec=99.93906583355329, CurrSamplesPerSec=69.75915377003102, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 08:52:35,152] [INFO] [logging.py:96:log_dist] [Rank 0] step=140, skipped=4, lr=[4.9997598804669524e-06, 4.9997598804669524e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 139|ppo_ep: 1|act_loss: -0.0291748046875|cri_loss: -0.00913238525390625|unsuper_loss: 0.0
+average reward score: 4.02734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.60s (68.45%) |Training time=0.64s (27.28%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.30
+epoch: 0|step: 140|ppo_ep: 1|act_loss: 0.0887451171875|cri_loss: 0.0537109375|unsuper_loss: 0.0
+average reward score: 4.5234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.62%) |Training time=0.50s (22.80%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.30
+epoch: 0|step: 141|ppo_ep: 1|act_loss: -0.0107574462890625|cri_loss: -0.003376007080078125|unsuper_loss: 0.0
+average reward score: 4.21484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.87%) |Training time=0.49s (22.62%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.31
+epoch: 0|step: 142|ppo_ep: 1|act_loss: -0.044647216796875|cri_loss: -0.01788330078125|unsuper_loss: 0.0
+average reward score: 5.0234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.71%) |Training time=0.49s (22.69%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.31
+epoch: 0|step: 143|ppo_ep: 1|act_loss: 0.0205841064453125|cri_loss: 0.013397216796875|unsuper_loss: 0.0
+average reward score: 3.9609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.21%) |Training time=0.51s (23.10%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.31
+epoch: 0|step: 144|ppo_ep: 1|act_loss: -0.0972900390625|cri_loss: -0.045196533203125|unsuper_loss: 0.0
+average reward score: 4.68359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.70%) |Training time=0.52s (23.78%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.31
+epoch: 0|step: 145|ppo_ep: 1|act_loss: -0.054412841796875|cri_loss: -0.023834228515625|unsuper_loss: 0.0
+average reward score: 5.06640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.83%) |Training time=0.52s (23.63%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.54 |AvgSamplesPerSec=14.31
+epoch: 0|step: 146|ppo_ep: 1|act_loss: 0.040130615234375|cri_loss: 0.0330810546875|unsuper_loss: 0.0
+average reward score: 4.31640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.13%) |Training time=0.51s (23.32%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.32
+epoch: 0|step: 147|ppo_ep: 1|act_loss: -0.01995849609375|cri_loss: -0.001068115234375|unsuper_loss: 0.0
+average reward score: 4.13671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.60%) |Training time=0.53s (23.86%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.32
+epoch: 0|step: 148|ppo_ep: 1|act_loss: 0.0291748046875|cri_loss: 0.0229949951171875|unsuper_loss: 0.0
+average reward score: 5.375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.05%) |Training time=0.51s (23.41%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.32
+[2023-04-14 08:52:56,981] [INFO] [logging.py:96:log_dist] [Rank 0] step=150, skipped=4, lr=[9.649243356924173e-06, 9.649243356924173e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 08:52:57,000] [INFO] [timer.py:199:stop] epoch=0/micro_step=150/global_step=150, RunningAvgSamplesPerSec=99.35793580311184, CurrSamplesPerSec=91.16621508445346, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 08:52:57,092] [INFO] [logging.py:96:log_dist] [Rank 0] step=150, skipped=4, lr=[4.99960795695553e-06, 4.99960795695553e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 149|ppo_ep: 1|act_loss: -0.01128387451171875|cri_loss: 0.0003204345703125|unsuper_loss: 0.0
+average reward score: 5.21484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.14%) |Training time=0.51s (23.33%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.32
+epoch: 0|step: 150|ppo_ep: 1|act_loss: 0.0914306640625|cri_loss: 0.050262451171875|unsuper_loss: 0.0
+average reward score: 5.7265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.35%) |Training time=0.51s (23.07%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.32
+epoch: 0|step: 151|ppo_ep: 1|act_loss: 0.03863525390625|cri_loss: 0.022003173828125|unsuper_loss: 0.0
+average reward score: 4.93359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.25%) |Training time=0.51s (23.04%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.32
+epoch: 0|step: 152|ppo_ep: 1|act_loss: -0.0318603515625|cri_loss: -0.01226806640625|unsuper_loss: 0.0
+average reward score: 5.48828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.73%) |Training time=0.50s (22.70%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.33
+epoch: 0|step: 153|ppo_ep: 1|act_loss: 0.026123046875|cri_loss: 0.0231781005859375|unsuper_loss: 0.0
+average reward score: 6.06640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.58%) |Training time=0.50s (22.94%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.33
+epoch: 0|step: 154|ppo_ep: 1|act_loss: -0.1361083984375|cri_loss: -0.059722900390625|unsuper_loss: 0.0
+average reward score: 6.58984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.38s |Gather latency=0.00s (0.00%) |Generate time=1.77s (74.35%) |Training time=0.51s (21.45%) |Others=0.10 (4.20%)|CurSamplesPerSec=13.46 |AvgSamplesPerSec=14.32
+epoch: 0|step: 155|ppo_ep: 1|act_loss: 0.053131103515625|cri_loss: 0.0291748046875|unsuper_loss: 0.0
+average reward score: 6.1796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.24%) |Training time=0.51s (23.20%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.32
+epoch: 0|step: 156|ppo_ep: 1|act_loss: 0.0271148681640625|cri_loss: 0.018280029296875|unsuper_loss: 0.0
+average reward score: 5.79296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.74%) |Training time=0.53s (23.75%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.32
+epoch: 0|step: 157|ppo_ep: 1|act_loss: 0.05975341796875|cri_loss: 0.036865234375|unsuper_loss: 0.0
+average reward score: 6.02734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.59s (68.33%) |Training time=0.50s (21.58%) |Others=0.23 (10.09%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.32
+epoch: 0|step: 158|ppo_ep: 1|act_loss: 0.025848388671875|cri_loss: 0.0191802978515625|unsuper_loss: 0.0
+average reward score: 5.90625
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.08%) |Training time=0.51s (23.36%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.32
+[2023-04-14 08:53:19,249] [INFO] [logging.py:96:log_dist] [Rank 0] step=160, skipped=4, lr=[9.648878637622726e-06, 9.648878637622726e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 08:53:19,268] [INFO] [timer.py:199:stop] epoch=0/micro_step=160/global_step=160, RunningAvgSamplesPerSec=98.90795954004966, CurrSamplesPerSec=93.22086815920744, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 08:53:19,360] [INFO] [logging.py:96:log_dist] [Rank 0] step=160, skipped=4, lr=[4.99941898322421e-06, 4.99941898322421e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 159|ppo_ep: 1|act_loss: 0.0384521484375|cri_loss: 0.0228424072265625|unsuper_loss: 0.0
+average reward score: 7.49609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.33%) |Training time=0.51s (23.12%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.32
+epoch: 0|step: 160|ppo_ep: 1|act_loss: 0.0286102294921875|cri_loss: 0.020538330078125|unsuper_loss: 0.0
+average reward score: 5.2734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.16%) |Training time=0.51s (23.28%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.32
+epoch: 0|step: 161|ppo_ep: 1|act_loss: -0.0103302001953125|cri_loss: -0.00080108642578125|unsuper_loss: 0.0
+average reward score: 5.40234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.35%) |Training time=0.51s (23.17%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.33
+epoch: 0|step: 162|ppo_ep: 1|act_loss: 0.14404296875|cri_loss: 0.08203125|unsuper_loss: 0.0
+average reward score: 6.02734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.39%) |Training time=0.50s (23.04%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.33
+epoch: 0|step: 163|ppo_ep: 1|act_loss: 0.0390625|cri_loss: 0.0228118896484375|unsuper_loss: 0.0
+average reward score: 5.38671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.72%) |Training time=0.50s (22.77%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.33
+epoch: 0|step: 164|ppo_ep: 1|act_loss: -0.00506591796875|cri_loss: 0.0015716552734375|unsuper_loss: 0.0
+average reward score: 5.30859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.39%) |Training time=0.50s (23.05%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.33
+epoch: 0|step: 165|ppo_ep: 1|act_loss: -0.0205078125|cri_loss: -0.007106781005859375|unsuper_loss: 0.0
+average reward score: 5.0546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.27%) |Training time=0.51s (23.17%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.33
+epoch: 0|step: 166|ppo_ep: 1|act_loss: 0.03875732421875|cri_loss: 0.021942138671875|unsuper_loss: 0.0
+average reward score: 5.3671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.58%) |Training time=0.50s (22.84%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.34
+epoch: 0|step: 167|ppo_ep: 1|act_loss: -0.048583984375|cri_loss: -0.0202789306640625|unsuper_loss: 0.0
+average reward score: 4.83984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.05%) |Training time=0.49s (22.47%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.34
+epoch: 0|step: 168|ppo_ep: 1|act_loss: 0.0002899169921875|cri_loss: 0.0030384063720703125|unsuper_loss: 0.0
+average reward score: 5.38671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.61s (72.58%) |Training time=0.50s (22.31%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.39 |AvgSamplesPerSec=14.34
+[2023-04-14 08:53:41,327] [INFO] [logging.py:96:log_dist] [Rank 0] step=170, skipped=4, lr=[9.648442416802894e-06, 9.648442416802894e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 08:53:41,345] [INFO] [timer.py:199:stop] epoch=0/micro_step=170/global_step=170, RunningAvgSamplesPerSec=98.61371514796177, CurrSamplesPerSec=93.73256923212298, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 08:53:41,442] [INFO] [logging.py:96:log_dist] [Rank 0] step=170, skipped=4, lr=[4.999192962074038e-06, 4.999192962074038e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 169|ppo_ep: 1|act_loss: 0.0283203125|cri_loss: 0.018218994140625|unsuper_loss: 0.0
+average reward score: 5.25
+-------------------------------------------------------------------------------------
+|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.29%) |Training time=0.50s (21.39%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.58 |AvgSamplesPerSec=14.33
+epoch: 0|step: 170|ppo_ep: 1|act_loss: -0.010101318359375|cri_loss: -0.0031566619873046875|unsuper_loss: 0.0
+average reward score: 5.140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.48s (22.03%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.34
+epoch: 0|step: 171|ppo_ep: 1|act_loss: -0.04736328125|cri_loss: -0.0209808349609375|unsuper_loss: 0.0
+average reward score: 5.2578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.48s (22.03%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.34
+epoch: 0|step: 172|ppo_ep: 1|act_loss: -0.06488037109375|cri_loss: -0.029296875|unsuper_loss: 0.0
+average reward score: 5.10546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.40%) |Training time=0.47s (21.95%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.34
+epoch: 0|step: 173|ppo_ep: 1|act_loss: -0.0254974365234375|cri_loss: -0.0096588134765625|unsuper_loss: 0.0
+average reward score: 5.57421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.91%) |Training time=0.49s (22.49%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34
+epoch: 0|step: 174|ppo_ep: 1|act_loss: -0.0384521484375|cri_loss: -0.0167694091796875|unsuper_loss: 0.0
+average reward score: 5.47265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.74%) |Training time=0.47s (21.59%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.35
+epoch: 0|step: 175|ppo_ep: 1|act_loss: -0.004512786865234375|cri_loss: -0.000179290771484375|unsuper_loss: 0.0
+average reward score: 5.0390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.89%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.35
+epoch: 0|step: 176|ppo_ep: 1|act_loss: 0.031982421875|cri_loss: 0.019287109375|unsuper_loss: 0.0
+average reward score: 5.171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.90%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.35
+epoch: 0|step: 177|ppo_ep: 1|act_loss: 0.0201873779296875|cri_loss: 0.01238250732421875|unsuper_loss: 0.0
+average reward score: 5.04296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.72%) |Training time=0.47s (21.62%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.36
+epoch: 0|step: 178|ppo_ep: 1|act_loss: 0.0927734375|cri_loss: 0.04998779296875|unsuper_loss: 0.0
+average reward score: 5.0625
+-------------------------------------------------------------------------------------
+|E2E latency=2.40s |Gather latency=0.00s (0.00%) |Generate time=1.58s (66.01%) |Training time=0.49s (20.57%) |Others=0.32 (13.42%)|CurSamplesPerSec=13.36 |AvgSamplesPerSec=14.35
+[2023-04-14 08:54:03,202] [INFO] [logging.py:96:log_dist] [Rank 0] step=180, skipped=4, lr=[9.647934700930525e-06, 9.647934700930525e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 08:54:03,220] [INFO] [timer.py:199:stop] epoch=0/micro_step=180/global_step=180, RunningAvgSamplesPerSec=98.71604805374989, CurrSamplesPerSec=87.31243836243797, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 08:54:03,313] [INFO] [logging.py:96:log_dist] [Rank 0] step=180, skipped=4, lr=[4.998929896855195e-06, 4.998929896855195e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 179|ppo_ep: 1|act_loss: 0.01641845703125|cri_loss: 0.0104217529296875|unsuper_loss: 0.0
+average reward score: 5.8515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.49%) |Training time=0.53s (23.88%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.42 |AvgSamplesPerSec=14.35
+epoch: 0|step: 180|ppo_ep: 1|act_loss: -0.001728057861328125|cri_loss: 0.001316070556640625|unsuper_loss: 0.0
+average reward score: 5.23046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.03%) |Training time=0.50s (22.31%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.40 |AvgSamplesPerSec=14.35
+epoch: 0|step: 181|ppo_ep: 1|act_loss: -0.0086212158203125|cri_loss: -0.0017242431640625|unsuper_loss: 0.0
+average reward score: 5.5390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.65s (73.73%) |Training time=0.48s (21.66%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.32 |AvgSamplesPerSec=14.35
+epoch: 0|step: 182|ppo_ep: 1|act_loss: 0.02783203125|cri_loss: 0.0161590576171875|unsuper_loss: 0.0
+average reward score: 5.4609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.40%) |Training time=0.44s (19.93%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.35
+epoch: 0|step: 183|ppo_ep: 1|act_loss: -0.01708984375|cri_loss: -0.005275726318359375|unsuper_loss: 0.0
+average reward score: 5.3515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.05%) |Training time=0.50s (22.47%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.51 |AvgSamplesPerSec=14.35
+epoch: 0|step: 184|ppo_ep: 1|act_loss: -0.0195159912109375|cri_loss: -0.008209228515625|unsuper_loss: 0.0
+average reward score: 4.60546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.38s |Gather latency=0.00s (0.00%) |Generate time=1.82s (76.48%) |Training time=0.46s (19.21%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.47 |AvgSamplesPerSec=14.35
+epoch: 0|step: 185|ppo_ep: 1|act_loss: -0.00017547607421875|cri_loss: 0.0018596649169921875|unsuper_loss: 0.0
+average reward score: 5.6640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.65s (74.95%) |Training time=0.45s (20.43%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.35
+[2023-04-14 08:54:18,825] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, but hysteresis is 2. Reducing hysteresis to 1
+[2023-04-14 08:54:18,911] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 186|ppo_ep: 1|act_loss: -0.01111602783203125|cri_loss: -0.004123687744140625|unsuper_loss: 0.0
+average reward score: 4.5703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.23%) |Training time=0.42s (19.42%) |Others=0.09 (4.35%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.35
+epoch: 0|step: 187|ppo_ep: 1|act_loss: -0.0054168701171875|cri_loss: -0.000263214111328125|unsuper_loss: 0.0
+average reward score: 4.9609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.84%) |Training time=0.47s (21.53%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.35
+epoch: 0|step: 188|ppo_ep: 1|act_loss: 0.01232147216796875|cri_loss: 0.01074981689453125|unsuper_loss: 0.0
+average reward score: 6.3125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.84%) |Training time=0.47s (21.45%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.35
+[2023-04-14 08:54:25,388] [INFO] [logging.py:96:log_dist] [Rank 0] step=190, skipped=5, lr=[9.647416634573466e-06, 9.647416634573466e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 08:54:25,406] [INFO] [timer.py:199:stop] epoch=0/micro_step=190/global_step=190, RunningAvgSamplesPerSec=99.07856090082578, CurrSamplesPerSec=102.86956528403809, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 08:54:25,499] [INFO] [logging.py:96:log_dist] [Rank 0] step=190, skipped=5, lr=[4.998661468690914e-06, 4.998661468690914e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 189|ppo_ep: 1|act_loss: -0.072509765625|cri_loss: -0.030487060546875|unsuper_loss: 0.0
+average reward score: 6.27734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.93%) |Training time=0.47s (21.60%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.35
+epoch: 0|step: 190|ppo_ep: 1|act_loss: -0.04168701171875|cri_loss: -0.017730712890625|unsuper_loss: 0.0
+average reward score: 5.90625
+-------------------------------------------------------------------------------------
+|E2E latency=2.84s |Gather latency=0.00s (0.00%) |Generate time=1.62s (57.15%) |Training time=0.47s (16.46%) |Others=0.75 (26.39%)|CurSamplesPerSec=11.28 |AvgSamplesPerSec=14.33
+epoch: 0|step: 191|ppo_ep: 1|act_loss: 0.0439453125|cri_loss: 0.028289794921875|unsuper_loss: 0.0
+average reward score: 6.421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.23%) |Training time=0.46s (21.21%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.33
+epoch: 0|step: 192|ppo_ep: 1|act_loss: 0.000202178955078125|cri_loss: 0.003143310546875|unsuper_loss: 0.0
+average reward score: 5.27734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.94%) |Training time=0.47s (21.45%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.34
+epoch: 0|step: 193|ppo_ep: 1|act_loss: 0.0016498565673828125|cri_loss: 0.00395965576171875|unsuper_loss: 0.0
+average reward score: 6.703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.34%) |Training time=0.46s (21.02%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.34
+epoch: 0|step: 194|ppo_ep: 1|act_loss: -0.04425048828125|cri_loss: -0.016937255859375|unsuper_loss: 0.0
+average reward score: 6.85546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.45%) |Training time=0.46s (20.94%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.34
+epoch: 0|step: 195|ppo_ep: 1|act_loss: -0.0312347412109375|cri_loss: -0.0128936767578125|unsuper_loss: 0.0
+average reward score: 6.02734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.63%) |Training time=0.46s (20.86%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.34
+epoch: 0|step: 196|ppo_ep: 1|act_loss: 0.04901123046875|cri_loss: 0.030487060546875|unsuper_loss: 0.0
+average reward score: 6.328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.66s (75.86%) |Training time=0.43s (19.50%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.34
+epoch: 0|step: 197|ppo_ep: 1|act_loss: 0.0048675537109375|cri_loss: 0.004283905029296875|unsuper_loss: 0.0
+average reward score: 7.109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.70s (73.37%) |Training time=0.51s (22.19%) |Others=0.10 (4.44%)|CurSamplesPerSec=13.83 |AvgSamplesPerSec=14.34
+epoch: 0|step: 198|ppo_ep: 1|act_loss: 0.027496337890625|cri_loss: 0.01763916015625|unsuper_loss: 0.0
+average reward score: 6.359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.62s (72.68%) |Training time=0.47s (20.95%) |Others=0.14 (6.38%)|CurSamplesPerSec=14.33 |AvgSamplesPerSec=14.34
+[2023-04-14 08:54:48,213] [INFO] [logging.py:96:log_dist] [Rank 0] step=200, skipped=5, lr=[9.646773099710006e-06, 9.646773099710006e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 08:54:48,231] [INFO] [timer.py:199:stop] epoch=0/micro_step=200/global_step=200, RunningAvgSamplesPerSec=99.47785048462806, CurrSamplesPerSec=113.295647388129, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 08:54:48,323] [INFO] [logging.py:96:log_dist] [Rank 0] step=200, skipped=5, lr=[4.998328030937827e-06, 4.998328030937827e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 199|ppo_ep: 1|act_loss: -0.10589599609375|cri_loss: -0.049896240234375|unsuper_loss: 0.0
+average reward score: 6.046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.76s (76.27%) |Training time=0.45s (19.35%) |Others=0.10 (4.38%)|CurSamplesPerSec=13.90 |AvgSamplesPerSec=14.34
+epoch: 0|step: 200|ppo_ep: 1|act_loss: 0.00897216796875|cri_loss: 0.0068206787109375|unsuper_loss: 0.0
+average reward score: 5.69140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.10%) |Training time=0.47s (21.29%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.34
+epoch: 0|step: 201|ppo_ep: 1|act_loss: 9.5367431640625e-07|cri_loss: 0.0008797645568847656|unsuper_loss: 0.0
+average reward score: 5.99609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.91%) |Training time=0.47s (21.50%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.34
+epoch: 0|step: 202|ppo_ep: 1|act_loss: -0.0102386474609375|cri_loss: -0.003673553466796875|unsuper_loss: 0.0
+average reward score: 5.32421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.84%) |Training time=0.47s (21.48%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.34
+epoch: 0|step: 203|ppo_ep: 1|act_loss: -0.0355224609375|cri_loss: -0.014923095703125|unsuper_loss: 0.0
+average reward score: 4.9296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.86%) |Training time=0.48s (21.66%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.34
+epoch: 0|step: 204|ppo_ep: 1|act_loss: -0.04193115234375|cri_loss: -0.0184326171875|unsuper_loss: 0.0
+average reward score: 5.953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.88%) |Training time=0.47s (21.52%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.34
+epoch: 0|step: 205|ppo_ep: 1|act_loss: 0.03240966796875|cri_loss: 0.0181121826171875|unsuper_loss: 0.0
+average reward score: 5.54296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.62s |Gather latency=0.00s (0.00%) |Generate time=1.63s (62.21%) |Training time=0.47s (17.92%) |Others=0.52 (19.87%)|CurSamplesPerSec=12.23 |AvgSamplesPerSec=14.33
+epoch: 0|step: 206|ppo_ep: 1|act_loss: 0.05206298828125|cri_loss: 0.0300445556640625|unsuper_loss: 0.0
+average reward score: 5.921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.11%) |Training time=0.47s (21.30%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.33
+epoch: 0|step: 207|ppo_ep: 1|act_loss: -0.00235748291015625|cri_loss: 0.000972747802734375|unsuper_loss: 0.0
+average reward score: 5.6015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.94%) |Training time=0.47s (21.55%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.33
+epoch: 0|step: 208|ppo_ep: 1|act_loss: 0.000396728515625|cri_loss: 0.006542205810546875|unsuper_loss: 0.0
+average reward score: 6.625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.83%) |Training time=0.47s (21.59%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.33
+[2023-04-14 08:55:10,546] [INFO] [logging.py:96:log_dist] [Rank 0] step=210, skipped=5, lr=[9.646058094537316e-06, 9.646058094537316e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 08:55:10,564] [INFO] [timer.py:199:stop] epoch=0/micro_step=210/global_step=210, RunningAvgSamplesPerSec=99.69265405081147, CurrSamplesPerSec=104.76069635587082, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 08:55:10,657] [INFO] [logging.py:96:log_dist] [Rank 0] step=210, skipped=5, lr=[4.997957561936433e-06, 4.997957561936433e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 209|ppo_ep: 1|act_loss: -0.060089111328125|cri_loss: -0.027008056640625|unsuper_loss: 0.0
+average reward score: 5.8125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.15%) |Training time=0.47s (21.36%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.34
+epoch: 0|step: 210|ppo_ep: 1|act_loss: -0.004329681396484375|cri_loss: 0.000911712646484375|unsuper_loss: 0.0
+average reward score: 6.90625
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.43%) |Training time=0.48s (21.94%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.51 |AvgSamplesPerSec=14.34
+epoch: 0|step: 211|ppo_ep: 1|act_loss: -0.03790283203125|cri_loss: -0.016845703125|unsuper_loss: 0.0
+average reward score: 7.890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.05%) |Training time=0.45s (20.46%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.34
+epoch: 0|step: 212|ppo_ep: 1|act_loss: -0.022186279296875|cri_loss: -0.0086212158203125|unsuper_loss: 0.0
+average reward score: 6.6640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.65s (74.46%) |Training time=0.46s (20.99%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.34
+epoch: 0|step: 213|ppo_ep: 1|act_loss: 0.007564544677734375|cri_loss: 0.007236480712890625|unsuper_loss: 0.0
+average reward score: 5.8203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.79s (76.07%) |Training time=0.47s (19.74%) |Others=0.10 (4.18%)|CurSamplesPerSec=13.58 |AvgSamplesPerSec=14.33
+epoch: 0|step: 214|ppo_ep: 1|act_loss: 0.022003173828125|cri_loss: 0.01690673828125|unsuper_loss: 0.0
+average reward score: 6.3515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.98%) |Training time=0.47s (21.41%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.34
+epoch: 0|step: 215|ppo_ep: 1|act_loss: -0.10302734375|cri_loss: -0.040618896484375|unsuper_loss: 0.0
+average reward score: 5.890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.26%) |Training time=0.46s (21.16%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.34
+epoch: 0|step: 216|ppo_ep: 1|act_loss: -0.0308685302734375|cri_loss: -0.0083160400390625|unsuper_loss: 0.0
+average reward score: 5.8828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.06%) |Training time=0.46s (21.18%) |Others=0.10 (4.76%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.34
+epoch: 0|step: 217|ppo_ep: 1|act_loss: 0.0677490234375|cri_loss: 0.042144775390625|unsuper_loss: 0.0
+average reward score: 5.3671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.99%) |Training time=0.47s (21.39%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.34
+epoch: 0|step: 218|ppo_ep: 1|act_loss: 0.1029052734375|cri_loss: 0.0599365234375|unsuper_loss: 0.0
+average reward score: 6.484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.53%) |Training time=0.48s (21.86%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.34
+[2023-04-14 08:55:32,679] [INFO] [logging.py:96:log_dist] [Rank 0] step=220, skipped=5, lr=[9.645271629653494e-06, 9.645271629653494e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 08:55:32,901] [INFO] [timer.py:199:stop] epoch=0/micro_step=220/global_step=220, RunningAvgSamplesPerSec=99.60717426750885, CurrSamplesPerSec=60.76466806591054, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 08:55:32,994] [INFO] [logging.py:96:log_dist] [Rank 0] step=220, skipped=5, lr=[4.9975500671779765e-06, 4.9975500671779765e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 219|ppo_ep: 1|act_loss: 0.13671875|cri_loss: 0.07745361328125|unsuper_loss: 0.0
+average reward score: 6.9453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.41s |Gather latency=0.00s (0.00%) |Generate time=1.62s (67.15%) |Training time=0.69s (28.68%) |Others=0.10 (4.17%)|CurSamplesPerSec=13.30 |AvgSamplesPerSec=14.34
+epoch: 0|step: 220|ppo_ep: 1|act_loss: 0.0733642578125|cri_loss: 0.04412841796875|unsuper_loss: 0.0
+average reward score: 7.16796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.43%) |Training time=0.46s (20.97%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.34
+[2023-04-14 08:55:37,263] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192
+epoch: 0|step: 221|ppo_ep: 1|act_loss: -0.236328125|cri_loss: -0.09283447265625|unsuper_loss: 0.0
+average reward score: 5.28125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.44%) |Training time=0.43s (19.93%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.34
+epoch: 0|step: 222|ppo_ep: 1|act_loss: -0.042388916015625|cri_loss: -0.016998291015625|unsuper_loss: 0.0
+average reward score: 6.92578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.33%) |Training time=0.46s (21.08%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.34
+epoch: 0|step: 223|ppo_ep: 1|act_loss: -0.0716552734375|cri_loss: -0.0293731689453125|unsuper_loss: 0.0
+average reward score: 5.84375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.29%) |Training time=0.47s (21.22%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.34
+epoch: 0|step: 224|ppo_ep: 1|act_loss: 0.0211181640625|cri_loss: 0.01495361328125|unsuper_loss: 0.0
+average reward score: 7.453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.36%) |Training time=0.46s (21.05%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.34
+epoch: 0|step: 225|ppo_ep: 1|act_loss: 0.139404296875|cri_loss: 0.08172607421875|unsuper_loss: 0.0
+average reward score: 6.4296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.64s (73.61%) |Training time=0.47s (21.21%) |Others=0.12 (5.18%)|CurSamplesPerSec=14.36 |AvgSamplesPerSec=14.34
+epoch: 0|step: 226|ppo_ep: 1|act_loss: 0.04437255859375|cri_loss: 0.02532958984375|unsuper_loss: 0.0
+average reward score: 4.3671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.76s (76.61%) |Training time=0.44s (18.98%) |Others=0.10 (4.41%)|CurSamplesPerSec=13.94 |AvgSamplesPerSec=14.34
+epoch: 0|step: 227|ppo_ep: 1|act_loss: -0.06201171875|cri_loss: -0.0190582275390625|unsuper_loss: 0.0
+average reward score: 5.0546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.41%) |Training time=0.49s (22.11%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.34
+epoch: 0|step: 228|ppo_ep: 1|act_loss: -0.092041015625|cri_loss: -0.03985595703125|unsuper_loss: 0.0
+average reward score: 5.140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.62s (70.56%) |Training time=0.49s (21.35%) |Others=0.19 (8.08%)|CurSamplesPerSec=13.92 |AvgSamplesPerSec=14.34
+[2023-04-14 08:55:55,060] [INFO] [logging.py:96:log_dist] [Rank 0] step=230, skipped=6, lr=[9.64450272281792e-06, 9.64450272281792e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 08:55:55,078] [INFO] [timer.py:199:stop] epoch=0/micro_step=230/global_step=230, RunningAvgSamplesPerSec=99.88061376855845, CurrSamplesPerSec=101.70929986715883, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 08:55:55,171] [INFO] [logging.py:96:log_dist] [Rank 0] step=230, skipped=5, lr=[4.997105552702513e-06, 4.997105552702513e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 229|ppo_ep: 1|act_loss: 0.03997802734375|cri_loss: 0.023681640625|unsuper_loss: 0.0
+average reward score: 5.828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.66%) |Training time=0.48s (21.68%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.34
+epoch: 0|step: 230|ppo_ep: 1|act_loss: 0.036895751953125|cri_loss: 0.02520751953125|unsuper_loss: 0.0
+average reward score: 4.91015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.57%) |Training time=0.48s (21.84%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.51 |AvgSamplesPerSec=14.34
+epoch: 0|step: 231|ppo_ep: 1|act_loss: 0.041748046875|cri_loss: 0.02581787109375|unsuper_loss: 0.0
+average reward score: 4.92578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.91%) |Training time=0.47s (21.52%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.34
+epoch: 0|step: 232|ppo_ep: 1|act_loss: 0.1104736328125|cri_loss: 0.0606689453125|unsuper_loss: 0.0
+average reward score: 5.625
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.38%) |Training time=0.49s (22.09%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.43 |AvgSamplesPerSec=14.34
+epoch: 0|step: 233|ppo_ep: 1|act_loss: 0.022430419921875|cri_loss: 0.0141143798828125|unsuper_loss: 0.0
+average reward score: 4.78125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.80%) |Training time=0.48s (21.73%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.34
+epoch: 0|step: 234|ppo_ep: 1|act_loss: -0.097412109375|cri_loss: -0.044830322265625|unsuper_loss: 0.0
+average reward score: 5.31640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.62s (69.95%) |Training time=0.48s (20.82%) |Others=0.21 (9.22%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.34
+epoch: 0|step: 235|ppo_ep: 1|act_loss: 0.1146240234375|cri_loss: 0.08062744140625|unsuper_loss: 0.0
+average reward score: 4.8515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.78%) |Training time=0.48s (21.63%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.34
+epoch: 0|step: 236|ppo_ep: 1|act_loss: 0.098876953125|cri_loss: 0.0562744140625|unsuper_loss: 0.0
+average reward score: 4.9140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.00%) |Training time=0.47s (21.42%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.34
+epoch: 0|step: 237|ppo_ep: 1|act_loss: 0.246826171875|cri_loss: 0.1407470703125|unsuper_loss: 0.0
+average reward score: 4.5625
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.42%) |Training time=0.49s (22.03%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.34
+epoch: 0|step: 238|ppo_ep: 1|act_loss: -0.043609619140625|cri_loss: -0.010223388671875|unsuper_loss: 0.0
+average reward score: 4.8359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.62%) |Training time=0.48s (21.80%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.34
+[2023-04-14 08:56:17,202] [INFO] [logging.py:96:log_dist] [Rank 0] step=240, skipped=6, lr=[9.643580517474126e-06, 9.643580517474126e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 08:56:17,221] [INFO] [timer.py:199:stop] epoch=0/micro_step=240/global_step=240, RunningAvgSamplesPerSec=99.92462431230227, CurrSamplesPerSec=100.35225418814385, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 08:56:17,314] [INFO] [logging.py:96:log_dist] [Rank 0] step=240, skipped=5, lr=[4.996624025098819e-06, 4.996624025098819e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 239|ppo_ep: 1|act_loss: -0.1341552734375|cri_loss: -0.050537109375|unsuper_loss: 0.0
+average reward score: 4.5546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.61%) |Training time=0.48s (21.84%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.34
+epoch: 0|step: 240|ppo_ep: 1|act_loss: -0.0472412109375|cri_loss: -0.0107421875|unsuper_loss: 0.0
+average reward score: 4.12109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.17%) |Training time=0.46s (20.69%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.46 |AvgSamplesPerSec=14.34
+epoch: 0|step: 241|ppo_ep: 1|act_loss: -0.02374267578125|cri_loss: -0.00562286376953125|unsuper_loss: 0.0
+average reward score: 3.810546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.20%) |Training time=0.47s (21.24%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.35
+epoch: 0|step: 242|ppo_ep: 1|act_loss: 0.135498046875|cri_loss: 0.0750732421875|unsuper_loss: 0.0
+average reward score: 3.974609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.54%) |Training time=0.48s (21.88%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.35
+epoch: 0|step: 243|ppo_ep: 1|act_loss: 0.01165008544921875|cri_loss: 0.016204833984375|unsuper_loss: 0.0
+average reward score: 4.26953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.81s (76.71%) |Training time=0.45s (19.00%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.53 |AvgSamplesPerSec=14.34
+epoch: 0|step: 244|ppo_ep: 1|act_loss: 0.11224365234375|cri_loss: 0.061309814453125|unsuper_loss: 0.0
+average reward score: 4.0625
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.90%) |Training time=0.47s (21.52%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.34
+epoch: 0|step: 245|ppo_ep: 1|act_loss: 0.07666015625|cri_loss: 0.047027587890625|unsuper_loss: 0.0
+average reward score: 4.1640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.58%) |Training time=0.48s (21.85%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.34
+epoch: 0|step: 246|ppo_ep: 1|act_loss: -0.057037353515625|cri_loss: -0.0242919921875|unsuper_loss: 0.0
+average reward score: 4.9375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.49%) |Training time=0.48s (21.94%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.34
+epoch: 0|step: 247|ppo_ep: 1|act_loss: -0.036651611328125|cri_loss: -0.016265869140625|unsuper_loss: 0.0
+average reward score: 4.28125
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.86%) |Training time=0.48s (21.66%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.35
+epoch: 0|step: 248|ppo_ep: 1|act_loss: 0.031280517578125|cri_loss: 0.019378662109375|unsuper_loss: 0.0
+average reward score: 4.2421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.43%) |Training time=0.49s (22.01%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.35
+[2023-04-14 08:56:39,389] [INFO] [logging.py:96:log_dist] [Rank 0] step=250, skipped=6, lr=[9.64258688914287e-06, 9.64258688914287e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 08:56:39,927] [INFO] [timer.py:199:stop] epoch=0/micro_step=250/global_step=250, RunningAvgSamplesPerSec=99.39271434606478, CurrSamplesPerSec=38.278583366131066, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 08:56:40,020] [INFO] [logging.py:96:log_dist] [Rank 0] step=250, skipped=5, lr=[4.996105491504296e-06, 4.996105491504296e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 249|ppo_ep: 1|act_loss: 0.09033203125|cri_loss: 0.050537109375|unsuper_loss: 0.0
+average reward score: 4.4609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.72s |Gather latency=0.00s (0.00%) |Generate time=1.62s (59.51%) |Training time=1.00s (36.79%) |Others=0.10 (3.71%)|CurSamplesPerSec=11.78 |AvgSamplesPerSec=14.33
+epoch: 0|step: 250|ppo_ep: 1|act_loss: -0.1090087890625|cri_loss: -0.0499267578125|unsuper_loss: 0.0
+average reward score: 4.80078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.45%) |Training time=0.48s (21.98%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.33
+epoch: 0|step: 251|ppo_ep: 1|act_loss: -0.18017578125|cri_loss: -0.0765380859375|unsuper_loss: 0.0
+average reward score: 4.68359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.78%) |Training time=0.45s (20.68%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.34
+epoch: 0|step: 252|ppo_ep: 1|act_loss: -0.17724609375|cri_loss: -0.08209228515625|unsuper_loss: 0.0
+average reward score: 4.9375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.97%) |Training time=0.47s (21.42%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.34
+epoch: 0|step: 253|ppo_ep: 1|act_loss: -0.12353515625|cri_loss: -0.0531005859375|unsuper_loss: 0.0
+average reward score: 5.26171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.17%) |Training time=0.46s (21.22%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.34
+epoch: 0|step: 254|ppo_ep: 1|act_loss: 0.2373046875|cri_loss: 0.14013671875|unsuper_loss: 0.0
+average reward score: 4.77734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.13%) |Training time=0.44s (20.23%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.34
+epoch: 0|step: 255|ppo_ep: 1|act_loss: -0.028411865234375|cri_loss: -0.00667572021484375|unsuper_loss: 0.0
+average reward score: 4.2421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.76s (77.44%) |Training time=0.41s (18.18%) |Others=0.10 (4.38%)|CurSamplesPerSec=14.11 |AvgSamplesPerSec=14.34
+epoch: 0|step: 256|ppo_ep: 1|act_loss: 0.0565185546875|cri_loss: 0.037017822265625|unsuper_loss: 0.0
+average reward score: 4.72265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.97%) |Training time=0.42s (19.35%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.34
+epoch: 0|step: 257|ppo_ep: 1|act_loss: 0.1236572265625|cri_loss: 0.067138671875|unsuper_loss: 0.0
+average reward score: 4.89453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.08%) |Training time=0.42s (19.34%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.34
+epoch: 0|step: 258|ppo_ep: 1|act_loss: 0.1390380859375|cri_loss: 0.0894775390625|unsuper_loss: 0.0
+average reward score: 4.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.31%) |Training time=0.43s (20.04%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.34
+[2023-04-14 08:57:01,938] [INFO] [logging.py:96:log_dist] [Rank 0] step=260, skipped=6, lr=[9.64152185255212e-06, 9.64152185255212e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 08:57:01,956] [INFO] [timer.py:199:stop] epoch=0/micro_step=260/global_step=260, RunningAvgSamplesPerSec=99.89679140432641, CurrSamplesPerSec=114.06243881213967, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 08:57:02,050] [INFO] [logging.py:96:log_dist] [Rank 0] step=260, skipped=5, lr=[4.9955499596048615e-06, 4.9955499596048615e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 259|ppo_ep: 1|act_loss: 0.0908203125|cri_loss: 0.06024169921875|unsuper_loss: 0.0
+average reward score: 4.9296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.80s (76.75%) |Training time=0.45s (19.04%) |Others=0.10 (4.21%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.34
+epoch: 0|step: 260|ppo_ep: 1|act_loss: 0.124267578125|cri_loss: 0.065673828125|unsuper_loss: 0.0
+average reward score: 5.125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.42%) |Training time=0.46s (20.96%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.34
+epoch: 0|step: 261|ppo_ep: 1|act_loss: 0.03094482421875|cri_loss: 0.02093505859375|unsuper_loss: 0.0
+average reward score: 4.8515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.58%) |Training time=0.45s (20.79%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.34
+epoch: 0|step: 262|ppo_ep: 1|act_loss: -0.03643798828125|cri_loss: -0.0055389404296875|unsuper_loss: 0.0
+average reward score: 4.578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.85%) |Training time=0.44s (20.49%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.35
+epoch: 0|step: 263|ppo_ep: 1|act_loss: -0.098388671875|cri_loss: -0.0452880859375|unsuper_loss: 0.0
+average reward score: 4.0703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.06%) |Training time=0.44s (20.24%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.35
+epoch: 0|step: 264|ppo_ep: 1|act_loss: -0.1561279296875|cri_loss: -0.04656982421875|unsuper_loss: 0.0
+average reward score: 4.61328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.59%) |Training time=0.45s (20.75%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.35
+epoch: 0|step: 265|ppo_ep: 1|act_loss: -0.072021484375|cri_loss: -0.0289306640625|unsuper_loss: 0.0
+average reward score: 4.859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.72s |Gather latency=0.00s (0.00%) |Generate time=1.63s (60.04%) |Training time=0.45s (16.66%) |Others=0.63 (23.30%)|CurSamplesPerSec=11.76 |AvgSamplesPerSec=14.34
+epoch: 0|step: 266|ppo_ep: 1|act_loss: 0.00478363037109375|cri_loss: 0.006679534912109375|unsuper_loss: 0.0
+average reward score: 4.609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.65%) |Training time=0.45s (20.69%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.34
+epoch: 0|step: 267|ppo_ep: 1|act_loss: 0.04937744140625|cri_loss: 0.029876708984375|unsuper_loss: 0.0
+average reward score: 4.15625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.83%) |Training time=0.45s (20.63%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.34
+epoch: 0|step: 268|ppo_ep: 1|act_loss: 0.050994873046875|cri_loss: 0.0362548828125|unsuper_loss: 0.0
+average reward score: 4.35546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.38%) |Training time=0.43s (19.99%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.34
+[2023-04-14 08:57:24,192] [INFO] [logging.py:96:log_dist] [Rank 0] step=270, skipped=6, lr=[9.640385423488292e-06, 9.640385423488292e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 08:57:24,211] [INFO] [timer.py:199:stop] epoch=0/micro_step=270/global_step=270, RunningAvgSamplesPerSec=100.33253846211156, CurrSamplesPerSec=118.06418606287716, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 08:57:24,304] [INFO] [logging.py:96:log_dist] [Rank 0] step=270, skipped=5, lr=[4.99495743763484e-06, 4.99495743763484e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 269|ppo_ep: 1|act_loss: -0.050506591796875|cri_loss: -0.022216796875|unsuper_loss: 0.0
+average reward score: 5.25390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.39%) |Training time=0.43s (20.00%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34
+epoch: 0|step: 270|ppo_ep: 1|act_loss: 0.007083892822265625|cri_loss: 0.00634765625|unsuper_loss: 0.0
+average reward score: 5.26171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.63%) |Training time=0.43s (19.72%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34
+epoch: 0|step: 271|ppo_ep: 1|act_loss: -0.02874755859375|cri_loss: -0.01068115234375|unsuper_loss: 0.0
+average reward score: 5.3828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.24%) |Training time=0.43s (19.78%) |Others=0.11 (4.98%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.35
+epoch: 0|step: 272|ppo_ep: 1|act_loss: 0.158203125|cri_loss: 0.0850830078125|unsuper_loss: 0.0
+average reward score: 4.30859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.42%) |Training time=0.46s (20.89%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.35
+epoch: 0|step: 273|ppo_ep: 1|act_loss: 0.04718017578125|cri_loss: 0.0275421142578125|unsuper_loss: 0.0
+average reward score: 4.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.38s |Gather latency=0.00s (0.00%) |Generate time=1.83s (76.95%) |Training time=0.45s (18.83%) |Others=0.10 (4.22%)|CurSamplesPerSec=13.47 |AvgSamplesPerSec=14.34
+epoch: 0|step: 274|ppo_ep: 1|act_loss: -0.04803466796875|cri_loss: -0.0196685791015625|unsuper_loss: 0.0
+average reward score: 5.3671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.56%) |Training time=0.45s (20.64%) |Others=0.11 (4.80%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.34
+epoch: 0|step: 275|ppo_ep: 1|act_loss: 0.01389312744140625|cri_loss: 0.0127716064453125|unsuper_loss: 0.0
+average reward score: 4.5859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.88%) |Training time=0.48s (21.61%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.34
+epoch: 0|step: 276|ppo_ep: 1|act_loss: -0.10394287109375|cri_loss: -0.044525146484375|unsuper_loss: 0.0
+average reward score: 4.4609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.63s (69.59%) |Training time=0.47s (20.00%) |Others=0.24 (10.40%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.34
+epoch: 0|step: 277|ppo_ep: 1|act_loss: -0.0570068359375|cri_loss: -0.0216827392578125|unsuper_loss: 0.0
+average reward score: 4.35546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.06%) |Training time=0.45s (20.43%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.34
+epoch: 0|step: 278|ppo_ep: 1|act_loss: -0.020050048828125|cri_loss: -0.0082244873046875|unsuper_loss: 0.0
+average reward score: 4.79296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.64s (73.66%) |Training time=0.48s (21.65%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.40 |AvgSamplesPerSec=14.34
+[2023-04-14 08:57:46,450] [INFO] [logging.py:96:log_dist] [Rank 0] step=280, skipped=6, lr=[9.639177618796e-06, 9.639177618796e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 08:57:46,468] [INFO] [timer.py:199:stop] epoch=0/micro_step=280/global_step=280, RunningAvgSamplesPerSec=100.64003665757109, CurrSamplesPerSec=108.39001360752977, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 08:57:46,561] [INFO] [logging.py:96:log_dist] [Rank 0] step=280, skipped=5, lr=[4.994327934376836e-06, 4.994327934376836e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 279|ppo_ep: 1|act_loss: 0.05682373046875|cri_loss: 0.0301666259765625|unsuper_loss: 0.0
+average reward score: 4.76953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.56%) |Training time=0.46s (20.92%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.34
+epoch: 0|step: 280|ppo_ep: 1|act_loss: 0.1710205078125|cri_loss: 0.101806640625|unsuper_loss: 0.0
+average reward score: 5.171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.41%) |Training time=0.46s (20.99%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.34
+epoch: 0|step: 281|ppo_ep: 1|act_loss: 0.1187744140625|cri_loss: 0.06683349609375|unsuper_loss: 0.0
+average reward score: 5.09765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.98%) |Training time=0.47s (21.51%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.35
+epoch: 0|step: 282|ppo_ep: 1|act_loss: 0.095703125|cri_loss: 0.05072021484375|unsuper_loss: 0.0
+average reward score: 5.14453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.53%) |Training time=0.46s (20.85%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.35
+epoch: 0|step: 283|ppo_ep: 1|act_loss: -0.037811279296875|cri_loss: -0.017578125|unsuper_loss: 0.0
+average reward score: 4.9453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.13%) |Training time=0.47s (21.31%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.35
+epoch: 0|step: 284|ppo_ep: 1|act_loss: 0.009124755859375|cri_loss: 0.00556182861328125|unsuper_loss: 0.0
+average reward score: 5.03125
+-------------------------------------------------------------------------------------
+|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.73s (75.48%) |Training time=0.46s (20.10%) |Others=0.10 (4.41%)|CurSamplesPerSec=13.97 |AvgSamplesPerSec=14.35
+epoch: 0|step: 285|ppo_ep: 1|act_loss: -0.05279541015625|cri_loss: -0.0239105224609375|unsuper_loss: 0.0
+average reward score: 5.234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.66s (75.38%) |Training time=0.44s (20.00%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.35
+epoch: 0|step: 286|ppo_ep: 1|act_loss: -0.0899658203125|cri_loss: -0.0411376953125|unsuper_loss: 0.0
+average reward score: 5.1796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.85%) |Training time=0.42s (19.49%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.35
+epoch: 0|step: 287|ppo_ep: 1|act_loss: -0.02752685546875|cri_loss: -0.01082611083984375|unsuper_loss: 0.0
+average reward score: 4.94140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.18%) |Training time=0.42s (19.26%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.35
+epoch: 0|step: 288|ppo_ep: 1|act_loss: -0.000614166259765625|cri_loss: 0.00179290771484375|unsuper_loss: 0.0
+average reward score: 4.75390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.21%) |Training time=0.43s (18.46%) |Others=0.10 (4.33%)|CurSamplesPerSec=13.70 |AvgSamplesPerSec=14.35
+[2023-04-14 08:58:08,558] [INFO] [logging.py:96:log_dist] [Rank 0] step=290, skipped=6, lr=[9.637898456377828e-06, 9.637898456377828e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 08:58:08,576] [INFO] [timer.py:199:stop] epoch=0/micro_step=290/global_step=290, RunningAvgSamplesPerSec=101.01194688929871, CurrSamplesPerSec=112.96647476685857, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 08:58:08,668] [INFO] [logging.py:96:log_dist] [Rank 0] step=290, skipped=5, lr=[4.993661459161605e-06, 4.993661459161605e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 289|ppo_ep: 1|act_loss: -0.08251953125|cri_loss: -0.0379638671875|unsuper_loss: 0.0
+average reward score: 4.84375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.91%) |Training time=0.45s (20.57%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.35
+epoch: 0|step: 290|ppo_ep: 1|act_loss: 0.0092010498046875|cri_loss: 0.008026123046875|unsuper_loss: 0.0
+average reward score: 5.48828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.73%) |Training time=0.45s (20.64%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.35
+epoch: 0|step: 291|ppo_ep: 1|act_loss: 0.030975341796875|cri_loss: 0.016845703125|unsuper_loss: 0.0
+average reward score: 5.55859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.66%) |Training time=0.45s (20.78%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.35
+epoch: 0|step: 292|ppo_ep: 1|act_loss: 0.0404052734375|cri_loss: 0.0223846435546875|unsuper_loss: 0.0
+average reward score: 5.3828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.07%) |Training time=0.44s (20.30%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.35
+epoch: 0|step: 293|ppo_ep: 1|act_loss: -0.0026226043701171875|cri_loss: 0.0005626678466796875|unsuper_loss: 0.0
+average reward score: 4.88671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.89%) |Training time=0.45s (20.56%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.35
+epoch: 0|step: 294|ppo_ep: 1|act_loss: 0.1007080078125|cri_loss: 0.056976318359375|unsuper_loss: 0.0
+average reward score: 5.7421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.53%) |Training time=0.45s (20.84%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.35
+epoch: 0|step: 295|ppo_ep: 1|act_loss: -0.01436614990234375|cri_loss: -0.00609588623046875|unsuper_loss: 0.0
+average reward score: 5.13671875
+-------------------------------------------------------------------------------------
+|E2E latency=3.40s |Gather latency=0.00s (0.00%) |Generate time=1.62s (47.50%) |Training time=0.45s (13.18%) |Others=1.34 (39.33%)|CurSamplesPerSec=9.40 |AvgSamplesPerSec=14.33
+epoch: 0|step: 296|ppo_ep: 1|act_loss: -0.000537872314453125|cri_loss: 0.003475189208984375|unsuper_loss: 0.0
+average reward score: 4.97265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.95%) |Training time=0.39s (18.28%) |Others=0.10 (4.77%)|CurSamplesPerSec=15.12 |AvgSamplesPerSec=14.33
+epoch: 0|step: 297|ppo_ep: 1|act_loss: -0.042449951171875|cri_loss: -0.017486572265625|unsuper_loss: 0.0
+average reward score: 5.71484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.76%) |Training time=0.45s (20.70%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.33
+epoch: 0|step: 298|ppo_ep: 1|act_loss: -0.001461029052734375|cri_loss: 0.00197601318359375|unsuper_loss: 0.0
+average reward score: 6.12109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.55%) |Training time=0.45s (20.81%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.33
+[2023-04-14 08:58:31,449] [INFO] [logging.py:96:log_dist] [Rank 0] step=300, skipped=6, lr=[9.636547955194047e-06, 9.636547955194047e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 08:58:31,467] [INFO] [timer.py:199:stop] epoch=0/micro_step=300/global_step=300, RunningAvgSamplesPerSec=101.42191353477254, CurrSamplesPerSec=116.11825887144846, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 08:58:31,561] [INFO] [logging.py:96:log_dist] [Rank 0] step=300, skipped=5, lr=[4.9929580218679195e-06, 4.9929580218679195e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 299|ppo_ep: 1|act_loss: -0.037841796875|cri_loss: -0.017974853515625|unsuper_loss: 0.0
+average reward score: 6.19140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.10%) |Training time=0.44s (20.32%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.34
+epoch: 0|step: 300|ppo_ep: 1|act_loss: 0.1055908203125|cri_loss: 0.0635986328125|unsuper_loss: 0.0
+average reward score: 6.1328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.65%) |Training time=0.43s (19.69%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.34
+epoch: 0|step: 301|ppo_ep: 1|act_loss: 0.01062774658203125|cri_loss: 0.00878143310546875|unsuper_loss: 0.0
+average reward score: 4.91796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.96%) |Training time=0.44s (20.39%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34
+epoch: 0|step: 302|ppo_ep: 1|act_loss: 0.02557373046875|cri_loss: 0.0182647705078125|unsuper_loss: 0.0
+average reward score: 5.859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.99%) |Training time=0.44s (20.36%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.34
+epoch: 0|step: 303|ppo_ep: 1|act_loss: -0.0107421875|cri_loss: -0.004383087158203125|unsuper_loss: 0.0
+average reward score: 5.640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.17%) |Training time=0.43s (18.60%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.34
+epoch: 0|step: 304|ppo_ep: 1|act_loss: -0.0123443603515625|cri_loss: -0.00382232666015625|unsuper_loss: 0.0
+average reward score: 5.46875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.97%) |Training time=0.44s (20.37%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.34
+epoch: 0|step: 305|ppo_ep: 1|act_loss: -0.01494598388671875|cri_loss: -0.0056304931640625|unsuper_loss: 0.0
+average reward score: 5.9453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.88%) |Training time=0.44s (20.49%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.34
+epoch: 0|step: 306|ppo_ep: 1|act_loss: -0.00437164306640625|cri_loss: -0.0005321502685546875|unsuper_loss: 0.0
+average reward score: 6.4765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.67%) |Training time=0.45s (20.71%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.34
+epoch: 0|step: 307|ppo_ep: 1|act_loss: -0.009765625|cri_loss: -0.004009246826171875|unsuper_loss: 0.0
+average reward score: 5.921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.28%) |Training time=0.44s (20.17%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.34
+epoch: 0|step: 308|ppo_ep: 1|act_loss: 0.039825439453125|cri_loss: 0.025970458984375|unsuper_loss: 0.0
+average reward score: 5.578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.18%) |Training time=0.43s (20.05%) |Others=0.10 (4.76%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.34
+[2023-04-14 08:58:53,301] [INFO] [logging.py:96:log_dist] [Rank 0] step=310, skipped=6, lr=[9.635126135262344e-06, 9.635126135262344e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 08:58:53,319] [INFO] [timer.py:199:stop] epoch=0/micro_step=310/global_step=310, RunningAvgSamplesPerSec=101.82524227057964, CurrSamplesPerSec=112.93795796099022, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 08:58:53,412] [INFO] [logging.py:96:log_dist] [Rank 0] step=310, skipped=5, lr=[4.9922176329224145e-06, 4.9922176329224145e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 309|ppo_ep: 1|act_loss: -0.0081024169921875|cri_loss: -0.00330352783203125|unsuper_loss: 0.0
+average reward score: 5.76953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.77%) |Training time=0.45s (20.61%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.35
+[2023-04-14 08:58:55,462] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, reducing to 4096
+epoch: 0|step: 310|ppo_ep: 1|act_loss: 0.0209503173828125|cri_loss: 0.0167999267578125|unsuper_loss: 0.0
+average reward score: 5.25
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.57%) |Training time=0.42s (19.76%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.35
+epoch: 0|step: 311|ppo_ep: 1|act_loss: 0.05865478515625|cri_loss: 0.036773681640625|unsuper_loss: 0.0
+average reward score: 6.4453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.01%) |Training time=0.44s (20.45%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.35
+epoch: 0|step: 312|ppo_ep: 1|act_loss: -0.0080413818359375|cri_loss: -0.001312255859375|unsuper_loss: 0.0
+average reward score: 6.2578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.67s (75.84%) |Training time=0.43s (19.60%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.35
+epoch: 0|step: 313|ppo_ep: 1|act_loss: 0.00653076171875|cri_loss: 0.0057525634765625|unsuper_loss: 0.0
+average reward score: 7.109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.73s (76.11%) |Training time=0.44s (19.49%) |Others=0.10 (4.41%)|CurSamplesPerSec=14.07 |AvgSamplesPerSec=14.35
+epoch: 0|step: 314|ppo_ep: 1|act_loss: 0.0157012939453125|cri_loss: 0.0095367431640625|unsuper_loss: 0.0
+average reward score: 6.78515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.07%) |Training time=0.44s (20.28%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.35
+epoch: 0|step: 315|ppo_ep: 1|act_loss: -0.037750244140625|cri_loss: -0.016204833984375|unsuper_loss: 0.0
+average reward score: 6.21875
+-------------------------------------------------------------------------------------
+|E2E latency=2.49s |Gather latency=0.00s (0.00%) |Generate time=1.64s (66.05%) |Training time=0.43s (17.14%) |Others=0.42 (16.82%)|CurSamplesPerSec=12.85 |AvgSamplesPerSec=14.34
+epoch: 0|step: 316|ppo_ep: 1|act_loss: 0.03277587890625|cri_loss: 0.0176544189453125|unsuper_loss: 0.0
+average reward score: 5.11328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.60%) |Training time=0.43s (19.71%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.35
+epoch: 0|step: 317|ppo_ep: 1|act_loss: -0.0408935546875|cri_loss: -0.0183868408203125|unsuper_loss: 0.0
+average reward score: 5.609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.78%) |Training time=0.45s (20.72%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.35
+epoch: 0|step: 318|ppo_ep: 1|act_loss: 0.0999755859375|cri_loss: 0.059661865234375|unsuper_loss: 0.0
+average reward score: 6.859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.09%) |Training time=0.46s (19.62%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.34
+[2023-04-14 08:59:15,615] [INFO] [logging.py:96:log_dist] [Rank 0] step=320, skipped=7, lr=[9.63378553719082e-06, 9.63378553719082e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 08:59:15,633] [INFO] [timer.py:199:stop] epoch=0/micro_step=320/global_step=320, RunningAvgSamplesPerSec=102.20252850967381, CurrSamplesPerSec=109.19005671926405, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 08:59:15,725] [INFO] [logging.py:96:log_dist] [Rank 0] step=320, skipped=5, lr=[4.991440303299444e-06, 4.991440303299444e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 319|ppo_ep: 1|act_loss: 0.01959228515625|cri_loss: 0.01177978515625|unsuper_loss: 0.0
+average reward score: 5.95703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.51%) |Training time=0.46s (20.98%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.35
+epoch: 0|step: 320|ppo_ep: 1|act_loss: -0.0213623046875|cri_loss: -0.009521484375|unsuper_loss: 0.0
+average reward score: 6.09765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.15%) |Training time=0.46s (21.24%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.35
+epoch: 0|step: 321|ppo_ep: 1|act_loss: 0.0077362060546875|cri_loss: 0.006229400634765625|unsuper_loss: 0.0
+average reward score: 6.9609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.45s |Gather latency=0.00s (0.00%) |Generate time=1.62s (66.12%) |Training time=0.46s (18.88%) |Others=0.37 (15.00%)|CurSamplesPerSec=13.07 |AvgSamplesPerSec=14.34
+epoch: 0|step: 322|ppo_ep: 1|act_loss: 0.031463623046875|cri_loss: 0.0181884765625|unsuper_loss: 0.0
+average reward score: 5.71484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.11%) |Training time=0.46s (21.28%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.34
+epoch: 0|step: 323|ppo_ep: 1|act_loss: -0.0440673828125|cri_loss: -0.02008056640625|unsuper_loss: 0.0
+average reward score: 6.68359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.13%) |Training time=0.46s (21.25%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.34
+epoch: 0|step: 324|ppo_ep: 1|act_loss: -0.05035400390625|cri_loss: -0.023223876953125|unsuper_loss: 0.0
+average reward score: 5.9375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.15%) |Training time=0.46s (21.23%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.35
+epoch: 0|step: 325|ppo_ep: 1|act_loss: 0.00405120849609375|cri_loss: 0.004222869873046875|unsuper_loss: 0.0
+average reward score: 5.7734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.21%) |Training time=0.46s (21.28%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.35
+epoch: 0|step: 326|ppo_ep: 1|act_loss: -0.024139404296875|cri_loss: -0.00982666015625|unsuper_loss: 0.0
+average reward score: 6.15234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.41%) |Training time=0.45s (20.94%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.35
+epoch: 0|step: 327|ppo_ep: 1|act_loss: 0.01953125|cri_loss: 0.0111846923828125|unsuper_loss: 0.0
+average reward score: 5.8515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.77s |Gather latency=0.00s (0.00%) |Generate time=1.62s (58.42%) |Training time=0.47s (16.78%) |Others=0.69 (24.80%)|CurSamplesPerSec=11.55 |AvgSamplesPerSec=14.34
+epoch: 0|step: 328|ppo_ep: 1|act_loss: 0.0179443359375|cri_loss: 0.0124053955078125|unsuper_loss: 0.0
+average reward score: 5.7734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.81%) |Training time=0.45s (20.57%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.34
+[2023-04-14 08:59:38,260] [INFO] [logging.py:96:log_dist] [Rank 0] step=330, skipped=7, lr=[9.632228270572594e-06, 9.632228270572594e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 08:59:38,278] [INFO] [timer.py:199:stop] epoch=0/micro_step=330/global_step=330, RunningAvgSamplesPerSec=102.38817781321211, CurrSamplesPerSec=120.18967042650056, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 08:59:38,371] [INFO] [logging.py:96:log_dist] [Rank 0] step=330, skipped=5, lr=[4.990626044520905e-06, 4.990626044520905e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 329|ppo_ep: 1|act_loss: -0.00534820556640625|cri_loss: 0.001178741455078125|unsuper_loss: 0.0
+average reward score: 5.6953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.66%) |Training time=0.43s (19.71%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.34
+epoch: 0|step: 330|ppo_ep: 1|act_loss: -0.015716552734375|cri_loss: -0.00428009033203125|unsuper_loss: 0.0
+average reward score: 6.42578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.70%) |Training time=0.43s (19.65%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.34
+epoch: 0|step: 331|ppo_ep: 1|act_loss: -0.000396728515625|cri_loss: 0.0016536712646484375|unsuper_loss: 0.0
+average reward score: 6.6640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.11%) |Training time=0.44s (20.35%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34
+epoch: 0|step: 332|ppo_ep: 1|act_loss: 0.01067352294921875|cri_loss: 0.01187896728515625|unsuper_loss: 0.0
+average reward score: 6.078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.03%) |Training time=0.44s (20.34%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34
+epoch: 0|step: 333|ppo_ep: 1|act_loss: -0.0230712890625|cri_loss: -0.009796142578125|unsuper_loss: 0.0
+average reward score: 5.37109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.59s |Gather latency=0.00s (0.00%) |Generate time=1.80s (69.53%) |Training time=0.43s (16.75%) |Others=0.35 (13.72%)|CurSamplesPerSec=12.38 |AvgSamplesPerSec=14.34
+epoch: 0|step: 334|ppo_ep: 1|act_loss: 0.0168609619140625|cri_loss: 0.0110626220703125|unsuper_loss: 0.0
+average reward score: 6.1953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.68%) |Training time=0.45s (20.69%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.34
+epoch: 0|step: 335|ppo_ep: 1|act_loss: 0.01114654541015625|cri_loss: 0.00699615478515625|unsuper_loss: 0.0
+average reward score: 5.6484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.79%) |Training time=0.45s (20.67%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.34
+epoch: 0|step: 336|ppo_ep: 1|act_loss: 0.06365966796875|cri_loss: 0.03387451171875|unsuper_loss: 0.0
+average reward score: 5.921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.95%) |Training time=0.44s (20.42%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34
+epoch: 0|step: 337|ppo_ep: 1|act_loss: -0.011566162109375|cri_loss: -0.00455474853515625|unsuper_loss: 0.0
+average reward score: 6.4609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.64%) |Training time=0.45s (20.70%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.34
+epoch: 0|step: 338|ppo_ep: 1|act_loss: 0.023193359375|cri_loss: 0.01505279541015625|unsuper_loss: 0.0
+average reward score: 6.578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.61%) |Training time=0.45s (20.75%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34
+[2023-04-14 09:00:00,381] [INFO] [logging.py:96:log_dist] [Rank 0] step=340, skipped=7, lr=[9.630599749234592e-06, 9.630599749234592e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:00:01,026] [INFO] [timer.py:199:stop] epoch=0/micro_step=340/global_step=340, RunningAvgSamplesPerSec=102.09652405965743, CurrSamplesPerSec=35.20821465465094, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:00:01,119] [INFO] [logging.py:96:log_dist] [Rank 0] step=340, skipped=5, lr=[4.989774868656078e-06, 4.989774868656078e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 339|ppo_ep: 1|act_loss: -0.022247314453125|cri_loss: -0.0097503662109375|unsuper_loss: 0.0
+average reward score: 8.203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.80s |Gather latency=0.00s (0.00%) |Generate time=1.63s (58.17%) |Training time=1.07s (38.32%) |Others=0.10 (3.51%)|CurSamplesPerSec=11.44 |AvgSamplesPerSec=14.33
+epoch: 0|step: 340|ppo_ep: 1|act_loss: -0.02862548828125|cri_loss: -0.01319122314453125|unsuper_loss: 0.0
+average reward score: 6.9453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.07%) |Training time=0.45s (20.63%) |Others=0.12 (5.30%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.33
+epoch: 0|step: 341|ppo_ep: 1|act_loss: -0.003849029541015625|cri_loss: -0.000995635986328125|unsuper_loss: 0.0
+average reward score: 6.2421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.73s (76.32%) |Training time=0.44s (19.25%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.11 |AvgSamplesPerSec=14.33
+epoch: 0|step: 342|ppo_ep: 1|act_loss: 0.01129913330078125|cri_loss: 0.007045745849609375|unsuper_loss: 0.0
+average reward score: 6.6328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.58%) |Training time=0.45s (20.79%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.33
+epoch: 0|step: 343|ppo_ep: 1|act_loss: -0.0092620849609375|cri_loss: -0.00258636474609375|unsuper_loss: 0.0
+average reward score: 5.98046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.92%) |Training time=0.45s (20.53%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.33
+epoch: 0|step: 344|ppo_ep: 1|act_loss: -0.00565338134765625|cri_loss: -0.0018253326416015625|unsuper_loss: 0.0
+average reward score: 5.8125
+-------------------------------------------------------------------------------------
+|E2E latency=2.82s |Gather latency=0.00s (0.00%) |Generate time=1.64s (58.16%) |Training time=0.43s (15.11%) |Others=0.75 (26.73%)|CurSamplesPerSec=11.36 |AvgSamplesPerSec=14.32
+epoch: 0|step: 345|ppo_ep: 1|act_loss: -0.01235198974609375|cri_loss: -0.005222320556640625|unsuper_loss: 0.0
+average reward score: 5.75
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.88%) |Training time=0.45s (20.58%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.32
+epoch: 0|step: 346|ppo_ep: 1|act_loss: 0.03094482421875|cri_loss: 0.0184478759765625|unsuper_loss: 0.0
+average reward score: 6.375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.01%) |Training time=0.45s (20.39%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.32
+epoch: 0|step: 347|ppo_ep: 1|act_loss: 0.0733642578125|cri_loss: 0.044921875|unsuper_loss: 0.0
+average reward score: 5.9453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.79s (76.65%) |Training time=0.45s (19.14%) |Others=0.10 (4.22%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.32
+epoch: 0|step: 348|ppo_ep: 1|act_loss: 0.010406494140625|cri_loss: 0.00753021240234375|unsuper_loss: 0.0
+average reward score: 5.79296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.73%) |Training time=0.45s (20.61%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.32
+[2023-04-14 09:00:23,695] [INFO] [logging.py:96:log_dist] [Rank 0] step=350, skipped=7, lr=[9.628899997315426e-06, 9.628899997315426e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:00:23,713] [INFO] [timer.py:199:stop] epoch=0/micro_step=350/global_step=350, RunningAvgSamplesPerSec=102.38370118482274, CurrSamplesPerSec=111.07989661499347, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:00:23,806] [INFO] [logging.py:96:log_dist] [Rank 0] step=350, skipped=5, lr=[4.988886788321443e-06, 4.988886788321443e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 349|ppo_ep: 1|act_loss: -0.00733184814453125|cri_loss: -0.0023441314697265625|unsuper_loss: 0.0
+average reward score: 6.18359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.67%) |Training time=0.45s (20.80%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.32
+epoch: 0|step: 350|ppo_ep: 1|act_loss: 0.00539398193359375|cri_loss: 0.00363922119140625|unsuper_loss: 0.0
+average reward score: 6.51953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.39%) |Training time=0.43s (19.99%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.33
+epoch: 0|step: 351|ppo_ep: 1|act_loss: 0.02569580078125|cri_loss: 0.01378631591796875|unsuper_loss: 0.0
+average reward score: 5.9296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.58%) |Training time=0.43s (19.88%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.33
+epoch: 0|step: 352|ppo_ep: 1|act_loss: 0.0211639404296875|cri_loss: 0.0124969482421875|unsuper_loss: 0.0
+average reward score: 6.20703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.73%) |Training time=0.43s (19.62%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.33
+epoch: 0|step: 353|ppo_ep: 1|act_loss: 0.0112762451171875|cri_loss: 0.006603240966796875|unsuper_loss: 0.0
+average reward score: 6.05078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.74%) |Training time=0.43s (19.70%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.33
+epoch: 0|step: 354|ppo_ep: 1|act_loss: -0.017547607421875|cri_loss: -0.008026123046875|unsuper_loss: 0.0
+average reward score: 6.3203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.14%) |Training time=0.44s (20.21%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.33
+epoch: 0|step: 355|ppo_ep: 1|act_loss: 0.001312255859375|cri_loss: 0.00237274169921875|unsuper_loss: 0.0
+average reward score: 5.6015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.88%) |Training time=0.45s (20.58%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.33
+epoch: 0|step: 356|ppo_ep: 1|act_loss: -0.03271484375|cri_loss: -0.01531982421875|unsuper_loss: 0.0
+average reward score: 6.9765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.00%) |Training time=0.44s (20.33%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.33
+epoch: 0|step: 357|ppo_ep: 1|act_loss: -0.007843017578125|cri_loss: -0.0031490325927734375|unsuper_loss: 0.0
+average reward score: 6.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.61%) |Training time=0.45s (20.70%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.33
+epoch: 0|step: 358|ppo_ep: 1|act_loss: -0.00775909423828125|cri_loss: -0.003040313720703125|unsuper_loss: 0.0
+average reward score: 5.66015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.67s |Gather latency=0.00s (0.00%) |Generate time=1.64s (61.37%) |Training time=0.43s (16.08%) |Others=0.60 (22.54%)|CurSamplesPerSec=11.98 |AvgSamplesPerSec=14.33
+[2023-04-14 09:00:45,855] [INFO] [logging.py:96:log_dist] [Rank 0] step=360, skipped=7, lr=[9.627129040009524e-06, 9.627129040009524e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:00:45,873] [INFO] [timer.py:199:stop] epoch=0/micro_step=360/global_step=360, RunningAvgSamplesPerSec=102.79024752173669, CurrSamplesPerSec=142.6891849079872, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:00:45,965] [INFO] [logging.py:96:log_dist] [Rank 0] step=360, skipped=5, lr=[4.987961816680493e-06, 4.987961816680493e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 359|ppo_ep: 1|act_loss: -0.0162811279296875|cri_loss: -0.0064697265625|unsuper_loss: 0.0
+average reward score: 6.65625
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.64s (77.15%) |Training time=0.39s (18.20%) |Others=0.10 (4.64%)|CurSamplesPerSec=15.03 |AvgSamplesPerSec=14.33
+epoch: 0|step: 360|ppo_ep: 1|act_loss: -0.03131103515625|cri_loss: -0.01505279541015625|unsuper_loss: 0.0
+average reward score: 6.44921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.68%) |Training time=0.45s (20.68%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.33
+epoch: 0|step: 361|ppo_ep: 1|act_loss: 0.02545166015625|cri_loss: 0.0144805908203125|unsuper_loss: 0.0
+average reward score: 5.66796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.23%) |Training time=0.44s (20.24%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.33
+epoch: 0|step: 362|ppo_ep: 1|act_loss: 0.07672119140625|cri_loss: 0.042572021484375|unsuper_loss: 0.0
+average reward score: 6.07421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.80s (76.93%) |Training time=0.44s (18.79%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.33
+epoch: 0|step: 363|ppo_ep: 1|act_loss: -0.13330078125|cri_loss: -0.06036376953125|unsuper_loss: 0.0
+average reward score: 6.20703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.54%) |Training time=0.43s (19.82%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.33
+epoch: 0|step: 364|ppo_ep: 1|act_loss: 0.0775146484375|cri_loss: 0.042633056640625|unsuper_loss: 0.0
+average reward score: 5.8046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.44%) |Training time=0.43s (19.94%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.33
+epoch: 0|step: 365|ppo_ep: 1|act_loss: 0.0132904052734375|cri_loss: 0.00804901123046875|unsuper_loss: 0.0
+average reward score: 5.5703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.53s |Gather latency=0.00s (0.00%) |Generate time=1.63s (64.45%) |Training time=0.44s (17.53%) |Others=0.46 (18.02%)|CurSamplesPerSec=12.66 |AvgSamplesPerSec=14.32
+epoch: 0|step: 366|ppo_ep: 1|act_loss: -0.025360107421875|cri_loss: -0.01123809814453125|unsuper_loss: 0.0
+average reward score: 5.640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.18%) |Training time=0.44s (20.19%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.33
+epoch: 0|step: 367|ppo_ep: 1|act_loss: -0.018341064453125|cri_loss: -0.00739288330078125|unsuper_loss: 0.0
+average reward score: 5.3359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.64%) |Training time=0.43s (19.82%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.33
+epoch: 0|step: 368|ppo_ep: 1|act_loss: -0.03570556640625|cri_loss: -0.0162506103515625|unsuper_loss: 0.0
+average reward score: 4.9609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.39%) |Training time=0.43s (19.88%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.33
+[2023-04-14 09:01:08,114] [INFO] [logging.py:96:log_dist] [Rank 0] step=370, skipped=7, lr=[9.625286903566743e-06, 9.625286903566743e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:01:08,133] [INFO] [timer.py:199:stop] epoch=0/micro_step=370/global_step=370, RunningAvgSamplesPerSec=103.09254097134992, CurrSamplesPerSec=103.87621324471226, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:01:08,227] [INFO] [logging.py:96:log_dist] [Rank 0] step=370, skipped=5, lr=[4.986999967443538e-06, 4.986999967443538e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 369|ppo_ep: 1|act_loss: 0.037139892578125|cri_loss: 0.02215576171875|unsuper_loss: 0.0
+average reward score: 5.98046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.98%) |Training time=0.47s (21.40%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.54 |AvgSamplesPerSec=14.33
+epoch: 0|step: 370|ppo_ep: 1|act_loss: -0.025787353515625|cri_loss: -0.0121307373046875|unsuper_loss: 0.0
+average reward score: 5.40625
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.72s (75.80%) |Training time=0.45s (19.78%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.10 |AvgSamplesPerSec=14.33
+epoch: 0|step: 371|ppo_ep: 1|act_loss: 0.15869140625|cri_loss: 0.1014404296875|unsuper_loss: 0.0
+average reward score: 6.26171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.71%) |Training time=0.45s (20.76%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.33
+epoch: 0|step: 372|ppo_ep: 1|act_loss: 0.028961181640625|cri_loss: 0.0159454345703125|unsuper_loss: 0.0
+average reward score: 5.92578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.55%) |Training time=0.46s (20.89%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.33
+epoch: 0|step: 373|ppo_ep: 1|act_loss: -0.00873565673828125|cri_loss: -0.00383758544921875|unsuper_loss: 0.0
+average reward score: 5.73046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.99%) |Training time=0.44s (20.43%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.33
+epoch: 0|step: 374|ppo_ep: 1|act_loss: 0.05352783203125|cri_loss: 0.029754638671875|unsuper_loss: 0.0
+average reward score: 6.60546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.83%) |Training time=0.42s (19.45%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.33
+epoch: 0|step: 375|ppo_ep: 1|act_loss: -0.0106658935546875|cri_loss: -0.004302978515625|unsuper_loss: 0.0
+average reward score: 5.89453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.77%) |Training time=0.45s (20.61%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.33
+epoch: 0|step: 376|ppo_ep: 1|act_loss: -0.08251953125|cri_loss: -0.03900146484375|unsuper_loss: 0.0
+average reward score: 5.85546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.46s |Gather latency=0.00s (0.00%) |Generate time=1.62s (65.79%) |Training time=0.46s (18.63%) |Others=0.38 (15.58%)|CurSamplesPerSec=13.01 |AvgSamplesPerSec=14.33
+epoch: 0|step: 377|ppo_ep: 1|act_loss: 0.0123748779296875|cri_loss: 0.0083465576171875|unsuper_loss: 0.0
+average reward score: 6.8984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.55%) |Training time=0.45s (20.76%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.33
+epoch: 0|step: 378|ppo_ep: 1|act_loss: -0.016845703125|cri_loss: -0.006771087646484375|unsuper_loss: 0.0
+average reward score: 6.5
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.54%) |Training time=0.45s (20.82%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.33
+[2023-04-14 09:01:30,242] [INFO] [logging.py:96:log_dist] [Rank 0] step=380, skipped=7, lr=[9.623373615291988e-06, 9.623373615291988e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:01:30,260] [INFO] [timer.py:199:stop] epoch=0/micro_step=380/global_step=380, RunningAvgSamplesPerSec=103.31409354931576, CurrSamplesPerSec=110.43399930555898, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:01:30,353] [INFO] [logging.py:96:log_dist] [Rank 0] step=380, skipped=5, lr=[4.986001254867505e-06, 4.986001254867505e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 379|ppo_ep: 1|act_loss: 0.00899505615234375|cri_loss: 0.004871368408203125|unsuper_loss: 0.0
+average reward score: 6.72265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.65%) |Training time=0.45s (20.85%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.33
+epoch: 0|step: 380|ppo_ep: 1|act_loss: -0.007282257080078125|cri_loss: -0.00290679931640625|unsuper_loss: 0.0
+average reward score: 6.97265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.43%) |Training time=0.46s (20.97%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.33
+epoch: 0|step: 381|ppo_ep: 1|act_loss: -0.0330810546875|cri_loss: -0.0158538818359375|unsuper_loss: 0.0
+average reward score: 7.83203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.60%) |Training time=0.45s (20.85%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.33
+epoch: 0|step: 382|ppo_ep: 1|act_loss: -0.0264892578125|cri_loss: -0.01214599609375|unsuper_loss: 0.0
+average reward score: 5.5390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.75%) |Training time=0.45s (20.61%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.34
+epoch: 0|step: 383|ppo_ep: 1|act_loss: -0.029754638671875|cri_loss: -0.01397705078125|unsuper_loss: 0.0
+average reward score: 6.98046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.42%) |Training time=0.43s (20.04%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34
+epoch: 0|step: 384|ppo_ep: 1|act_loss: 0.044189453125|cri_loss: 0.0272216796875|unsuper_loss: 0.0
+average reward score: 5.6953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.78%) |Training time=0.45s (20.59%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.34
+epoch: 0|step: 385|ppo_ep: 1|act_loss: 0.024200439453125|cri_loss: 0.0135498046875|unsuper_loss: 0.0
+average reward score: 5.93359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.43%) |Training time=0.45s (20.91%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.34
+epoch: 0|step: 386|ppo_ep: 1|act_loss: 0.044403076171875|cri_loss: 0.0243988037109375|unsuper_loss: 0.0
+average reward score: 6.16796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.73%) |Training time=0.45s (20.64%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34
+[2023-04-14 09:01:47,714] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 387|ppo_ep: 1|act_loss: 0.048492431640625|cri_loss: 0.026763916015625|unsuper_loss: 0.0
+average reward score: 5.87109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.20%) |Training time=0.45s (20.62%) |Others=0.09 (4.19%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.34
+[2023-04-14 09:01:49,872] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 388|ppo_ep: 1|act_loss: 0.0159149169921875|cri_loss: 0.00897979736328125|unsuper_loss: 0.0
+average reward score: 5.68359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.66%) |Training time=0.43s (20.09%) |Others=0.09 (4.25%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.34
+[2023-04-14 09:01:51,930] [INFO] [logging.py:96:log_dist] [Rank 0] step=390, skipped=7, lr=[9.62138920354481e-06, 9.62138920354481e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:01:51,948] [INFO] [timer.py:199:stop] epoch=0/micro_step=390/global_step=390, RunningAvgSamplesPerSec=103.55384594517648, CurrSamplesPerSec=120.64981208262432, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:01:52,041] [INFO] [logging.py:96:log_dist] [Rank 0] step=390, skipped=7, lr=[4.985175753132026e-06, 4.985175753132026e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 389|ppo_ep: 1|act_loss: -0.0217132568359375|cri_loss: -0.01003265380859375|unsuper_loss: 0.0
+average reward score: 6.18359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.62%) |Training time=0.43s (19.72%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34
+epoch: 0|step: 390|ppo_ep: 1|act_loss: 0.026214599609375|cri_loss: 0.0140380859375|unsuper_loss: 0.0
+average reward score: 5.77734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.59%) |Training time=0.45s (20.80%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.34
+epoch: 0|step: 391|ppo_ep: 1|act_loss: 0.0062713623046875|cri_loss: 0.00415802001953125|unsuper_loss: 0.0
+average reward score: 5.71484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.42s |Gather latency=0.00s (0.00%) |Generate time=1.63s (67.10%) |Training time=0.44s (18.30%) |Others=0.35 (14.61%)|CurSamplesPerSec=13.21 |AvgSamplesPerSec=14.34
+epoch: 0|step: 392|ppo_ep: 1|act_loss: -0.021392822265625|cri_loss: -0.0094757080078125|unsuper_loss: 0.0
+average reward score: 5.67578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.80s (76.61%) |Training time=0.45s (19.10%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.64 |AvgSamplesPerSec=14.34
+epoch: 0|step: 393|ppo_ep: 1|act_loss: 0.0036029815673828125|cri_loss: 0.0069122314453125|unsuper_loss: 0.0
+average reward score: 5.3828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.56%) |Training time=0.43s (19.89%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.34
+epoch: 0|step: 394|ppo_ep: 1|act_loss: 0.027252197265625|cri_loss: 0.0158843994140625|unsuper_loss: 0.0
+average reward score: 6.08984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.04%) |Training time=0.44s (20.35%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.34
+epoch: 0|step: 395|ppo_ep: 1|act_loss: -0.0112457275390625|cri_loss: -0.00496673583984375|unsuper_loss: 0.0
+average reward score: 5.51953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.13%) |Training time=0.44s (20.23%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.34
+epoch: 0|step: 396|ppo_ep: 1|act_loss: -0.03106689453125|cri_loss: -0.014862060546875|unsuper_loss: 0.0
+average reward score: 4.6015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.42%) |Training time=0.43s (19.95%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.34
+epoch: 0|step: 397|ppo_ep: 1|act_loss: -0.04022216796875|cri_loss: -0.019287109375|unsuper_loss: 0.0
+average reward score: 5.375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.22%) |Training time=0.44s (20.05%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.34
+epoch: 0|step: 398|ppo_ep: 1|act_loss: -0.04376220703125|cri_loss: -0.0200653076171875|unsuper_loss: 0.0
+average reward score: 5.734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.65s (74.98%) |Training time=0.44s (20.24%) |Others=0.11 (4.78%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.34
+[2023-04-14 09:02:14,207] [INFO] [logging.py:96:log_dist] [Rank 0] step=400, skipped=7, lr=[9.619333697738975e-06, 9.619333697738975e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:02:14,225] [INFO] [timer.py:199:stop] epoch=0/micro_step=400/global_step=400, RunningAvgSamplesPerSec=103.79039135515188, CurrSamplesPerSec=104.92432531312882, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:02:14,317] [INFO] [logging.py:96:log_dist] [Rank 0] step=400, skipped=7, lr=[4.984110724217086e-06, 4.984110724217086e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 399|ppo_ep: 1|act_loss: -0.012847900390625|cri_loss: -0.004749298095703125|unsuper_loss: 0.0
+average reward score: 5.34765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.72s (75.21%) |Training time=0.47s (20.49%) |Others=0.10 (4.30%)|CurSamplesPerSec=14.01 |AvgSamplesPerSec=14.34
+epoch: 0|step: 400|ppo_ep: 1|act_loss: 0.0682373046875|cri_loss: 0.0369873046875|unsuper_loss: 0.0
+average reward score: 5.3828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.58%) |Training time=0.45s (20.79%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.34
+epoch: 0|step: 401|ppo_ep: 1|act_loss: 0.024078369140625|cri_loss: 0.012664794921875|unsuper_loss: 0.0
+average reward score: 5.3203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.59s |Gather latency=0.00s (0.00%) |Generate time=1.62s (62.63%) |Training time=0.45s (17.50%) |Others=0.51 (19.86%)|CurSamplesPerSec=12.36 |AvgSamplesPerSec=14.34
+epoch: 0|step: 402|ppo_ep: 1|act_loss: 0.0159912109375|cri_loss: 0.0086822509765625|unsuper_loss: 0.0
+average reward score: 5.640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.37%) |Training time=0.46s (21.03%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.34
+epoch: 0|step: 403|ppo_ep: 1|act_loss: 0.021636962890625|cri_loss: 0.0117950439453125|unsuper_loss: 0.0
+average reward score: 5.29296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.52%) |Training time=0.43s (19.83%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.34
+epoch: 0|step: 404|ppo_ep: 1|act_loss: 0.058624267578125|cri_loss: 0.03216552734375|unsuper_loss: 0.0
+average reward score: 5.73828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.61%) |Training time=0.43s (19.75%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.34
+epoch: 0|step: 405|ppo_ep: 1|act_loss: 0.02593994140625|cri_loss: 0.0146026611328125|unsuper_loss: 0.0
+average reward score: 5.6875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.85%) |Training time=0.45s (20.62%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.34
+epoch: 0|step: 406|ppo_ep: 1|act_loss: 0.006076812744140625|cri_loss: 0.006511688232421875|unsuper_loss: 0.0
+average reward score: 6.0859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.57%) |Training time=0.45s (20.82%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.34
+epoch: 0|step: 407|ppo_ep: 1|act_loss: -0.01947021484375|cri_loss: -0.00719451904296875|unsuper_loss: 0.0
+average reward score: 5.48046875
+-------------------------------------------------------------------------------------
+|E2E latency=3.09s |Gather latency=0.00s (0.00%) |Generate time=1.78s (57.80%) |Training time=0.45s (14.56%) |Others=0.85 (27.64%)|CurSamplesPerSec=10.36 |AvgSamplesPerSec=14.33
+epoch: 0|step: 408|ppo_ep: 1|act_loss: 0.00424957275390625|cri_loss: 0.0063934326171875|unsuper_loss: 0.0
+average reward score: 5.99609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.50%) |Training time=0.45s (20.87%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.33
+[2023-04-14 09:02:37,266] [INFO] [logging.py:96:log_dist] [Rank 0] step=410, skipped=7, lr=[9.617207128342042e-06, 9.617207128342042e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:02:37,284] [INFO] [timer.py:199:stop] epoch=0/micro_step=410/global_step=410, RunningAvgSamplesPerSec=103.98547538695068, CurrSamplesPerSec=111.4699448786658, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:02:37,377] [INFO] [logging.py:96:log_dist] [Rank 0] step=410, skipped=7, lr=[4.983008874788623e-06, 4.983008874788623e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 409|ppo_ep: 1|act_loss: -0.04351806640625|cri_loss: -0.019500732421875|unsuper_loss: 0.0
+average reward score: 5.24609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.51%) |Training time=0.45s (20.31%) |Others=0.14 (6.18%)|CurSamplesPerSec=14.43 |AvgSamplesPerSec=14.33
+epoch: 0|step: 410|ppo_ep: 1|act_loss: -0.032745361328125|cri_loss: -0.0156402587890625|unsuper_loss: 0.0
+average reward score: 6.37890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.57%) |Training time=0.45s (20.81%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.33
+epoch: 0|step: 411|ppo_ep: 1|act_loss: -0.0243377685546875|cri_loss: -0.0116424560546875|unsuper_loss: 0.0
+average reward score: 5.6015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.64%) |Training time=0.45s (20.77%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.33
+epoch: 0|step: 412|ppo_ep: 1|act_loss: 0.04522705078125|cri_loss: 0.0237884521484375|unsuper_loss: 0.0
+average reward score: 5.68359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.64%) |Training time=0.45s (20.70%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.33
+epoch: 0|step: 413|ppo_ep: 1|act_loss: -0.031341552734375|cri_loss: -0.01459503173828125|unsuper_loss: 0.0
+average reward score: 5.93359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.86%) |Training time=0.45s (20.51%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.33
+epoch: 0|step: 414|ppo_ep: 1|act_loss: 0.01171875|cri_loss: 0.0071258544921875|unsuper_loss: 0.0
+average reward score: 5.69140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.72%) |Training time=0.45s (20.67%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.34
+epoch: 0|step: 415|ppo_ep: 1|act_loss: -0.045379638671875|cri_loss: -0.0215301513671875|unsuper_loss: 0.0
+average reward score: 7.15234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.78%) |Training time=0.45s (20.70%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.34
+epoch: 0|step: 416|ppo_ep: 1|act_loss: -0.01280975341796875|cri_loss: -0.0058746337890625|unsuper_loss: 0.0
+average reward score: 6.015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.71%) |Training time=0.45s (20.66%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.34
+epoch: 0|step: 417|ppo_ep: 1|act_loss: -0.0185699462890625|cri_loss: -0.00850677490234375|unsuper_loss: 0.0
+average reward score: 6.12890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.84%) |Training time=0.45s (20.52%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34
+epoch: 0|step: 418|ppo_ep: 1|act_loss: 0.0283050537109375|cri_loss: 0.0148162841796875|unsuper_loss: 0.0
+average reward score: 6.3359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.54s |Gather latency=0.00s (0.00%) |Generate time=1.63s (64.15%) |Training time=0.44s (17.27%) |Others=0.47 (18.57%)|CurSamplesPerSec=12.61 |AvgSamplesPerSec=14.33
+[2023-04-14 09:02:59,407] [INFO] [logging.py:96:log_dist] [Rank 0] step=420, skipped=7, lr=[9.615009526874895e-06, 9.615009526874895e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:02:59,425] [INFO] [timer.py:199:stop] epoch=0/micro_step=420/global_step=420, RunningAvgSamplesPerSec=104.16394471702999, CurrSamplesPerSec=110.71603520667838, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:02:59,518] [INFO] [logging.py:96:log_dist] [Rank 0] step=420, skipped=7, lr=[4.981870221178703e-06, 4.981870221178703e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 419|ppo_ep: 1|act_loss: 0.04669189453125|cri_loss: 0.025634765625|unsuper_loss: 0.0
+average reward score: 5.734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.55%) |Training time=0.45s (20.82%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.33
+epoch: 0|step: 420|ppo_ep: 1|act_loss: 0.01861572265625|cri_loss: 0.0119781494140625|unsuper_loss: 0.0
+average reward score: 5.6953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.79%) |Training time=0.45s (20.54%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34
+epoch: 0|step: 421|ppo_ep: 1|act_loss: 0.04266357421875|cri_loss: 0.023651123046875|unsuper_loss: 0.0
+average reward score: 5.7734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.76%) |Training time=0.45s (20.71%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.34
+epoch: 0|step: 422|ppo_ep: 1|act_loss: -0.026763916015625|cri_loss: -0.01251983642578125|unsuper_loss: 0.0
+average reward score: 6.8046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.66%) |Training time=0.45s (20.65%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.34
+epoch: 0|step: 423|ppo_ep: 1|act_loss: -0.02587890625|cri_loss: -0.011993408203125|unsuper_loss: 0.0
+average reward score: 5.5390625
+-------------------------------------------------------------------------------------
+|E2E latency=3.01s |Gather latency=0.00s (0.00%) |Generate time=1.79s (59.41%) |Training time=0.45s (14.82%) |Others=0.78 (25.77%)|CurSamplesPerSec=10.61 |AvgSamplesPerSec=14.33
+epoch: 0|step: 424|ppo_ep: 1|act_loss: 0.0005359649658203125|cri_loss: 0.0005984306335449219|unsuper_loss: 0.0
+average reward score: 6.15234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.89%) |Training time=0.45s (20.58%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.33
+epoch: 0|step: 425|ppo_ep: 1|act_loss: 0.05877685546875|cri_loss: 0.031890869140625|unsuper_loss: 0.0
+average reward score: 6.1171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.81%) |Training time=0.45s (20.67%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.33
+epoch: 0|step: 426|ppo_ep: 1|act_loss: 0.00269317626953125|cri_loss: 0.0024261474609375|unsuper_loss: 0.0
+average reward score: 7.10546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.65s (74.81%) |Training time=0.46s (20.73%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.51 |AvgSamplesPerSec=14.33
+epoch: 0|step: 427|ppo_ep: 1|act_loss: -0.010650634765625|cri_loss: -0.0042266845703125|unsuper_loss: 0.0
+average reward score: 6.15625
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.66s (73.85%) |Training time=0.46s (20.32%) |Others=0.13 (5.83%)|CurSamplesPerSec=14.25 |AvgSamplesPerSec=14.33
+epoch: 0|step: 428|ppo_ep: 1|act_loss: -0.0057525634765625|cri_loss: -0.0024261474609375|unsuper_loss: 0.0
+average reward score: 7.1171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.80%) |Training time=0.45s (20.66%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.33
+[2023-04-14 09:03:22,100] [INFO] [logging.py:96:log_dist] [Rank 0] step=430, skipped=7, lr=[9.612740925911291e-06, 9.612740925911291e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:03:22,118] [INFO] [timer.py:199:stop] epoch=0/micro_step=430/global_step=430, RunningAvgSamplesPerSec=104.3303442174363, CurrSamplesPerSec=114.00963432544857, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:03:22,211] [INFO] [logging.py:96:log_dist] [Rank 0] step=430, skipped=7, lr=[4.980694780264918e-06, 4.980694780264918e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 429|ppo_ep: 1|act_loss: -0.003879547119140625|cri_loss: -0.0010061264038085938|unsuper_loss: 0.0
+average reward score: 6.046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.17%) |Training time=0.44s (20.33%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.33
+epoch: 0|step: 430|ppo_ep: 1|act_loss: 0.00652313232421875|cri_loss: 0.00467681884765625|unsuper_loss: 0.0
+average reward score: 6.27734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.60%) |Training time=0.45s (20.86%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.33
+epoch: 0|step: 431|ppo_ep: 1|act_loss: 0.0018978118896484375|cri_loss: 0.0019092559814453125|unsuper_loss: 0.0
+average reward score: 6.4453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.75%) |Training time=0.45s (20.71%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.33
+epoch: 0|step: 432|ppo_ep: 1|act_loss: 0.007190704345703125|cri_loss: 0.00390625|unsuper_loss: 0.0
+average reward score: 6.2265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.80%) |Training time=0.45s (20.65%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.33
+epoch: 0|step: 433|ppo_ep: 1|act_loss: 0.003032684326171875|cri_loss: 0.002223968505859375|unsuper_loss: 0.0
+average reward score: 6.2265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.09%) |Training time=0.44s (20.35%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.33
+epoch: 0|step: 434|ppo_ep: 1|act_loss: -0.058868408203125|cri_loss: -0.0277099609375|unsuper_loss: 0.0
+average reward score: 5.90234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.79%) |Training time=0.45s (20.68%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.33
+epoch: 0|step: 435|ppo_ep: 1|act_loss: -0.022735595703125|cri_loss: -0.01020050048828125|unsuper_loss: 0.0
+average reward score: 6.27734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.41s |Gather latency=0.00s (0.00%) |Generate time=1.62s (67.27%) |Training time=0.45s (18.76%) |Others=0.34 (13.97%)|CurSamplesPerSec=13.27 |AvgSamplesPerSec=14.33
+epoch: 0|step: 436|ppo_ep: 1|act_loss: 0.053619384765625|cri_loss: 0.02984619140625|unsuper_loss: 0.0
+average reward score: 6.0390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.81%) |Training time=0.45s (20.67%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.33
+epoch: 0|step: 437|ppo_ep: 1|act_loss: 0.01629638671875|cri_loss: 0.01190948486328125|unsuper_loss: 0.0
+average reward score: 6.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.79s (76.74%) |Training time=0.44s (19.03%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.33
+epoch: 0|step: 438|ppo_ep: 1|act_loss: -0.07861328125|cri_loss: -0.0305023193359375|unsuper_loss: 0.0
+average reward score: 6.1796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.73%) |Training time=0.45s (20.73%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.33
+[2023-04-14 09:03:44,233] [INFO] [logging.py:96:log_dist] [Rank 0] step=440, skipped=7, lr=[9.61040135907737e-06, 9.61040135907737e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:03:44,251] [INFO] [timer.py:199:stop] epoch=0/micro_step=440/global_step=440, RunningAvgSamplesPerSec=104.48292126996193, CurrSamplesPerSec=108.55886356632661, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:03:44,344] [INFO] [logging.py:96:log_dist] [Rank 0] step=440, skipped=7, lr=[4.97948256947014e-06, 4.97948256947014e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 439|ppo_ep: 1|act_loss: -0.0447998046875|cri_loss: -0.0199737548828125|unsuper_loss: 0.0
+average reward score: 6.0390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.47%) |Training time=0.46s (21.03%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.33
+epoch: 0|step: 440|ppo_ep: 1|act_loss: 0.052581787109375|cri_loss: 0.0293731689453125|unsuper_loss: 0.0
+average reward score: 5.6640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.96%) |Training time=0.44s (20.49%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.33
+epoch: 0|step: 441|ppo_ep: 1|act_loss: 0.05633544921875|cri_loss: 0.03546142578125|unsuper_loss: 0.0
+average reward score: 5.51953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.06%) |Training time=0.44s (20.41%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.33
+epoch: 0|step: 442|ppo_ep: 1|act_loss: -0.0230560302734375|cri_loss: 0.0008544921875|unsuper_loss: 0.0
+average reward score: 5.3671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.69%) |Training time=0.45s (20.77%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.33
+epoch: 0|step: 443|ppo_ep: 1|act_loss: 0.016937255859375|cri_loss: 0.011688232421875|unsuper_loss: 0.0
+average reward score: 6.484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.60%) |Training time=0.45s (20.87%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.34
+epoch: 0|step: 444|ppo_ep: 1|act_loss: 0.0214691162109375|cri_loss: 0.01448822021484375|unsuper_loss: 0.0
+average reward score: 4.859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.07%) |Training time=0.44s (20.37%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.34
+epoch: 0|step: 445|ppo_ep: 1|act_loss: -0.0421142578125|cri_loss: -0.016326904296875|unsuper_loss: 0.0
+average reward score: 6.421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.77%) |Training time=0.45s (20.70%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.34
+epoch: 0|step: 446|ppo_ep: 1|act_loss: 0.120361328125|cri_loss: 0.064453125|unsuper_loss: 0.0
+average reward score: 5.7890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.28%) |Training time=0.45s (20.36%) |Others=0.14 (6.36%)|CurSamplesPerSec=14.44 |AvgSamplesPerSec=14.34
+epoch: 0|step: 447|ppo_ep: 1|act_loss: -0.0311126708984375|cri_loss: -0.0119476318359375|unsuper_loss: 0.0
+average reward score: 5.62109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.79%) |Training time=0.45s (20.65%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.34
+epoch: 0|step: 448|ppo_ep: 1|act_loss: -0.024383544921875|cri_loss: -0.00986480712890625|unsuper_loss: 0.0
+average reward score: 5.66796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.41%) |Training time=0.43s (20.04%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34
+[2023-04-14 09:04:05,991] [INFO] [logging.py:96:log_dist] [Rank 0] step=450, skipped=7, lr=[9.607990861051154e-06, 9.607990861051154e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:04:06,009] [INFO] [timer.py:199:stop] epoch=0/micro_step=450/global_step=450, RunningAvgSamplesPerSec=104.64871045065954, CurrSamplesPerSec=110.18387111339149, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:04:06,104] [INFO] [logging.py:96:log_dist] [Rank 0] step=450, skipped=7, lr=[4.978233606762256e-06, 4.978233606762256e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 449|ppo_ep: 1|act_loss: -0.11602783203125|cri_loss: -0.0455322265625|unsuper_loss: 0.0
+average reward score: 5.38671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.53%) |Training time=0.45s (20.86%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.34
+epoch: 0|step: 450|ppo_ep: 1|act_loss: 0.019927978515625|cri_loss: 0.0127716064453125|unsuper_loss: 0.0
+average reward score: 6.1171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.35%) |Training time=0.46s (21.11%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.34
+epoch: 0|step: 451|ppo_ep: 1|act_loss: 0.113037109375|cri_loss: 0.065673828125|unsuper_loss: 0.0
+average reward score: 5.57421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.80%) |Training time=0.45s (20.68%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.34
+epoch: 0|step: 452|ppo_ep: 1|act_loss: 0.041015625|cri_loss: 0.0259552001953125|unsuper_loss: 0.0
+average reward score: 6.6640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.80s (76.88%) |Training time=0.44s (18.90%) |Others=0.10 (4.21%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.34
+epoch: 0|step: 453|ppo_ep: 1|act_loss: -0.0408935546875|cri_loss: -0.0171661376953125|unsuper_loss: 0.0
+average reward score: 6.30859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.65s (74.98%) |Training time=0.45s (20.53%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.34
+epoch: 0|step: 454|ppo_ep: 1|act_loss: -0.0245361328125|cri_loss: -0.01012420654296875|unsuper_loss: 0.0
+average reward score: 5.98046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.83%) |Training time=0.45s (20.65%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.34
+epoch: 0|step: 455|ppo_ep: 1|act_loss: 0.012359619140625|cri_loss: 0.01100921630859375|unsuper_loss: 0.0
+average reward score: 6.6953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.75%) |Training time=0.45s (20.72%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.34
+epoch: 0|step: 456|ppo_ep: 1|act_loss: 0.0537109375|cri_loss: 0.0309906005859375|unsuper_loss: 0.0
+average reward score: 6.28515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.66s (75.13%) |Training time=0.45s (20.41%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.51 |AvgSamplesPerSec=14.34
+epoch: 0|step: 457|ppo_ep: 1|act_loss: 0.038177490234375|cri_loss: 0.026824951171875|unsuper_loss: 0.0
+average reward score: 6.890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.73s (75.83%) |Training time=0.45s (19.71%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.04 |AvgSamplesPerSec=14.34
+epoch: 0|step: 458|ppo_ep: 1|act_loss: 0.062255859375|cri_loss: 0.0347900390625|unsuper_loss: 0.0
+average reward score: 6.5703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.63%) |Training time=0.45s (20.83%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.34
+[2023-04-14 09:04:28,063] [INFO] [logging.py:96:log_dist] [Rank 0] step=460, skipped=7, lr=[9.60550946756204e-06, 9.60550946756204e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:04:28,081] [INFO] [timer.py:199:stop] epoch=0/micro_step=460/global_step=460, RunningAvgSamplesPerSec=104.78282677177035, CurrSamplesPerSec=112.42672956817755, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:04:28,174] [INFO] [logging.py:96:log_dist] [Rank 0] step=460, skipped=7, lr=[4.976947910653907e-06, 4.976947910653907e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 459|ppo_ep: 1|act_loss: 0.0418701171875|cri_loss: 0.0247802734375|unsuper_loss: 0.0
+average reward score: 6.96875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.85%) |Training time=0.45s (20.62%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34
+epoch: 0|step: 460|ppo_ep: 1|act_loss: -0.0098114013671875|cri_loss: 0.0029449462890625|unsuper_loss: 0.0
+average reward score: 7.3515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.67%) |Training time=0.45s (20.78%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.34
+epoch: 0|step: 461|ppo_ep: 1|act_loss: 0.038818359375|cri_loss: 0.021240234375|unsuper_loss: 0.0
+average reward score: 6.46484375
+-------------------------------------------------------------------------------------
+|E2E latency=3.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (50.91%) |Training time=0.45s (14.09%) |Others=1.12 (35.00%)|CurSamplesPerSec=10.02 |AvgSamplesPerSec=14.33
+epoch: 0|step: 462|ppo_ep: 1|act_loss: -0.0008087158203125|cri_loss: 0.004665374755859375|unsuper_loss: 0.0
+average reward score: 6.25
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.11%) |Training time=0.44s (20.37%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.33
+epoch: 0|step: 463|ppo_ep: 1|act_loss: -0.00408172607421875|cri_loss: 0.0038604736328125|unsuper_loss: 0.0
+average reward score: 6.27734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.30%) |Training time=0.44s (20.13%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.33
+epoch: 0|step: 464|ppo_ep: 1|act_loss: -0.07135009765625|cri_loss: -0.0275421142578125|unsuper_loss: 0.0
+average reward score: 6.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.95%) |Training time=0.45s (20.52%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.33
+epoch: 0|step: 465|ppo_ep: 1|act_loss: 0.051300048828125|cri_loss: 0.0270538330078125|unsuper_loss: 0.0
+average reward score: 5.9921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.74%) |Training time=0.45s (20.72%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.33
+epoch: 0|step: 466|ppo_ep: 1|act_loss: 0.06756591796875|cri_loss: 0.042694091796875|unsuper_loss: 0.0
+average reward score: 6.3125
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.79s (76.65%) |Training time=0.45s (19.12%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.33
+epoch: 0|step: 467|ppo_ep: 1|act_loss: -0.0213623046875|cri_loss: -0.00701141357421875|unsuper_loss: 0.0
+average reward score: 7.28515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.73%) |Training time=0.45s (20.73%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.33
+epoch: 0|step: 468|ppo_ep: 1|act_loss: 0.037811279296875|cri_loss: 0.0226287841796875|unsuper_loss: 0.0
+average reward score: 6.66015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.99%) |Training time=0.44s (20.47%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.33
+[2023-04-14 09:04:50,963] [INFO] [logging.py:96:log_dist] [Rank 0] step=470, skipped=7, lr=[9.602957215390267e-06, 9.602957215390267e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:04:50,981] [INFO] [timer.py:199:stop] epoch=0/micro_step=470/global_step=470, RunningAvgSamplesPerSec=104.94226242948741, CurrSamplesPerSec=112.22189910894872, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:04:51,073] [INFO] [logging.py:96:log_dist] [Rank 0] step=470, skipped=7, lr=[4.9756255002022105e-06, 4.9756255002022105e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 469|ppo_ep: 1|act_loss: 0.03900146484375|cri_loss: 0.0225982666015625|unsuper_loss: 0.0
+average reward score: 6.65234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.87%) |Training time=0.45s (20.62%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.34
+epoch: 0|step: 470|ppo_ep: 1|act_loss: -0.027069091796875|cri_loss: -0.00945281982421875|unsuper_loss: 0.0
+average reward score: 5.79296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.29%) |Training time=0.44s (20.18%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.34
+epoch: 0|step: 471|ppo_ep: 1|act_loss: -0.000213623046875|cri_loss: 0.00934600830078125|unsuper_loss: 0.0
+average reward score: 5.99609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.66%) |Training time=0.45s (20.81%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.34
+epoch: 0|step: 472|ppo_ep: 1|act_loss: 0.05279541015625|cri_loss: 0.028594970703125|unsuper_loss: 0.0
+average reward score: 6.9375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.19%) |Training time=0.44s (20.25%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.34
+epoch: 0|step: 473|ppo_ep: 1|act_loss: -0.17431640625|cri_loss: -0.073974609375|unsuper_loss: 0.0
+average reward score: 5.41015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.70%) |Training time=0.45s (20.79%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.34
+[2023-04-14 09:05:01,823] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, but hysteresis is 2. Reducing hysteresis to 1
+[2023-04-14 09:05:01,909] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
+epoch: 0|step: 474|ppo_ep: 1|act_loss: -0.1414794921875|cri_loss: -0.056488037109375|unsuper_loss: 0.0
+average reward score: 5.6796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.93%) |Training time=0.43s (19.89%) |Others=0.09 (4.18%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.34
+epoch: 0|step: 475|ppo_ep: 1|act_loss: 0.017120361328125|cri_loss: 0.01279449462890625|unsuper_loss: 0.0
+average reward score: 6.078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.76%) |Training time=0.45s (20.71%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.34
+[2023-04-14 09:05:06,139] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, reducing to 4096
+epoch: 0|step: 476|ppo_ep: 1|act_loss: -0.20263671875|cri_loss: -0.08056640625|unsuper_loss: 0.0
+average reward score: 4.42578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.71%) |Training time=0.42s (19.72%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.34
+epoch: 0|step: 477|ppo_ep: 1|act_loss: -0.1700439453125|cri_loss: -0.07275390625|unsuper_loss: 0.0
+average reward score: 5.1328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.82%) |Training time=0.45s (20.64%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.34
+epoch: 0|step: 478|ppo_ep: 1|act_loss: 0.039215087890625|cri_loss: 0.0303955078125|unsuper_loss: 0.0
+average reward score: 3.51953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.19%) |Training time=0.44s (20.26%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.34
+[2023-04-14 09:05:12,638] [INFO] [logging.py:96:log_dist] [Rank 0] step=480, skipped=9, lr=[9.600864420788175e-06, 9.600864420788175e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:05:12,656] [INFO] [timer.py:199:stop] epoch=0/micro_step=480/global_step=480, RunningAvgSamplesPerSec=105.12317312034696, CurrSamplesPerSec=110.69512469701006, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:05:12,749] [INFO] [logging.py:96:log_dist] [Rank 0] step=480, skipped=8, lr=[4.9744039562213675e-06, 4.9744039562213675e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 479|ppo_ep: 1|act_loss: 0.248291015625|cri_loss: 0.168701171875|unsuper_loss: 0.0
+average reward score: 2.625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.72%) |Training time=0.45s (20.78%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.34
+epoch: 0|step: 480|ppo_ep: 1|act_loss: 0.0872802734375|cri_loss: 0.067626953125|unsuper_loss: 0.0
+average reward score: 2.62109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.40%) |Training time=0.46s (21.10%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.34
+epoch: 0|step: 481|ppo_ep: 1|act_loss: -0.034820556640625|cri_loss: -0.014739990234375|unsuper_loss: 0.0
+average reward score: 3.9765625
+-------------------------------------------------------------------------------------
+|E2E latency=3.11s |Gather latency=0.00s (0.00%) |Generate time=1.80s (57.93%) |Training time=0.45s (14.32%) |Others=0.86 (27.75%)|CurSamplesPerSec=10.28 |AvgSamplesPerSec=14.33
+epoch: 0|step: 482|ppo_ep: 1|act_loss: -0.1807861328125|cri_loss: -0.0804443359375|unsuper_loss: 0.0
+average reward score: 3.55859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.33%) |Training time=0.44s (20.12%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.33
+epoch: 0|step: 483|ppo_ep: 1|act_loss: 0.0008697509765625|cri_loss: 0.007511138916015625|unsuper_loss: 0.0
+average reward score: 4.078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.25%) |Training time=0.44s (20.22%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.33
+epoch: 0|step: 484|ppo_ep: 1|act_loss: 0.034332275390625|cri_loss: 0.033843994140625|unsuper_loss: 0.0
+average reward score: 4.1328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.97%) |Training time=0.44s (20.25%) |Others=0.10 (4.78%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.34
+epoch: 0|step: 485|ppo_ep: 1|act_loss: 0.0885009765625|cri_loss: 0.05035400390625|unsuper_loss: 0.0
+average reward score: 4.46484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.75s (76.50%) |Training time=0.44s (19.20%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.99 |AvgSamplesPerSec=14.33
+epoch: 0|step: 486|ppo_ep: 1|act_loss: 0.040283203125|cri_loss: 0.024200439453125|unsuper_loss: 0.0
+average reward score: 4.3359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.34%) |Training time=0.44s (20.12%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.34
+epoch: 0|step: 487|ppo_ep: 1|act_loss: -0.05267333984375|cri_loss: -0.022125244140625|unsuper_loss: 0.0
+average reward score: 4.96484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.34%) |Training time=0.44s (20.12%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.34
+epoch: 0|step: 488|ppo_ep: 1|act_loss: 0.0249786376953125|cri_loss: 0.03216552734375|unsuper_loss: 0.0
+average reward score: 4.9453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.36%) |Training time=0.44s (20.09%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.34
+[2023-04-14 09:05:35,437] [INFO] [logging.py:96:log_dist] [Rank 0] step=490, skipped=9, lr=[9.598184719026e-06, 9.598184719026e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:05:35,455] [INFO] [timer.py:199:stop] epoch=0/micro_step=490/global_step=490, RunningAvgSamplesPerSec=105.31313206740428, CurrSamplesPerSec=116.99643739730386, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:05:35,547] [INFO] [logging.py:96:log_dist] [Rank 0] step=490, skipped=8, lr=[4.973011842968471e-06, 4.973011842968471e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 489|ppo_ep: 1|act_loss: -0.0692138671875|cri_loss: -0.027069091796875|unsuper_loss: 0.0
+average reward score: 4.6328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.38%) |Training time=0.44s (20.10%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.34
+epoch: 0|step: 490|ppo_ep: 1|act_loss: 0.021575927734375|cri_loss: 0.01507568359375|unsuper_loss: 0.0
+average reward score: 5.07421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.37%) |Training time=0.44s (20.09%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.34
+epoch: 0|step: 491|ppo_ep: 1|act_loss: 0.0997314453125|cri_loss: 0.055267333984375|unsuper_loss: 0.0
+average reward score: 6.04296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.50%) |Training time=0.43s (19.98%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.34
+epoch: 0|step: 492|ppo_ep: 1|act_loss: 0.054840087890625|cri_loss: 0.03228759765625|unsuper_loss: 0.0
+average reward score: 5.1484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.90%) |Training time=0.42s (19.56%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34
+epoch: 0|step: 493|ppo_ep: 1|act_loss: 0.181884765625|cri_loss: 0.099365234375|unsuper_loss: 0.0
+average reward score: 4.8984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.31%) |Training time=0.44s (20.14%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34
+epoch: 0|step: 494|ppo_ep: 1|act_loss: 0.259765625|cri_loss: 0.1429443359375|unsuper_loss: 0.0
+average reward score: 4.8203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.40%) |Training time=0.44s (20.06%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.34
+epoch: 0|step: 495|ppo_ep: 1|act_loss: -0.0028533935546875|cri_loss: 0.007110595703125|unsuper_loss: 0.0
+average reward score: 5.30859375
+-------------------------------------------------------------------------------------
+|E2E latency=3.01s |Gather latency=0.00s (0.00%) |Generate time=1.65s (54.93%) |Training time=0.53s (17.57%) |Others=0.83 (27.49%)|CurSamplesPerSec=10.64 |AvgSamplesPerSec=14.33
+epoch: 0|step: 496|ppo_ep: 1|act_loss: 0.019744873046875|cri_loss: 0.023468017578125|unsuper_loss: 0.0
+average reward score: 4.9453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.93%) |Training time=0.45s (20.52%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.33
+epoch: 0|step: 497|ppo_ep: 1|act_loss: -0.0919189453125|cri_loss: -0.03338623046875|unsuper_loss: 0.0
+average reward score: 6.00390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.66%) |Training time=0.45s (20.80%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.33
+epoch: 0|step: 498|ppo_ep: 1|act_loss: -0.146728515625|cri_loss: -0.05950927734375|unsuper_loss: 0.0
+average reward score: 4.94921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.63%) |Training time=0.45s (20.84%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.33
+[2023-04-14 09:05:57,994] [INFO] [logging.py:96:log_dist] [Rank 0] step=500, skipped=9, lr=[9.595434267151607e-06, 9.595434267151607e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:05:58,012] [INFO] [timer.py:199:stop] epoch=0/micro_step=500/global_step=500, RunningAvgSamplesPerSec=105.46009089853679, CurrSamplesPerSec=114.55113153375825, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:05:58,105] [INFO] [logging.py:96:log_dist] [Rank 0] step=500, skipped=8, lr=[4.971583073714247e-06, 4.971583073714247e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 499|ppo_ep: 1|act_loss: -0.0750732421875|cri_loss: -0.0311279296875|unsuper_loss: 0.0
+average reward score: 5.1328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.10%) |Training time=0.44s (20.35%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.33
+epoch: 0|step: 500|ppo_ep: 1|act_loss: -0.033935546875|cri_loss: -0.014617919921875|unsuper_loss: 0.0
+average reward score: 4.71875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.77%) |Training time=0.45s (20.69%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.34
+epoch: 0|step: 501|ppo_ep: 1|act_loss: 0.173583984375|cri_loss: 0.0955810546875|unsuper_loss: 0.0
+average reward score: 3.822265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.75%) |Training time=0.45s (20.70%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34
+epoch: 0|step: 502|ppo_ep: 1|act_loss: 0.10595703125|cri_loss: 0.0550537109375|unsuper_loss: 0.0
+average reward score: 3.6640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.89%) |Training time=0.45s (20.58%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.34
+epoch: 0|step: 503|ppo_ep: 1|act_loss: 0.12017822265625|cri_loss: 0.0670166015625|unsuper_loss: 0.0
+average reward score: 4.44140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.75%) |Training time=0.45s (20.71%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.34
+epoch: 0|step: 504|ppo_ep: 1|act_loss: -0.024993896484375|cri_loss: -0.0099639892578125|unsuper_loss: 0.0
+average reward score: 6.3515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.05%) |Training time=0.44s (20.41%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.34
+epoch: 0|step: 505|ppo_ep: 1|act_loss: -0.0570068359375|cri_loss: -0.0201263427734375|unsuper_loss: 0.0
+average reward score: 5.26171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.78%) |Training time=0.45s (20.66%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34
+epoch: 0|step: 506|ppo_ep: 1|act_loss: 0.01320648193359375|cri_loss: 0.00848388671875|unsuper_loss: 0.0
+average reward score: 5.80859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.74%) |Training time=0.45s (20.70%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34
+epoch: 0|step: 507|ppo_ep: 1|act_loss: 0.11767578125|cri_loss: 0.06378173828125|unsuper_loss: 0.0
+average reward score: 5.80078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.16%) |Training time=0.42s (19.30%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.34
+epoch: 0|step: 508|ppo_ep: 1|act_loss: 0.20751953125|cri_loss: 0.11810302734375|unsuper_loss: 0.0
+average reward score: 6.640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.82%) |Training time=0.45s (20.63%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34
+[2023-04-14 09:06:19,713] [INFO] [logging.py:96:log_dist] [Rank 0] step=510, skipped=9, lr=[9.592613105933331e-06, 9.592613105933331e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:06:19,731] [INFO] [timer.py:199:stop] epoch=0/micro_step=510/global_step=510, RunningAvgSamplesPerSec=105.59803688950436, CurrSamplesPerSec=110.03175743087647, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:06:19,824] [INFO] [logging.py:96:log_dist] [Rank 0] step=510, skipped=8, lr=[4.970117669636501e-06, 4.970117669636501e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 509|ppo_ep: 1|act_loss: 0.1334228515625|cri_loss: 0.0731201171875|unsuper_loss: 0.0
+average reward score: 5.1796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.56%) |Training time=0.45s (20.92%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34
+epoch: 0|step: 510|ppo_ep: 1|act_loss: 0.051361083984375|cri_loss: 0.03314208984375|unsuper_loss: 0.0
+average reward score: 6.3984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.79s (76.64%) |Training time=0.45s (19.15%) |Others=0.10 (4.22%)|CurSamplesPerSec=13.68 |AvgSamplesPerSec=14.34
+epoch: 0|step: 511|ppo_ep: 1|act_loss: 0.033843994140625|cri_loss: 0.02349853515625|unsuper_loss: 0.0
+average reward score: 6.17578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.99%) |Training time=0.44s (20.46%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.34
+epoch: 0|step: 512|ppo_ep: 1|act_loss: -0.001708984375|cri_loss: 0.003505706787109375|unsuper_loss: 0.0
+average reward score: 5.96484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.53%) |Training time=0.45s (20.92%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34
+epoch: 0|step: 513|ppo_ep: 1|act_loss: 0.069091796875|cri_loss: 0.04083251953125|unsuper_loss: 0.0
+average reward score: 5.02734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.86%) |Training time=0.45s (20.61%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.34
+epoch: 0|step: 514|ppo_ep: 1|act_loss: 0.059661865234375|cri_loss: 0.032440185546875|unsuper_loss: 0.0
+average reward score: 4.9375
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.66s (73.99%) |Training time=0.49s (21.61%) |Others=0.10 (4.40%)|CurSamplesPerSec=14.25 |AvgSamplesPerSec=14.34
+epoch: 0|step: 515|ppo_ep: 1|act_loss: 0.04168701171875|cri_loss: 0.021697998046875|unsuper_loss: 0.0
+average reward score: 4.65234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.28%) |Training time=0.44s (20.17%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.34
+epoch: 0|step: 516|ppo_ep: 1|act_loss: -0.0787353515625|cri_loss: -0.03155517578125|unsuper_loss: 0.0
+average reward score: 4.28125
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.63s (70.07%) |Training time=0.44s (18.84%) |Others=0.26 (11.09%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.34
+epoch: 0|step: 517|ppo_ep: 1|act_loss: -0.0179443359375|cri_loss: -0.00739288330078125|unsuper_loss: 0.0
+average reward score: 4.2734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.09%) |Training time=0.44s (20.33%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.34
+epoch: 0|step: 518|ppo_ep: 1|act_loss: -0.05718994140625|cri_loss: -0.0237884521484375|unsuper_loss: 0.0
+average reward score: 4.625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.80%) |Training time=0.45s (20.66%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.34
+[2023-04-14 09:06:41,836] [INFO] [logging.py:96:log_dist] [Rank 0] step=520, skipped=9, lr=[9.589721277187583e-06, 9.589721277187583e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:06:41,854] [INFO] [timer.py:199:stop] epoch=0/micro_step=520/global_step=520, RunningAvgSamplesPerSec=105.71696360506587, CurrSamplesPerSec=113.11680590894035, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:06:41,947] [INFO] [logging.py:96:log_dist] [Rank 0] step=520, skipped=8, lr=[4.968615652456056e-06, 4.968615652456056e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 519|ppo_ep: 1|act_loss: -0.0887451171875|cri_loss: -0.041229248046875|unsuper_loss: 0.0
+average reward score: 4.7265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.98%) |Training time=0.45s (20.50%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.34
+epoch: 0|step: 520|ppo_ep: 1|act_loss: -0.027496337890625|cri_loss: -0.00603485107421875|unsuper_loss: 0.0
+average reward score: 4.46875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.76%) |Training time=0.45s (20.69%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.35
+epoch: 0|step: 521|ppo_ep: 1|act_loss: -0.048919677734375|cri_loss: -0.023101806640625|unsuper_loss: 0.0
+average reward score: 4.20703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.02%) |Training time=0.44s (20.44%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.35
+epoch: 0|step: 522|ppo_ep: 1|act_loss: 0.08306884765625|cri_loss: 0.04388427734375|unsuper_loss: 0.0
+average reward score: 4.59375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.75%) |Training time=0.43s (19.69%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.35
+epoch: 0|step: 523|ppo_ep: 1|act_loss: -0.028228759765625|cri_loss: -0.0107574462890625|unsuper_loss: 0.0
+average reward score: 5.5625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.22%) |Training time=0.44s (20.22%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.35
+epoch: 0|step: 524|ppo_ep: 1|act_loss: -0.03082275390625|cri_loss: -0.01323699951171875|unsuper_loss: 0.0
+average reward score: 5.1328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.44%) |Training time=0.46s (21.03%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.35
+epoch: 0|step: 525|ppo_ep: 1|act_loss: -0.05889892578125|cri_loss: -0.026611328125|unsuper_loss: 0.0
+average reward score: 5.53515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.79s (76.84%) |Training time=0.44s (18.93%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.70 |AvgSamplesPerSec=14.35
+epoch: 0|step: 526|ppo_ep: 1|act_loss: 0.016693115234375|cri_loss: 0.01453399658203125|unsuper_loss: 0.0
+average reward score: 5.62890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.88%) |Training time=0.45s (20.57%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.35
+epoch: 0|step: 527|ppo_ep: 1|act_loss: 0.04449462890625|cri_loss: 0.025054931640625|unsuper_loss: 0.0
+average reward score: 6.07421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.64%) |Training time=0.45s (20.80%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.35
+epoch: 0|step: 528|ppo_ep: 1|act_loss: 0.0195465087890625|cri_loss: 0.01230621337890625|unsuper_loss: 0.0
+average reward score: 7.73046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.85%) |Training time=0.45s (20.59%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.35
+[2023-04-14 09:07:03,705] [INFO] [logging.py:96:log_dist] [Rank 0] step=530, skipped=9, lr=[9.586758823778245e-06, 9.586758823778245e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:07:03,723] [INFO] [timer.py:199:stop] epoch=0/micro_step=530/global_step=530, RunningAvgSamplesPerSec=105.85254516677392, CurrSamplesPerSec=112.33056644956756, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:07:03,816] [INFO] [logging.py:96:log_dist] [Rank 0] step=530, skipped=8, lr=[4.96707704443643e-06, 4.96707704443643e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 529|ppo_ep: 1|act_loss: 0.120849609375|cri_loss: 0.06866455078125|unsuper_loss: 0.0
+average reward score: 6.0390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.83%) |Training time=0.45s (20.64%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.35
+epoch: 0|step: 530|ppo_ep: 1|act_loss: 0.026214599609375|cri_loss: 0.016265869140625|unsuper_loss: 0.0
+average reward score: 5.6171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.73%) |Training time=0.45s (20.65%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.35
+epoch: 0|step: 531|ppo_ep: 1|act_loss: -0.0165557861328125|cri_loss: -0.006198883056640625|unsuper_loss: 0.0
+average reward score: 5.94921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.62s (71.93%) |Training time=0.46s (20.20%) |Others=0.18 (7.87%)|CurSamplesPerSec=14.20 |AvgSamplesPerSec=14.35
+epoch: 0|step: 532|ppo_ep: 1|act_loss: -0.03509521484375|cri_loss: -0.012542724609375|unsuper_loss: 0.0
+average reward score: 5.18359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.56%) |Training time=0.45s (20.89%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.35
+epoch: 0|step: 533|ppo_ep: 1|act_loss: 0.018951416015625|cri_loss: 0.0110321044921875|unsuper_loss: 0.0
+average reward score: 6.78125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.88%) |Training time=0.44s (20.55%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.35
+epoch: 0|step: 534|ppo_ep: 1|act_loss: 0.0035495758056640625|cri_loss: 0.0029582977294921875|unsuper_loss: 0.0
+average reward score: 5.83984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.72%) |Training time=0.45s (20.71%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.35
+epoch: 0|step: 535|ppo_ep: 1|act_loss: 0.0374755859375|cri_loss: 0.02105712890625|unsuper_loss: 0.0
+average reward score: 6.734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.86%) |Training time=0.45s (20.63%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.35
+epoch: 0|step: 536|ppo_ep: 1|act_loss: -0.029327392578125|cri_loss: -0.01258087158203125|unsuper_loss: 0.0
+average reward score: 6.7578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.68%) |Training time=0.45s (20.76%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.35
+epoch: 0|step: 537|ppo_ep: 1|act_loss: 0.035675048828125|cri_loss: 0.0200042724609375|unsuper_loss: 0.0
+average reward score: 6.4296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.73%) |Training time=0.43s (19.70%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.35
+epoch: 0|step: 538|ppo_ep: 1|act_loss: 0.0178375244140625|cri_loss: 0.010833740234375|unsuper_loss: 0.0
+average reward score: 5.0
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.46%) |Training time=0.44s (20.02%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.36
+[2023-04-14 09:07:25,507] [INFO] [logging.py:96:log_dist] [Rank 0] step=540, skipped=9, lr=[9.583725789616017e-06, 9.583725789616017e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:07:25,525] [INFO] [timer.py:199:stop] epoch=0/micro_step=540/global_step=540, RunningAvgSamplesPerSec=105.97276576876709, CurrSamplesPerSec=111.52968940341991, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:07:25,618] [INFO] [logging.py:96:log_dist] [Rank 0] step=540, skipped=8, lr=[4.965501868383507e-06, 4.965501868383507e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 539|ppo_ep: 1|act_loss: 0.04998779296875|cri_loss: 0.0268096923828125|unsuper_loss: 0.0
+average reward score: 5.23046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.81%) |Training time=0.45s (20.68%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.36
+epoch: 0|step: 540|ppo_ep: 1|act_loss: 0.0016565322875976562|cri_loss: 0.00145721435546875|unsuper_loss: 0.0
+average reward score: 5.8125
+-------------------------------------------------------------------------------------
+|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.69s (73.78%) |Training time=0.50s (21.90%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.98 |AvgSamplesPerSec=14.36
+epoch: 0|step: 541|ppo_ep: 1|act_loss: -0.017913818359375|cri_loss: -0.0053253173828125|unsuper_loss: 0.0
+average reward score: 5.453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.86%) |Training time=0.45s (20.61%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.36
+epoch: 0|step: 542|ppo_ep: 1|act_loss: -0.090087890625|cri_loss: -0.04132080078125|unsuper_loss: 0.0
+average reward score: 6.328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.99%) |Training time=0.45s (20.44%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.36
+epoch: 0|step: 543|ppo_ep: 1|act_loss: -0.0291748046875|cri_loss: -0.0136871337890625|unsuper_loss: 0.0
+average reward score: 5.1484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.66s (75.48%) |Training time=0.44s (20.04%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.51 |AvgSamplesPerSec=14.36
+epoch: 0|step: 544|ppo_ep: 1|act_loss: -0.0049285888671875|cri_loss: -0.0016317367553710938|unsuper_loss: 0.0
+average reward score: 5.3046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.73s (75.89%) |Training time=0.45s (19.61%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.07 |AvgSamplesPerSec=14.36
+epoch: 0|step: 545|ppo_ep: 1|act_loss: 0.0345458984375|cri_loss: 0.01959228515625|unsuper_loss: 0.0
+average reward score: 5.33984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.14%) |Training time=0.44s (20.33%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.36
+epoch: 0|step: 546|ppo_ep: 1|act_loss: 0.030914306640625|cri_loss: 0.016998291015625|unsuper_loss: 0.0
+average reward score: 4.8125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.85%) |Training time=0.45s (20.61%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.36
+epoch: 0|step: 547|ppo_ep: 1|act_loss: -0.01776123046875|cri_loss: -0.00774383544921875|unsuper_loss: 0.0
+average reward score: 5.34375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.54%) |Training time=0.45s (20.92%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.36
+epoch: 0|step: 548|ppo_ep: 1|act_loss: -0.08251953125|cri_loss: -0.03662109375|unsuper_loss: 0.0
+average reward score: 5.484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.67%) |Training time=0.45s (20.79%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.36
+[2023-04-14 09:07:47,483] [INFO] [logging.py:96:log_dist] [Rank 0] step=550, skipped=9, lr=[9.58062221965779e-06, 9.58062221965779e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:07:47,501] [INFO] [timer.py:199:stop] epoch=0/micro_step=550/global_step=550, RunningAvgSamplesPerSec=106.10481150577755, CurrSamplesPerSec=113.16315685191665, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:07:47,594] [INFO] [logging.py:96:log_dist] [Rank 0] step=550, skipped=8, lr=[4.963890147645195e-06, 4.963890147645195e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 549|ppo_ep: 1|act_loss: 0.0102081298828125|cri_loss: 0.0094757080078125|unsuper_loss: 0.0
+average reward score: 5.16796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.92%) |Training time=0.45s (20.56%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.36
+epoch: 0|step: 550|ppo_ep: 1|act_loss: 0.00433349609375|cri_loss: 0.0032825469970703125|unsuper_loss: 0.0
+average reward score: 5.84375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.92%) |Training time=0.45s (20.55%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.36
+epoch: 0|step: 551|ppo_ep: 1|act_loss: 0.11962890625|cri_loss: 0.0660400390625|unsuper_loss: 0.0
+average reward score: 5.328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.50%) |Training time=0.46s (20.97%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.36
+epoch: 0|step: 552|ppo_ep: 1|act_loss: 0.033538818359375|cri_loss: 0.019012451171875|unsuper_loss: 0.0
+average reward score: 5.9921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.53s |Gather latency=0.00s (0.00%) |Generate time=1.64s (64.93%) |Training time=0.43s (16.84%) |Others=0.46 (18.23%)|CurSamplesPerSec=12.66 |AvgSamplesPerSec=14.36
+epoch: 0|step: 553|ppo_ep: 1|act_loss: -0.00921630859375|cri_loss: -0.0028324127197265625|unsuper_loss: 0.0
+average reward score: 5.51171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.33%) |Training time=0.44s (20.09%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.36
+epoch: 0|step: 554|ppo_ep: 1|act_loss: -0.01349639892578125|cri_loss: -0.005279541015625|unsuper_loss: 0.0
+average reward score: 6.2578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.38%) |Training time=0.46s (21.06%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.36
+epoch: 0|step: 555|ppo_ep: 1|act_loss: -0.00868988037109375|cri_loss: -0.0027103424072265625|unsuper_loss: 0.0
+average reward score: 5.6484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.32%) |Training time=0.45s (19.43%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.74 |AvgSamplesPerSec=14.36
+epoch: 0|step: 556|ppo_ep: 1|act_loss: -0.038116455078125|cri_loss: -0.01690673828125|unsuper_loss: 0.0
+average reward score: 5.9375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.64%) |Training time=0.45s (20.81%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.36
+epoch: 0|step: 557|ppo_ep: 1|act_loss: -0.014892578125|cri_loss: -0.0054931640625|unsuper_loss: 0.0
+average reward score: 5.88671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.69%) |Training time=0.45s (20.75%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.36
+epoch: 0|step: 558|ppo_ep: 1|act_loss: 0.01316070556640625|cri_loss: 0.0116729736328125|unsuper_loss: 0.0
+average reward score: 5.16796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.86%) |Training time=0.46s (20.84%) |Others=0.12 (5.31%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.36
+[2023-04-14 09:08:09,697] [INFO] [logging.py:96:log_dist] [Rank 0] step=560, skipped=9, lr=[9.577448159905952e-06, 9.577448159905952e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:08:09,715] [INFO] [timer.py:199:stop] epoch=0/micro_step=560/global_step=560, RunningAvgSamplesPerSec=106.20737357610386, CurrSamplesPerSec=110.12176447228941, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:08:09,810] [INFO] [logging.py:96:log_dist] [Rank 0] step=560, skipped=8, lr=[4.962241906111083e-06, 4.962241906111083e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 559|ppo_ep: 1|act_loss: -0.028106689453125|cri_loss: -0.01242828369140625|unsuper_loss: 0.0
+average reward score: 6.11328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.50%) |Training time=0.45s (20.90%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.36
+epoch: 0|step: 560|ppo_ep: 1|act_loss: -0.00113677978515625|cri_loss: -1.621246337890625e-05|unsuper_loss: 0.0
+average reward score: 6.0859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.40%) |Training time=0.46s (21.07%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.36
+epoch: 0|step: 561|ppo_ep: 1|act_loss: 0.0175323486328125|cri_loss: 0.0101165771484375|unsuper_loss: 0.0
+average reward score: 5.63671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.52%) |Training time=0.45s (20.94%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.36
+epoch: 0|step: 562|ppo_ep: 1|act_loss: 0.1058349609375|cri_loss: 0.055267333984375|unsuper_loss: 0.0
+average reward score: 5.10546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.54%) |Training time=0.46s (20.97%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.36
+epoch: 0|step: 563|ppo_ep: 1|act_loss: 0.050018310546875|cri_loss: 0.0263671875|unsuper_loss: 0.0
+average reward score: 5.50390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.36%) |Training time=0.46s (21.10%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.36
+epoch: 0|step: 564|ppo_ep: 1|act_loss: -0.010040283203125|cri_loss: -0.003032684326171875|unsuper_loss: 0.0
+average reward score: 6.1328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.51%) |Training time=0.45s (20.85%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.36
+epoch: 0|step: 565|ppo_ep: 1|act_loss: -0.00855255126953125|cri_loss: -0.003559112548828125|unsuper_loss: 0.0
+average reward score: 5.15625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.33%) |Training time=0.46s (21.13%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.36
+epoch: 0|step: 566|ppo_ep: 1|act_loss: -0.0287017822265625|cri_loss: -0.0123443603515625|unsuper_loss: 0.0
+average reward score: 5.140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.45%) |Training time=0.45s (21.00%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.36
+epoch: 0|step: 567|ppo_ep: 1|act_loss: 0.047515869140625|cri_loss: 0.027313232421875|unsuper_loss: 0.0
+average reward score: 5.0
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.33%) |Training time=0.44s (20.13%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.37
+epoch: 0|step: 568|ppo_ep: 1|act_loss: 0.07122802734375|cri_loss: 0.04351806640625|unsuper_loss: 0.0
+average reward score: 5.375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.59%) |Training time=0.43s (19.79%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.37
+[2023-04-14 09:08:31,392] [INFO] [logging.py:96:log_dist] [Rank 0] step=570, skipped=9, lr=[9.574203657407728e-06, 9.574203657407728e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:08:31,410] [INFO] [timer.py:199:stop] epoch=0/micro_step=570/global_step=570, RunningAvgSamplesPerSec=106.29109456024048, CurrSamplesPerSec=111.84186216269592, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:08:31,502] [INFO] [logging.py:96:log_dist] [Rank 0] step=570, skipped=8, lr=[4.960557168212088e-06, 4.960557168212088e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 569|ppo_ep: 1|act_loss: -0.04010009765625|cri_loss: -0.0181732177734375|unsuper_loss: 0.0
+average reward score: 4.74609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.77%) |Training time=0.45s (20.71%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.37
+epoch: 0|step: 570|ppo_ep: 1|act_loss: -0.067138671875|cri_loss: -0.0316162109375|unsuper_loss: 0.0
+average reward score: 5.09765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.81s (77.41%) |Training time=0.43s (18.38%) |Others=0.10 (4.21%)|CurSamplesPerSec=13.68 |AvgSamplesPerSec=14.37
+epoch: 0|step: 571|ppo_ep: 1|act_loss: -0.029998779296875|cri_loss: -0.01392364501953125|unsuper_loss: 0.0
+average reward score: 5.21875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.59%) |Training time=0.45s (20.86%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.37
+epoch: 0|step: 572|ppo_ep: 1|act_loss: -0.065673828125|cri_loss: -0.0294036865234375|unsuper_loss: 0.0
+average reward score: 4.5234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.65s (74.51%) |Training time=0.46s (21.02%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.49 |AvgSamplesPerSec=14.37
+epoch: 0|step: 573|ppo_ep: 1|act_loss: 0.06640625|cri_loss: 0.0472412109375|unsuper_loss: 0.0
+average reward score: 4.609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.78s |Gather latency=0.00s (0.00%) |Generate time=1.75s (62.78%) |Training time=0.43s (15.34%) |Others=0.61 (21.88%)|CurSamplesPerSec=11.50 |AvgSamplesPerSec=14.36
+[2023-04-14 09:08:43,056] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 4096, reducing to 2048
+epoch: 0|step: 574|ppo_ep: 1|act_loss: 0.0679931640625|cri_loss: 0.039154052734375|unsuper_loss: 0.0
+average reward score: 4.84375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.72%) |Training time=0.42s (19.69%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.36
+epoch: 0|step: 575|ppo_ep: 1|act_loss: 0.12646484375|cri_loss: 0.0697021484375|unsuper_loss: 0.0
+average reward score: 4.4765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.58%) |Training time=0.45s (20.90%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.36
+[2023-04-14 09:08:47,490] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 576|ppo_ep: 1|act_loss: 0.1339111328125|cri_loss: 0.0732421875|unsuper_loss: 0.0
+average reward score: 4.7421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.93%) |Training time=0.45s (20.92%) |Others=0.09 (4.15%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.36
+[2023-04-14 09:08:49,655] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
+epoch: 0|step: 577|ppo_ep: 1|act_loss: 0.02734375|cri_loss: 0.025177001953125|unsuper_loss: 0.0
+average reward score: 4.15234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.05%) |Training time=0.45s (20.79%) |Others=0.09 (4.16%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.36
+epoch: 0|step: 578|ppo_ep: 1|act_loss: 0.216064453125|cri_loss: 0.143310546875|unsuper_loss: 0.0
+average reward score: 4.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.40%) |Training time=0.46s (21.08%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.36
+[2023-04-14 09:08:53,886] [INFO] [logging.py:96:log_dist] [Rank 0] step=580, skipped=10, lr=[9.571223416337106e-06, 9.571223416337106e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:08:53,904] [INFO] [timer.py:199:stop] epoch=0/micro_step=580/global_step=580, RunningAvgSamplesPerSec=106.40314362946779, CurrSamplesPerSec=112.75446463825047, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:08:53,997] [INFO] [logging.py:96:log_dist] [Rank 0] step=580, skipped=10, lr=[4.959183117273112e-06, 4.959183117273112e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 579|ppo_ep: 1|act_loss: 0.2337646484375|cri_loss: 0.13671875|unsuper_loss: 0.0
+average reward score: 4.58984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.84%) |Training time=0.45s (20.62%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.36
+epoch: 0|step: 580|ppo_ep: 1|act_loss: 0.0645751953125|cri_loss: 0.03631591796875|unsuper_loss: 0.0
+average reward score: 5.4609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.62%) |Training time=0.45s (20.82%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.37
+epoch: 0|step: 581|ppo_ep: 1|act_loss: 0.08306884765625|cri_loss: 0.04730224609375|unsuper_loss: 0.0
+average reward score: 4.984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.60%) |Training time=0.45s (20.86%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.37
+epoch: 0|step: 582|ppo_ep: 1|act_loss: 0.236083984375|cri_loss: 0.1676025390625|unsuper_loss: 0.0
+average reward score: 3.515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.22%) |Training time=0.44s (20.20%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.37
+epoch: 0|step: 583|ppo_ep: 1|act_loss: 0.007049560546875|cri_loss: 0.005939483642578125|unsuper_loss: 0.0
+average reward score: 4.2734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.27%) |Training time=0.44s (20.18%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.37
+epoch: 0|step: 584|ppo_ep: 1|act_loss: -0.004924774169921875|cri_loss: 0.00067138671875|unsuper_loss: 0.0
+average reward score: 4.3828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.90%) |Training time=0.45s (20.56%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.37
+epoch: 0|step: 585|ppo_ep: 1|act_loss: -0.0716552734375|cri_loss: -0.0330810546875|unsuper_loss: 0.0
+average reward score: 4.59375
+-------------------------------------------------------------------------------------
+|E2E latency=2.41s |Gather latency=0.00s (0.00%) |Generate time=1.80s (74.76%) |Training time=0.45s (18.58%) |Others=0.16 (6.65%)|CurSamplesPerSec=13.30 |AvgSamplesPerSec=14.37
+epoch: 0|step: 586|ppo_ep: 1|act_loss: -0.10107421875|cri_loss: -0.04376220703125|unsuper_loss: 0.0
+average reward score: 3.1640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.72%) |Training time=0.45s (20.75%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.37
+epoch: 0|step: 587|ppo_ep: 1|act_loss: -0.05377197265625|cri_loss: -0.02410888671875|unsuper_loss: 0.0
+average reward score: 4.57421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.75%) |Training time=0.45s (20.71%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.37
+epoch: 0|step: 588|ppo_ep: 1|act_loss: -0.0753173828125|cri_loss: -0.03302001953125|unsuper_loss: 0.0
+average reward score: 5.07421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.77%) |Training time=0.45s (20.69%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.37
+[2023-04-14 09:09:15,849] [INFO] [logging.py:96:log_dist] [Rank 0] step=590, skipped=10, lr=[9.567845205974828e-06, 9.567845205974828e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:09:15,867] [INFO] [timer.py:199:stop] epoch=0/micro_step=590/global_step=590, RunningAvgSamplesPerSec=106.49800390590352, CurrSamplesPerSec=110.78347744838766, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:09:15,960] [INFO] [logging.py:96:log_dist] [Rank 0] step=590, skipped=10, lr=[4.957432749209755e-06, 4.957432749209755e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 589|ppo_ep: 1|act_loss: -0.0306396484375|cri_loss: -0.0132293701171875|unsuper_loss: 0.0
+average reward score: 5.65625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.88%) |Training time=0.45s (20.64%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.37
+epoch: 0|step: 590|ppo_ep: 1|act_loss: -0.05267333984375|cri_loss: -0.025299072265625|unsuper_loss: 0.0
+average reward score: 5.5859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.66%) |Training time=0.45s (20.81%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.37
+epoch: 0|step: 591|ppo_ep: 1|act_loss: -0.007110595703125|cri_loss: -0.0025691986083984375|unsuper_loss: 0.0
+average reward score: 5.14453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.71%) |Training time=0.45s (20.74%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.37
+epoch: 0|step: 592|ppo_ep: 1|act_loss: -0.014495849609375|cri_loss: -0.005519866943359375|unsuper_loss: 0.0
+average reward score: 4.89453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.50%) |Training time=0.46s (20.98%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.37
+epoch: 0|step: 593|ppo_ep: 1|act_loss: 0.04583740234375|cri_loss: 0.031585693359375|unsuper_loss: 0.0
+average reward score: 5.0546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.75%) |Training time=0.45s (20.72%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.37
+epoch: 0|step: 594|ppo_ep: 1|act_loss: -0.0069427490234375|cri_loss: -0.00215911865234375|unsuper_loss: 0.0
+average reward score: 5.5859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.60%) |Training time=0.45s (20.86%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.37
+epoch: 0|step: 595|ppo_ep: 1|act_loss: 0.0129241943359375|cri_loss: 0.0084228515625|unsuper_loss: 0.0
+average reward score: 5.734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.90%) |Training time=0.45s (20.55%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.37
+epoch: 0|step: 596|ppo_ep: 1|act_loss: 0.01959228515625|cri_loss: 0.01129913330078125|unsuper_loss: 0.0
+average reward score: 5.81640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.08%) |Training time=0.44s (20.35%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.37
+epoch: 0|step: 597|ppo_ep: 1|act_loss: 0.0300140380859375|cri_loss: 0.0162200927734375|unsuper_loss: 0.0
+average reward score: 6.23046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.08%) |Training time=0.42s (19.33%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.37
+epoch: 0|step: 598|ppo_ep: 1|act_loss: 0.0751953125|cri_loss: 0.041015625|unsuper_loss: 0.0
+average reward score: 5.95703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.68%) |Training time=0.43s (19.76%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.37
+[2023-04-14 09:09:37,614] [INFO] [logging.py:96:log_dist] [Rank 0] step=600, skipped=10, lr=[9.564396695205104e-06, 9.564396695205104e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:09:37,618] [INFO] [timer.py:199:stop] epoch=0/micro_step=600/global_step=600, RunningAvgSamplesPerSec=106.58806837441567, CurrSamplesPerSec=100.7356997842199, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:09:37,737] [INFO] [logging.py:96:log_dist] [Rank 0] step=600, skipped=10, lr=[4.955645956064821e-06, 4.955645956064821e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 599|ppo_ep: 1|act_loss: 0.011077880859375|cri_loss: 0.00719451904296875|unsuper_loss: 0.0
+average reward score: 5.87890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.62s (72.81%) |Training time=0.48s (21.61%) |Others=0.12 (5.58%)|CurSamplesPerSec=14.39 |AvgSamplesPerSec=14.37
+epoch: 0|step: 600|ppo_ep: 1|act_loss: 0.03582763671875|cri_loss: 0.0189361572265625|unsuper_loss: 0.0
+average reward score: 6.08984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.91%) |Training time=0.45s (20.53%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.37
+epoch: 0|step: 601|ppo_ep: 1|act_loss: -0.0229949951171875|cri_loss: -0.0109100341796875|unsuper_loss: 0.0
+average reward score: 5.7734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.70%) |Training time=0.46s (20.80%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.37
+epoch: 0|step: 602|ppo_ep: 1|act_loss: -0.032379150390625|cri_loss: -0.0155487060546875|unsuper_loss: 0.0
+average reward score: 5.8984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.73s (76.26%) |Training time=0.44s (19.28%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.07 |AvgSamplesPerSec=14.37
+epoch: 0|step: 603|ppo_ep: 1|act_loss: -0.0089874267578125|cri_loss: -0.00399017333984375|unsuper_loss: 0.0
+average reward score: 5.703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.18%) |Training time=0.44s (20.28%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.37
+epoch: 0|step: 604|ppo_ep: 1|act_loss: -0.056182861328125|cri_loss: -0.0275115966796875|unsuper_loss: 0.0
+average reward score: 5.8984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.25%) |Training time=0.44s (20.19%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.37
+epoch: 0|step: 605|ppo_ep: 1|act_loss: -0.0023899078369140625|cri_loss: -0.0002040863037109375|unsuper_loss: 0.0
+average reward score: 5.5
+-------------------------------------------------------------------------------------
+|E2E latency=2.52s |Gather latency=0.00s (0.00%) |Generate time=1.63s (64.46%) |Training time=0.44s (17.39%) |Others=0.46 (18.15%)|CurSamplesPerSec=12.69 |AvgSamplesPerSec=14.37
+epoch: 0|step: 606|ppo_ep: 1|act_loss: 0.0224609375|cri_loss: 0.012969970703125|unsuper_loss: 0.0
+average reward score: 5.8828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.77%) |Training time=0.44s (20.46%) |Others=0.10 (4.77%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.37
+epoch: 0|step: 607|ppo_ep: 1|act_loss: 0.005245208740234375|cri_loss: 0.00289154052734375|unsuper_loss: 0.0
+average reward score: 5.80859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.24%) |Training time=0.44s (20.18%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.37
+epoch: 0|step: 608|ppo_ep: 1|act_loss: 0.064208984375|cri_loss: 0.033111572265625|unsuper_loss: 0.0
+average reward score: 6.0
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.11%) |Training time=0.44s (20.30%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.37
+[2023-04-14 09:09:59,804] [INFO] [logging.py:96:log_dist] [Rank 0] step=610, skipped=10, lr=[9.560877935143189e-06, 9.560877935143189e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:09:59,822] [INFO] [timer.py:199:stop] epoch=0/micro_step=610/global_step=610, RunningAvgSamplesPerSec=106.7184158486569, CurrSamplesPerSec=117.44905641885447, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:09:59,917] [INFO] [logging.py:96:log_dist] [Rank 0] step=610, skipped=10, lr=[4.953822764322896e-06, 4.953822764322896e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 609|ppo_ep: 1|act_loss: -0.0032367706298828125|cri_loss: -0.001232147216796875|unsuper_loss: 0.0
+average reward score: 5.40625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.27%) |Training time=0.44s (20.09%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.37
+epoch: 0|step: 610|ppo_ep: 1|act_loss: -0.024261474609375|cri_loss: -0.011322021484375|unsuper_loss: 0.0
+average reward score: 6.328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.11%) |Training time=0.44s (20.32%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.37
+epoch: 0|step: 611|ppo_ep: 1|act_loss: -0.024688720703125|cri_loss: -0.0111083984375|unsuper_loss: 0.0
+average reward score: 5.48828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.31%) |Training time=0.44s (20.12%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.38
+epoch: 0|step: 612|ppo_ep: 1|act_loss: -0.08856201171875|cri_loss: -0.041107177734375|unsuper_loss: 0.0
+average reward score: 5.3046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.50%) |Training time=0.41s (18.88%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.38
+epoch: 0|step: 613|ppo_ep: 1|act_loss: -0.018341064453125|cri_loss: -0.0086517333984375|unsuper_loss: 0.0
+average reward score: 5.0078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.25%) |Training time=0.42s (19.09%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.38
+epoch: 0|step: 614|ppo_ep: 1|act_loss: -0.032562255859375|cri_loss: -0.014404296875|unsuper_loss: 0.0
+average reward score: 6.59375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.67s (76.73%) |Training time=0.41s (18.59%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.38
+epoch: 0|step: 615|ppo_ep: 1|act_loss: 0.0167388916015625|cri_loss: 0.0095367431640625|unsuper_loss: 0.0
+average reward score: 6.2578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.84s (78.72%) |Training time=0.40s (17.01%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.38
+epoch: 0|step: 616|ppo_ep: 1|act_loss: 0.0194091796875|cri_loss: 0.01035308837890625|unsuper_loss: 0.0
+average reward score: 5.375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.16%) |Training time=0.44s (20.27%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.38
+epoch: 0|step: 617|ppo_ep: 1|act_loss: -0.0017223358154296875|cri_loss: -0.0002288818359375|unsuper_loss: 0.0
+average reward score: 5.72265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.67s (75.95%) |Training time=0.43s (19.48%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.38
+epoch: 0|step: 618|ppo_ep: 1|act_loss: 0.0160675048828125|cri_loss: 0.00909423828125|unsuper_loss: 0.0
+average reward score: 5.78125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.10%) |Training time=0.44s (20.35%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.38
+[2023-04-14 09:10:21,685] [INFO] [logging.py:96:log_dist] [Rank 0] step=620, skipped=10, lr=[9.557288977945587e-06, 9.557288977945587e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:10:21,703] [INFO] [timer.py:199:stop] epoch=0/micro_step=620/global_step=620, RunningAvgSamplesPerSec=106.9398379077226, CurrSamplesPerSec=119.05743097000362, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:10:21,797] [INFO] [logging.py:96:log_dist] [Rank 0] step=620, skipped=10, lr=[4.9519632010080765e-06, 4.9519632010080765e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 619|ppo_ep: 1|act_loss: 0.015380859375|cri_loss: 0.0081634521484375|unsuper_loss: 0.0
+average reward score: 6.078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.44%) |Training time=0.43s (20.00%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.38
+epoch: 0|step: 620|ppo_ep: 1|act_loss: -0.004367828369140625|cri_loss: -0.001682281494140625|unsuper_loss: 0.0
+average reward score: 5.515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.97%) |Training time=0.42s (19.48%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.38
+epoch: 0|step: 621|ppo_ep: 1|act_loss: -0.037322998046875|cri_loss: -0.0178985595703125|unsuper_loss: 0.0
+average reward score: 6.125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.80%) |Training time=0.45s (20.65%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.38
+epoch: 0|step: 622|ppo_ep: 1|act_loss: -0.0214996337890625|cri_loss: -0.0097198486328125|unsuper_loss: 0.0
+average reward score: 6.15625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.67%) |Training time=0.45s (20.79%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.38
+epoch: 0|step: 623|ppo_ep: 1|act_loss: -0.051300048828125|cri_loss: -0.024322509765625|unsuper_loss: 0.0
+average reward score: 6.875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.46%) |Training time=0.45s (20.79%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.38
+epoch: 0|step: 624|ppo_ep: 1|act_loss: -0.03497314453125|cri_loss: -0.016021728515625|unsuper_loss: 0.0
+average reward score: 5.1328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.20%) |Training time=0.44s (20.26%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.38
+epoch: 0|step: 625|ppo_ep: 1|act_loss: 0.016265869140625|cri_loss: 0.00904083251953125|unsuper_loss: 0.0
+average reward score: 6.7734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.57%) |Training time=0.45s (20.88%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.38
+epoch: 0|step: 626|ppo_ep: 1|act_loss: 0.03240966796875|cri_loss: 0.0170135498046875|unsuper_loss: 0.0
+average reward score: 5.78125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.40%) |Training time=0.46s (21.08%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.38
+epoch: 0|step: 627|ppo_ep: 1|act_loss: 0.0118408203125|cri_loss: 0.006252288818359375|unsuper_loss: 0.0
+average reward score: 5.59375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.45%) |Training time=0.43s (19.66%) |Others=0.11 (4.89%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.38
+epoch: 0|step: 628|ppo_ep: 1|act_loss: 0.0095062255859375|cri_loss: 0.005832672119140625|unsuper_loss: 0.0
+average reward score: 5.171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.35%) |Training time=0.42s (19.12%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.38
+[2023-04-14 09:10:43,620] [INFO] [logging.py:96:log_dist] [Rank 0] step=630, skipped=10, lr=[9.55362987680931e-06, 9.55362987680931e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:10:44,139] [INFO] [timer.py:199:stop] epoch=0/micro_step=630/global_step=630, RunningAvgSamplesPerSec=106.77900790800318, CurrSamplesPerSec=42.10884357156303, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:10:44,235] [INFO] [logging.py:96:log_dist] [Rank 0] step=630, skipped=10, lr=[4.95006729368358e-06, 4.95006729368358e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 629|ppo_ep: 1|act_loss: 0.048309326171875|cri_loss: 0.027130126953125|unsuper_loss: 0.0
+average reward score: 5.5546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.84s |Gather latency=0.00s (0.00%) |Generate time=1.82s (63.97%) |Training time=0.92s (32.46%) |Others=0.10 (3.57%)|CurSamplesPerSec=11.25 |AvgSamplesPerSec=14.38
+epoch: 0|step: 630|ppo_ep: 1|act_loss: 0.08837890625|cri_loss: 0.0487060546875|unsuper_loss: 0.0
+average reward score: 4.65234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.67%) |Training time=0.46s (20.86%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.38
+epoch: 0|step: 631|ppo_ep: 1|act_loss: 0.0255584716796875|cri_loss: 0.0142059326171875|unsuper_loss: 0.0
+average reward score: 4.984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.72s (75.66%) |Training time=0.46s (20.00%) |Others=0.10 (4.35%)|CurSamplesPerSec=14.05 |AvgSamplesPerSec=14.38
+epoch: 0|step: 632|ppo_ep: 1|act_loss: 0.001983642578125|cri_loss: 0.00154876708984375|unsuper_loss: 0.0
+average reward score: 5.5625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.44%) |Training time=0.46s (21.01%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.38
+epoch: 0|step: 633|ppo_ep: 1|act_loss: -0.04437255859375|cri_loss: -0.02142333984375|unsuper_loss: 0.0
+average reward score: 5.98828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.39%) |Training time=0.46s (21.04%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.38
+epoch: 0|step: 634|ppo_ep: 1|act_loss: -0.04608154296875|cri_loss: -0.0216827392578125|unsuper_loss: 0.0
+average reward score: 5.75
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.35%) |Training time=0.46s (21.03%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.38
+epoch: 0|step: 635|ppo_ep: 1|act_loss: -0.0011425018310546875|cri_loss: 0.0001544952392578125|unsuper_loss: 0.0
+average reward score: 5.62890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.55%) |Training time=0.45s (20.89%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.38
+epoch: 0|step: 636|ppo_ep: 1|act_loss: 0.02423095703125|cri_loss: 0.01284027099609375|unsuper_loss: 0.0
+average reward score: 5.94921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.34%) |Training time=0.46s (21.04%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.38
+epoch: 0|step: 637|ppo_ep: 1|act_loss: 0.058624267578125|cri_loss: 0.031890869140625|unsuper_loss: 0.0
+average reward score: 5.92578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.46%) |Training time=0.48s (21.83%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.38
+epoch: 0|step: 638|ppo_ep: 1|act_loss: 0.01776123046875|cri_loss: 0.00981903076171875|unsuper_loss: 0.0
+average reward score: 5.73046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.52%) |Training time=0.49s (22.02%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.38
+[2023-04-14 09:11:06,101] [INFO] [logging.py:96:log_dist] [Rank 0] step=640, skipped=10, lr=[9.549900685971059e-06, 9.549900685971059e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:11:06,119] [INFO] [timer.py:199:stop] epoch=0/micro_step=640/global_step=640, RunningAvgSamplesPerSec=106.75939140991701, CurrSamplesPerSec=95.74492697381844, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:11:06,212] [INFO] [logging.py:96:log_dist] [Rank 0] step=640, skipped=10, lr=[4.948135070451325e-06, 4.948135070451325e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 639|ppo_ep: 1|act_loss: 0.023193359375|cri_loss: 0.0130615234375|unsuper_loss: 0.0
+average reward score: 5.796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.14%) |Training time=0.50s (22.43%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.44 |AvgSamplesPerSec=14.38
+epoch: 0|step: 640|ppo_ep: 1|act_loss: -0.017852783203125|cri_loss: -0.00658416748046875|unsuper_loss: 0.0
+average reward score: 5.8828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.58%) |Training time=0.48s (21.95%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.38
+epoch: 0|step: 641|ppo_ep: 1|act_loss: -0.00551605224609375|cri_loss: -0.0018453598022460938|unsuper_loss: 0.0
+average reward score: 6.1875
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.28%) |Training time=0.48s (21.89%) |Others=0.11 (4.83%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.38
+epoch: 0|step: 642|ppo_ep: 1|act_loss: -0.0426025390625|cri_loss: -0.01947021484375|unsuper_loss: 0.0
+average reward score: 6.01171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.63%) |Training time=0.46s (20.89%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.54 |AvgSamplesPerSec=14.38
+epoch: 0|step: 643|ppo_ep: 1|act_loss: -0.005496978759765625|cri_loss: -0.0017728805541992188|unsuper_loss: 0.0
+average reward score: 5.9375
+-------------------------------------------------------------------------------------
+|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.72s (73.10%) |Training time=0.53s (22.70%) |Others=0.10 (4.20%)|CurSamplesPerSec=13.62 |AvgSamplesPerSec=14.38
+epoch: 0|step: 644|ppo_ep: 1|act_loss: -0.01617431640625|cri_loss: -0.007472991943359375|unsuper_loss: 0.0
+average reward score: 6.1484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.85%) |Training time=0.45s (20.63%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.38
+epoch: 0|step: 645|ppo_ep: 1|act_loss: -0.02459716796875|cri_loss: -0.00748443603515625|unsuper_loss: 0.0
+average reward score: 5.640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.72%) |Training time=0.45s (20.76%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.38
+epoch: 0|step: 646|ppo_ep: 1|act_loss: 0.0555419921875|cri_loss: 0.028778076171875|unsuper_loss: 0.0
+average reward score: 5.328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.55%) |Training time=0.45s (20.84%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.38
+epoch: 0|step: 647|ppo_ep: 1|act_loss: 0.06146240234375|cri_loss: 0.03546142578125|unsuper_loss: 0.0
+average reward score: 4.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.79%) |Training time=0.45s (20.67%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.38
+epoch: 0|step: 648|ppo_ep: 1|act_loss: 0.021026611328125|cri_loss: 0.01177215576171875|unsuper_loss: 0.0
+average reward score: 4.6796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.76%) |Training time=0.45s (20.71%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.38
+[2023-04-14 09:11:28,136] [INFO] [logging.py:96:log_dist] [Rank 0] step=650, skipped=10, lr=[9.546101460706439e-06, 9.546101460706439e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:11:28,154] [INFO] [timer.py:199:stop] epoch=0/micro_step=650/global_step=650, RunningAvgSamplesPerSec=106.73659438423795, CurrSamplesPerSec=108.43134463767952, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:11:28,247] [INFO] [logging.py:96:log_dist] [Rank 0] step=650, skipped=10, lr=[4.946166559951523e-06, 4.946166559951523e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 649|ppo_ep: 1|act_loss: 0.06475830078125|cri_loss: 0.035064697265625|unsuper_loss: 0.0
+average reward score: 4.3125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.49%) |Training time=0.46s (21.01%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.38
+epoch: 0|step: 650|ppo_ep: 1|act_loss: 0.03387451171875|cri_loss: 0.018646240234375|unsuper_loss: 0.0
+average reward score: 5.7890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.62%) |Training time=0.45s (20.83%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.38
+epoch: 0|step: 651|ppo_ep: 1|act_loss: -0.01080322265625|cri_loss: -0.00469207763671875|unsuper_loss: 0.0
+average reward score: 6.0390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.84%) |Training time=0.45s (20.62%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.38
+epoch: 0|step: 652|ppo_ep: 1|act_loss: 0.023223876953125|cri_loss: 0.0124664306640625|unsuper_loss: 0.0
+average reward score: 6.0
+-------------------------------------------------------------------------------------
+|E2E latency=2.83s |Gather latency=0.00s (0.00%) |Generate time=1.63s (57.58%) |Training time=0.45s (15.96%) |Others=0.75 (26.45%)|CurSamplesPerSec=11.32 |AvgSamplesPerSec=14.38
+epoch: 0|step: 653|ppo_ep: 1|act_loss: -0.010345458984375|cri_loss: -0.00450897216796875|unsuper_loss: 0.0
+average reward score: 5.46875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.61%) |Training time=0.45s (20.86%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.38
+epoch: 0|step: 654|ppo_ep: 1|act_loss: 0.023651123046875|cri_loss: 0.0128631591796875|unsuper_loss: 0.0
+average reward score: 5.9453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.40%) |Training time=0.46s (21.06%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.38
+epoch: 0|step: 655|ppo_ep: 1|act_loss: -0.0174407958984375|cri_loss: -0.0077667236328125|unsuper_loss: 0.0
+average reward score: 6.359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.62%) |Training time=0.45s (20.84%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.38
+epoch: 0|step: 656|ppo_ep: 1|act_loss: -0.03515625|cri_loss: -0.015533447265625|unsuper_loss: 0.0
+average reward score: 5.6796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.62%) |Training time=0.46s (20.85%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.38
+epoch: 0|step: 657|ppo_ep: 1|act_loss: 0.01385498046875|cri_loss: 0.007476806640625|unsuper_loss: 0.0
+average reward score: 5.5859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.67%) |Training time=0.43s (19.79%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.38
+epoch: 0|step: 658|ppo_ep: 1|act_loss: -0.00970458984375|cri_loss: -0.00411224365234375|unsuper_loss: 0.0
+average reward score: 6.0078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.41%) |Training time=0.46s (21.06%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.38
+[2023-04-14 09:11:50,718] [INFO] [logging.py:96:log_dist] [Rank 0] step=660, skipped=10, lr=[9.542232257329135e-06, 9.542232257329135e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:11:50,736] [INFO] [timer.py:199:stop] epoch=0/micro_step=660/global_step=660, RunningAvgSamplesPerSec=106.79015890097004, CurrSamplesPerSec=105.78220256428848, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:11:50,845] [INFO] [logging.py:96:log_dist] [Rank 0] step=660, skipped=10, lr=[4.944161791362246e-06, 4.944161791362246e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 659|ppo_ep: 1|act_loss: 0.037841796875|cri_loss: 0.023101806640625|unsuper_loss: 0.0
+average reward score: 6.63671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.79s (75.59%) |Training time=0.47s (19.60%) |Others=0.11 (4.81%)|CurSamplesPerSec=13.48 |AvgSamplesPerSec=14.38
+epoch: 0|step: 660|ppo_ep: 1|act_loss: -0.0282745361328125|cri_loss: -0.013214111328125|unsuper_loss: 0.0
+average reward score: 6.0625
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.72s (75.78%) |Training time=0.45s (19.86%) |Others=0.10 (4.35%)|CurSamplesPerSec=14.09 |AvgSamplesPerSec=14.38
+epoch: 0|step: 661|ppo_ep: 1|act_loss: -0.025054931640625|cri_loss: -0.012237548828125|unsuper_loss: 0.0
+average reward score: 5.82421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.50%) |Training time=0.46s (20.97%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.38
+epoch: 0|step: 662|ppo_ep: 1|act_loss: -0.02947998046875|cri_loss: -0.012115478515625|unsuper_loss: 0.0
+average reward score: 6.34375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.23%) |Training time=0.46s (21.24%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.38
+epoch: 0|step: 663|ppo_ep: 1|act_loss: 0.00266265869140625|cri_loss: 0.00197601318359375|unsuper_loss: 0.0
+average reward score: 6.06640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.53%) |Training time=0.45s (20.94%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.38
+epoch: 0|step: 664|ppo_ep: 1|act_loss: 0.026275634765625|cri_loss: 0.01369476318359375|unsuper_loss: 0.0
+average reward score: 5.7421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.68%) |Training time=0.45s (20.79%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.38
+epoch: 0|step: 665|ppo_ep: 1|act_loss: -0.0006084442138671875|cri_loss: 0.00041294097900390625|unsuper_loss: 0.0
+average reward score: 6.203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.46%) |Training time=0.46s (20.99%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.38
+epoch: 0|step: 666|ppo_ep: 1|act_loss: -0.00737762451171875|cri_loss: -0.00324249267578125|unsuper_loss: 0.0
+average reward score: 6.11328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.27%) |Training time=0.46s (21.21%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.38
+epoch: 0|step: 667|ppo_ep: 1|act_loss: 0.00258636474609375|cri_loss: 0.0026836395263671875|unsuper_loss: 0.0
+average reward score: 6.2265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.51%) |Training time=0.46s (20.95%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.38
+epoch: 0|step: 668|ppo_ep: 1|act_loss: 0.0150604248046875|cri_loss: 0.0088348388671875|unsuper_loss: 0.0
+average reward score: 6.2734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.63%) |Training time=0.45s (20.83%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.38
+[2023-04-14 09:12:12,578] [INFO] [logging.py:96:log_dist] [Rank 0] step=670, skipped=10, lr=[9.538293133190075e-06, 9.538293133190075e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:12:12,596] [INFO] [timer.py:199:stop] epoch=0/micro_step=670/global_step=670, RunningAvgSamplesPerSec=106.82658739397799, CurrSamplesPerSec=110.64967844108436, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:12:12,689] [INFO] [logging.py:96:log_dist] [Rank 0] step=670, skipped=10, lr=[4.942120794399002e-06, 4.942120794399002e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 669|ppo_ep: 1|act_loss: 0.037322998046875|cri_loss: 0.022125244140625|unsuper_loss: 0.0
+average reward score: 5.73828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.71%) |Training time=0.45s (20.78%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.38
+epoch: 0|step: 670|ppo_ep: 1|act_loss: -0.06207275390625|cri_loss: -0.0292816162109375|unsuper_loss: 0.0
+average reward score: 6.35546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.67%) |Training time=0.45s (20.80%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.38
+epoch: 0|step: 671|ppo_ep: 1|act_loss: 0.051605224609375|cri_loss: 0.0302734375|unsuper_loss: 0.0
+average reward score: 6.1015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.61s |Gather latency=0.00s (0.00%) |Generate time=1.64s (63.04%) |Training time=0.45s (17.15%) |Others=0.52 (19.81%)|CurSamplesPerSec=12.27 |AvgSamplesPerSec=14.38
+epoch: 0|step: 672|ppo_ep: 1|act_loss: -0.0310516357421875|cri_loss: -0.0143280029296875|unsuper_loss: 0.0
+average reward score: 6.0390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.67%) |Training time=0.43s (19.75%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.38
+epoch: 0|step: 673|ppo_ep: 1|act_loss: 0.03326416015625|cri_loss: 0.017730712890625|unsuper_loss: 0.0
+average reward score: 5.83203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.62s (71.00%) |Training time=0.56s (24.67%) |Others=0.10 (4.32%)|CurSamplesPerSec=14.01 |AvgSamplesPerSec=14.38
+epoch: 0|step: 674|ppo_ep: 1|act_loss: 0.06341552734375|cri_loss: 0.033233642578125|unsuper_loss: 0.0
+average reward score: 5.9140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.38%) |Training time=0.44s (20.08%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.38
+epoch: 0|step: 675|ppo_ep: 1|act_loss: 0.003040313720703125|cri_loss: 0.0020275115966796875|unsuper_loss: 0.0
+average reward score: 5.77734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.85%) |Training time=0.45s (20.59%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.38
+epoch: 0|step: 676|ppo_ep: 1|act_loss: -0.002918243408203125|cri_loss: -0.0004673004150390625|unsuper_loss: 0.0
+average reward score: 6.171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.62%) |Training time=0.45s (20.83%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.38
+epoch: 0|step: 677|ppo_ep: 1|act_loss: -0.061248779296875|cri_loss: -0.025970458984375|unsuper_loss: 0.0
+average reward score: 5.7109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.63%) |Training time=0.45s (20.81%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.38
+epoch: 0|step: 678|ppo_ep: 1|act_loss: -0.04840087890625|cri_loss: -0.019805908203125|unsuper_loss: 0.0
+average reward score: 6.37109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.63%) |Training time=0.45s (20.82%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.38
+[2023-04-14 09:12:34,852] [INFO] [logging.py:96:log_dist] [Rank 0] step=680, skipped=10, lr=[9.534284146676578e-06, 9.534284146676578e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:12:34,870] [INFO] [timer.py:199:stop] epoch=0/micro_step=680/global_step=680, RunningAvgSamplesPerSec=106.85888050857336, CurrSamplesPerSec=109.72457047932349, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:12:34,963] [INFO] [logging.py:96:log_dist] [Rank 0] step=680, skipped=10, lr=[4.9400435993142895e-06, 4.9400435993142895e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 679|ppo_ep: 1|act_loss: -0.08935546875|cri_loss: -0.039794921875|unsuper_loss: 0.0
+average reward score: 6.80078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.63%) |Training time=0.45s (20.86%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.38
+epoch: 0|step: 680|ppo_ep: 1|act_loss: -0.05218505859375|cri_loss: -0.02313232421875|unsuper_loss: 0.0
+average reward score: 6.2578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.67%) |Training time=0.45s (20.79%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.38
+epoch: 0|step: 681|ppo_ep: 1|act_loss: -0.0291748046875|cri_loss: -0.01338958740234375|unsuper_loss: 0.0
+average reward score: 6.03125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.67%) |Training time=0.45s (20.78%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.38
+epoch: 0|step: 682|ppo_ep: 1|act_loss: -0.0007305145263671875|cri_loss: 0.0006284713745117188|unsuper_loss: 0.0
+average reward score: 5.96875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.50%) |Training time=0.46s (20.96%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.38
+epoch: 0|step: 683|ppo_ep: 1|act_loss: 0.072021484375|cri_loss: 0.04058837890625|unsuper_loss: 0.0
+average reward score: 6.0703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.44%) |Training time=0.46s (21.04%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.38
+epoch: 0|step: 684|ppo_ep: 1|act_loss: 0.017791748046875|cri_loss: 0.0110321044921875|unsuper_loss: 0.0
+average reward score: 5.87890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.54%) |Training time=0.45s (20.91%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.38
+epoch: 0|step: 685|ppo_ep: 1|act_loss: 0.0330810546875|cri_loss: 0.018218994140625|unsuper_loss: 0.0
+average reward score: 5.90625
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.66%) |Training time=0.45s (20.36%) |Others=0.13 (5.98%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.38
+epoch: 0|step: 686|ppo_ep: 1|act_loss: -0.005847930908203125|cri_loss: 8.392333984375e-05|unsuper_loss: 0.0
+average reward score: 7.765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.57%) |Training time=0.46s (20.89%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.38
+epoch: 0|step: 687|ppo_ep: 1|act_loss: -0.0004673004150390625|cri_loss: 0.0002837181091308594|unsuper_loss: 0.0
+average reward score: 7.359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.28%) |Training time=0.44s (20.15%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.39
+epoch: 0|step: 688|ppo_ep: 1|act_loss: -0.022308349609375|cri_loss: -0.01081085205078125|unsuper_loss: 0.0
+average reward score: 7.1875
+-------------------------------------------------------------------------------------
+|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.63s (70.81%) |Training time=0.57s (24.90%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.91 |AvgSamplesPerSec=14.38
+[2023-04-14 09:12:56,792] [INFO] [logging.py:96:log_dist] [Rank 0] step=690, skipped=10, lr=[9.5302053572115e-06, 9.5302053572115e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:12:56,809] [INFO] [timer.py:199:stop] epoch=0/micro_step=690/global_step=690, RunningAvgSamplesPerSec=106.85876245451874, CurrSamplesPerSec=107.36396135087939, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:12:56,902] [INFO] [logging.py:96:log_dist] [Rank 0] step=690, skipped=10, lr=[4.937930236897151e-06, 4.937930236897151e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 689|ppo_ep: 1|act_loss: -0.044921875|cri_loss: -0.0218658447265625|unsuper_loss: 0.0
+average reward score: 6.12109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.65s (74.47%) |Training time=0.47s (21.09%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.41 |AvgSamplesPerSec=14.38
+epoch: 0|step: 690|ppo_ep: 1|act_loss: 0.017822265625|cri_loss: 0.00949859619140625|unsuper_loss: 0.0
+average reward score: 6.8515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.48%) |Training time=0.46s (20.98%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39
+epoch: 0|step: 691|ppo_ep: 1|act_loss: -0.010772705078125|cri_loss: -0.0048675537109375|unsuper_loss: 0.0
+average reward score: 6.66796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.49%) |Training time=0.46s (20.97%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.39
+epoch: 0|step: 692|ppo_ep: 1|act_loss: -0.02557373046875|cri_loss: -0.011932373046875|unsuper_loss: 0.0
+average reward score: 6.3984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.49%) |Training time=0.46s (20.99%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.39
+epoch: 0|step: 693|ppo_ep: 1|act_loss: -0.012969970703125|cri_loss: -0.0058441162109375|unsuper_loss: 0.0
+average reward score: 6.5859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.10%) |Training time=0.44s (20.36%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.39
+epoch: 0|step: 694|ppo_ep: 1|act_loss: -0.0142974853515625|cri_loss: -0.006664276123046875|unsuper_loss: 0.0
+average reward score: 6.52734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.05%) |Training time=0.44s (20.41%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39
+epoch: 0|step: 695|ppo_ep: 1|act_loss: -0.031768798828125|cri_loss: -0.01519012451171875|unsuper_loss: 0.0
+average reward score: 6.88671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.43%) |Training time=0.46s (21.02%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39
+epoch: 0|step: 696|ppo_ep: 1|act_loss: 0.026397705078125|cri_loss: 0.01434326171875|unsuper_loss: 0.0
+average reward score: 7.0703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.52%) |Training time=0.46s (20.94%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.39
+epoch: 0|step: 697|ppo_ep: 1|act_loss: 0.00650787353515625|cri_loss: 0.003849029541015625|unsuper_loss: 0.0
+average reward score: 5.8046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.76%) |Training time=0.45s (20.70%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.39
+epoch: 0|step: 698|ppo_ep: 1|act_loss: 0.00577545166015625|cri_loss: 0.0033206939697265625|unsuper_loss: 0.0
+average reward score: 6.82421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.57%) |Training time=0.45s (20.89%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.39
+[2023-04-14 09:13:18,531] [INFO] [logging.py:96:log_dist] [Rank 0] step=700, skipped=10, lr=[9.526056825252338e-06, 9.526056825252338e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:13:18,548] [INFO] [timer.py:199:stop] epoch=0/micro_step=700/global_step=700, RunningAvgSamplesPerSec=106.90781997916815, CurrSamplesPerSec=108.76533453483435, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:13:18,641] [INFO] [logging.py:96:log_dist] [Rank 0] step=700, skipped=10, lr=[4.935780738472714e-06, 4.935780738472714e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 699|ppo_ep: 1|act_loss: 0.04449462890625|cri_loss: 0.0231781005859375|unsuper_loss: 0.0
+average reward score: 6.2578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.49%) |Training time=0.46s (21.00%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.39
+epoch: 0|step: 700|ppo_ep: 1|act_loss: 0.03704833984375|cri_loss: 0.01910400390625|unsuper_loss: 0.0
+average reward score: 6.64453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.41s |Gather latency=0.00s (0.00%) |Generate time=1.62s (67.26%) |Training time=0.46s (19.00%) |Others=0.33 (13.75%)|CurSamplesPerSec=13.30 |AvgSamplesPerSec=14.39
+epoch: 0|step: 701|ppo_ep: 1|act_loss: -0.01427459716796875|cri_loss: -0.00638580322265625|unsuper_loss: 0.0
+average reward score: 6.8671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.99%) |Training time=0.44s (20.43%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.39
+epoch: 0|step: 702|ppo_ep: 1|act_loss: 0.043365478515625|cri_loss: 0.02227783203125|unsuper_loss: 0.0
+average reward score: 6.65625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.76%) |Training time=0.43s (19.66%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.39
+epoch: 0|step: 703|ppo_ep: 1|act_loss: -0.0113067626953125|cri_loss: -0.0049591064453125|unsuper_loss: 0.0
+average reward score: 7.98828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.64s (72.31%) |Training time=0.53s (23.32%) |Others=0.10 (4.37%)|CurSamplesPerSec=14.12 |AvgSamplesPerSec=14.39
+epoch: 0|step: 704|ppo_ep: 1|act_loss: -0.017974853515625|cri_loss: -0.00820159912109375|unsuper_loss: 0.0
+average reward score: 5.9609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.18%) |Training time=0.44s (20.26%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.39
+epoch: 0|step: 705|ppo_ep: 1|act_loss: 0.0028667449951171875|cri_loss: 0.0019893646240234375|unsuper_loss: 0.0
+average reward score: 6.1796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.01%) |Training time=0.44s (20.35%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.39
+epoch: 0|step: 706|ppo_ep: 1|act_loss: 0.0090789794921875|cri_loss: 0.0047760009765625|unsuper_loss: 0.0
+average reward score: 6.68359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.45s |Gather latency=0.00s (0.00%) |Generate time=1.62s (66.09%) |Training time=0.45s (18.32%) |Others=0.38 (15.59%)|CurSamplesPerSec=13.08 |AvgSamplesPerSec=14.39
+epoch: 0|step: 707|ppo_ep: 1|act_loss: -0.019134521484375|cri_loss: -0.00870513916015625|unsuper_loss: 0.0
+average reward score: 5.890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.67%) |Training time=0.45s (20.77%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.39
+epoch: 0|step: 708|ppo_ep: 1|act_loss: -0.0189361572265625|cri_loss: -0.00836181640625|unsuper_loss: 0.0
+average reward score: 6.6953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.09%) |Training time=0.44s (20.36%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.39
+[2023-04-14 09:13:40,822] [INFO] [logging.py:96:log_dist] [Rank 0] step=710, skipped=10, lr=[9.521838612290344e-06, 9.521838612290344e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:13:40,840] [INFO] [timer.py:199:stop] epoch=0/micro_step=710/global_step=710, RunningAvgSamplesPerSec=106.9759180678844, CurrSamplesPerSec=109.9996541451361, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:13:40,933] [INFO] [logging.py:96:log_dist] [Rank 0] step=710, skipped=10, lr=[4.933595135901733e-06, 4.933595135901733e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 709|ppo_ep: 1|act_loss: -0.016876220703125|cri_loss: -0.008026123046875|unsuper_loss: 0.0
+average reward score: 6.3203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.54%) |Training time=0.45s (20.92%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.39
+epoch: 0|step: 710|ppo_ep: 1|act_loss: -0.021514892578125|cri_loss: -0.01042938232421875|unsuper_loss: 0.0
+average reward score: 6.97265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.74%) |Training time=0.45s (20.70%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.39
+epoch: 0|step: 711|ppo_ep: 1|act_loss: -0.0162353515625|cri_loss: -0.00786590576171875|unsuper_loss: 0.0
+average reward score: 6.328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.71%) |Training time=0.45s (20.74%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.39
+epoch: 0|step: 712|ppo_ep: 1|act_loss: -0.00830078125|cri_loss: -0.0038661956787109375|unsuper_loss: 0.0
+average reward score: 6.0
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.55%) |Training time=0.45s (20.91%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.39
+epoch: 0|step: 713|ppo_ep: 1|act_loss: 0.0106964111328125|cri_loss: 0.005687713623046875|unsuper_loss: 0.0
+average reward score: 7.04296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.67%) |Training time=0.45s (20.78%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.39
+epoch: 0|step: 714|ppo_ep: 1|act_loss: -0.017486572265625|cri_loss: -0.00839996337890625|unsuper_loss: 0.0
+average reward score: 6.53515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.67%) |Training time=0.45s (20.78%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.39
+epoch: 0|step: 715|ppo_ep: 1|act_loss: -0.013763427734375|cri_loss: -0.006649017333984375|unsuper_loss: 0.0
+average reward score: 5.91796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.78%) |Training time=0.45s (20.66%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.39
+epoch: 0|step: 716|ppo_ep: 1|act_loss: -0.0003304481506347656|cri_loss: 0.0001533031463623047|unsuper_loss: 0.0
+average reward score: 5.84375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.76%) |Training time=0.45s (20.69%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39
+epoch: 0|step: 717|ppo_ep: 1|act_loss: 0.0340576171875|cri_loss: 0.02008056640625|unsuper_loss: 0.0
+average reward score: 5.8125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.94%) |Training time=0.42s (19.52%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39
+epoch: 0|step: 718|ppo_ep: 1|act_loss: -0.03125|cri_loss: -0.0149383544921875|unsuper_loss: 0.0
+average reward score: 6.3828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.66s (75.47%) |Training time=0.44s (20.04%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.39
+[2023-04-14 09:14:02,796] [INFO] [logging.py:96:log_dist] [Rank 0] step=720, skipped=10, lr=[9.517550780849608e-06, 9.517550780849608e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:14:02,814] [INFO] [timer.py:199:stop] epoch=0/micro_step=720/global_step=720, RunningAvgSamplesPerSec=107.05598671959284, CurrSamplesPerSec=114.59396915763213, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:14:02,907] [INFO] [logging.py:96:log_dist] [Rank 0] step=720, skipped=10, lr=[4.9313734615801076e-06, 4.9313734615801076e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 719|ppo_ep: 1|act_loss: -0.0001888275146484375|cri_loss: 0.0005574226379394531|unsuper_loss: 0.0
+average reward score: 6.26953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.43s |Gather latency=0.00s (0.00%) |Generate time=1.89s (77.74%) |Training time=0.44s (18.20%) |Others=0.10 (4.06%)|CurSamplesPerSec=13.19 |AvgSamplesPerSec=14.39
+epoch: 0|step: 720|ppo_ep: 1|act_loss: 0.01340484619140625|cri_loss: 0.00720977783203125|unsuper_loss: 0.0
+average reward score: 6.65625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.14%) |Training time=0.44s (20.29%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.39
+epoch: 0|step: 721|ppo_ep: 1|act_loss: -0.0146636962890625|cri_loss: -0.006771087646484375|unsuper_loss: 0.0
+average reward score: 5.671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.15%) |Training time=0.44s (20.27%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.39
+epoch: 0|step: 722|ppo_ep: 1|act_loss: 0.025054931640625|cri_loss: 0.013336181640625|unsuper_loss: 0.0
+average reward score: 5.57421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.62s (69.13%) |Training time=0.44s (18.96%) |Others=0.28 (11.91%)|CurSamplesPerSec=13.68 |AvgSamplesPerSec=14.39
+epoch: 0|step: 723|ppo_ep: 1|act_loss: 0.007061004638671875|cri_loss: 0.00450897216796875|unsuper_loss: 0.0
+average reward score: 5.5703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.96%) |Training time=0.44s (20.45%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.39
+epoch: 0|step: 724|ppo_ep: 1|act_loss: 0.06036376953125|cri_loss: 0.037261962890625|unsuper_loss: 0.0
+average reward score: 5.515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.16%) |Training time=0.44s (20.27%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.39
+epoch: 0|step: 725|ppo_ep: 1|act_loss: 0.0367431640625|cri_loss: 0.0194091796875|unsuper_loss: 0.0
+average reward score: 5.88671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.08%) |Training time=0.44s (20.34%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.39
+epoch: 0|step: 726|ppo_ep: 1|act_loss: 0.0280914306640625|cri_loss: 0.0167236328125|unsuper_loss: 0.0
+average reward score: 5.984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.25%) |Training time=0.44s (20.20%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.39
+epoch: 0|step: 727|ppo_ep: 1|act_loss: 0.0149993896484375|cri_loss: 0.009033203125|unsuper_loss: 0.0
+average reward score: 5.49609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.96%) |Training time=0.42s (19.47%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.39
+epoch: 0|step: 728|ppo_ep: 1|act_loss: 0.01483154296875|cri_loss: 0.0087432861328125|unsuper_loss: 0.0
+average reward score: 5.83203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.57s |Gather latency=0.00s (0.00%) |Generate time=1.62s (63.00%) |Training time=0.44s (16.91%) |Others=0.52 (20.09%)|CurSamplesPerSec=12.43 |AvgSamplesPerSec=14.39
+[2023-04-14 09:14:24,985] [INFO] [logging.py:96:log_dist] [Rank 0] step=730, skipped=10, lr=[9.51319339448614e-06, 9.51319339448614e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:14:25,003] [INFO] [timer.py:199:stop] epoch=0/micro_step=730/global_step=730, RunningAvgSamplesPerSec=107.17587828789893, CurrSamplesPerSec=115.30782176899385, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:14:25,096] [INFO] [logging.py:96:log_dist] [Rank 0] step=730, skipped=10, lr=[4.929115748438415e-06, 4.929115748438415e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 729|ppo_ep: 1|act_loss: 0.02777099609375|cri_loss: 0.01476287841796875|unsuper_loss: 0.0
+average reward score: 5.5546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.04%) |Training time=0.44s (20.40%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.39
+epoch: 0|step: 730|ppo_ep: 1|act_loss: -0.037384033203125|cri_loss: -0.0158538818359375|unsuper_loss: 0.0
+average reward score: 5.4453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.07%) |Training time=0.44s (20.34%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.39
+epoch: 0|step: 731|ppo_ep: 1|act_loss: 0.03265380859375|cri_loss: 0.019927978515625|unsuper_loss: 0.0
+average reward score: 6.0625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.51%) |Training time=0.43s (19.91%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.39
+epoch: 0|step: 732|ppo_ep: 1|act_loss: 0.0188751220703125|cri_loss: 0.0112762451171875|unsuper_loss: 0.0
+average reward score: 5.9921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.61%) |Training time=0.43s (19.85%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.39
+epoch: 0|step: 733|ppo_ep: 1|act_loss: 0.0577392578125|cri_loss: 0.034088134765625|unsuper_loss: 0.0
+average reward score: 6.3125
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.79s (76.67%) |Training time=0.45s (19.09%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.39
+epoch: 0|step: 734|ppo_ep: 1|act_loss: 0.01357269287109375|cri_loss: 0.009857177734375|unsuper_loss: 0.0
+average reward score: 5.46875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.71%) |Training time=0.45s (20.73%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.39
+epoch: 0|step: 735|ppo_ep: 1|act_loss: 0.0099334716796875|cri_loss: 0.007404327392578125|unsuper_loss: 0.0
+average reward score: 6.140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.62%) |Training time=0.45s (20.85%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.39
+epoch: 0|step: 736|ppo_ep: 1|act_loss: 0.0087432861328125|cri_loss: 0.00799560546875|unsuper_loss: 0.0
+average reward score: 5.79296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.92%) |Training time=0.44s (20.51%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.39
+epoch: 0|step: 737|ppo_ep: 1|act_loss: -0.020660400390625|cri_loss: -0.00601959228515625|unsuper_loss: 0.0
+average reward score: 5.6484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.87%) |Training time=0.45s (20.57%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.39
+epoch: 0|step: 738|ppo_ep: 1|act_loss: 0.0296478271484375|cri_loss: 0.017120361328125|unsuper_loss: 0.0
+average reward score: 5.80078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.86%) |Training time=0.45s (20.58%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.39
+[2023-04-14 09:14:46,793] [INFO] [logging.py:96:log_dist] [Rank 0] step=740, skipped=10, lr=[9.508766517786914e-06, 9.508766517786914e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:14:46,811] [INFO] [timer.py:199:stop] epoch=0/micro_step=740/global_step=740, RunningAvgSamplesPerSec=107.26615834374832, CurrSamplesPerSec=110.89899609178117, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:14:46,904] [INFO] [logging.py:96:log_dist] [Rank 0] step=740, skipped=10, lr=[4.926822029941406e-06, 4.926822029941406e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 739|ppo_ep: 1|act_loss: 0.06011962890625|cri_loss: 0.03375244140625|unsuper_loss: 0.0
+average reward score: 6.04296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.64%) |Training time=0.45s (20.83%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.40
+epoch: 0|step: 740|ppo_ep: 1|act_loss: 0.05902099609375|cri_loss: 0.0396728515625|unsuper_loss: 0.0
+average reward score: 5.3515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.65%) |Training time=0.45s (20.78%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.40
+epoch: 0|step: 741|ppo_ep: 1|act_loss: 0.0015230178833007812|cri_loss: 0.00215911865234375|unsuper_loss: 0.0
+average reward score: 5.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.81%) |Training time=0.45s (20.63%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.40
+epoch: 0|step: 742|ppo_ep: 1|act_loss: -0.025299072265625|cri_loss: -0.00882720947265625|unsuper_loss: 0.0
+average reward score: 5.58984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.47s |Gather latency=0.00s (0.00%) |Generate time=1.62s (65.72%) |Training time=0.45s (18.24%) |Others=0.40 (16.04%)|CurSamplesPerSec=12.98 |AvgSamplesPerSec=14.39
+epoch: 0|step: 743|ppo_ep: 1|act_loss: 0.13818359375|cri_loss: 0.08477783203125|unsuper_loss: 0.0
+average reward score: 5.765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.93%) |Training time=0.44s (20.50%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.39
+epoch: 0|step: 744|ppo_ep: 1|act_loss: 0.013946533203125|cri_loss: 0.009307861328125|unsuper_loss: 0.0
+average reward score: 5.73828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.65%) |Training time=0.45s (20.78%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.40
+epoch: 0|step: 745|ppo_ep: 1|act_loss: -0.13232421875|cri_loss: -0.056793212890625|unsuper_loss: 0.0
+average reward score: 5.62890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.69%) |Training time=0.45s (20.77%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.40
+epoch: 0|step: 746|ppo_ep: 1|act_loss: 0.020477294921875|cri_loss: 0.013763427734375|unsuper_loss: 0.0
+average reward score: 6.296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.51%) |Training time=0.43s (19.94%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.40
+epoch: 0|step: 747|ppo_ep: 1|act_loss: 0.060943603515625|cri_loss: 0.0345458984375|unsuper_loss: 0.0
+average reward score: 5.7578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.68s (76.20%) |Training time=0.42s (19.31%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.54 |AvgSamplesPerSec=14.40
+epoch: 0|step: 748|ppo_ep: 1|act_loss: 0.0953369140625|cri_loss: 0.051788330078125|unsuper_loss: 0.0
+average reward score: 5.58984375
+-------------------------------------------------------------------------------------
+|E2E latency=3.11s |Gather latency=0.00s (0.00%) |Generate time=1.89s (60.67%) |Training time=0.44s (14.24%) |Others=0.78 (25.09%)|CurSamplesPerSec=10.29 |AvgSamplesPerSec=14.39
+[2023-04-14 09:15:09,708] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 4096, but hysteresis is 2. Reducing hysteresis to 1
+[2023-04-14 09:15:09,708] [INFO] [logging.py:96:log_dist] [Rank 0] step=750, skipped=11, lr=[9.504722968731713e-06, 9.504722968731713e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:15:09,709] [INFO] [timer.py:199:stop] epoch=0/micro_step=750/global_step=750, RunningAvgSamplesPerSec=107.37028457158374, CurrSamplesPerSec=130.24941895276606, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:15:09,801] [INFO] [logging.py:96:log_dist] [Rank 0] step=750, skipped=10, lr=[4.9244923400875245e-06, 4.9244923400875245e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 749|ppo_ep: 1|act_loss: -0.016571044921875|cri_loss: -0.0033111572265625|unsuper_loss: 0.0
+average reward score: 5.74609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.35%) |Training time=0.41s (19.04%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.39
+epoch: 0|step: 750|ppo_ep: 1|act_loss: 0.0894775390625|cri_loss: 0.0479736328125|unsuper_loss: 0.0
+average reward score: 5.4609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.43%) |Training time=0.43s (20.02%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.39
+epoch: 0|step: 751|ppo_ep: 1|act_loss: -0.052947998046875|cri_loss: -0.0222015380859375|unsuper_loss: 0.0
+average reward score: 5.953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.69%) |Training time=0.43s (19.68%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.39
+epoch: 0|step: 752|ppo_ep: 1|act_loss: -0.16259765625|cri_loss: -0.07012939453125|unsuper_loss: 0.0
+average reward score: 5.8515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.06%) |Training time=0.44s (20.42%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.39
+epoch: 0|step: 753|ppo_ep: 1|act_loss: -0.019073486328125|cri_loss: -0.007190704345703125|unsuper_loss: 0.0
+average reward score: 5.9609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.31%) |Training time=0.44s (20.21%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.39
+epoch: 0|step: 754|ppo_ep: 1|act_loss: 0.01169586181640625|cri_loss: 0.00966644287109375|unsuper_loss: 0.0
+average reward score: 5.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.64s |Gather latency=0.00s (0.00%) |Generate time=1.63s (61.60%) |Training time=0.45s (16.89%) |Others=0.57 (21.51%)|CurSamplesPerSec=12.13 |AvgSamplesPerSec=14.39
+epoch: 0|step: 755|ppo_ep: 1|act_loss: 0.0743408203125|cri_loss: 0.040374755859375|unsuper_loss: 0.0
+average reward score: 5.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.86%) |Training time=0.45s (20.61%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.39
+epoch: 0|step: 756|ppo_ep: 1|act_loss: 0.00849151611328125|cri_loss: 0.005237579345703125|unsuper_loss: 0.0
+average reward score: 5.76171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.84%) |Training time=0.45s (20.61%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.39
+epoch: 0|step: 757|ppo_ep: 1|act_loss: -0.019805908203125|cri_loss: -0.006443023681640625|unsuper_loss: 0.0
+average reward score: 5.22265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.91%) |Training time=0.45s (20.53%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.39
+epoch: 0|step: 758|ppo_ep: 1|act_loss: -0.0068206787109375|cri_loss: -0.0015316009521484375|unsuper_loss: 0.0
+average reward score: 5.3828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.80%) |Training time=0.45s (20.66%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.39
+[2023-04-14 09:15:31,886] [INFO] [logging.py:96:log_dist] [Rank 0] step=760, skipped=11, lr=[9.500164242019886e-06, 9.500164242019886e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:15:31,904] [INFO] [timer.py:199:stop] epoch=0/micro_step=760/global_step=760, RunningAvgSamplesPerSec=107.4533984061924, CurrSamplesPerSec=112.81274968711625, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:15:31,997] [INFO] [logging.py:96:log_dist] [Rank 0] step=760, skipped=10, lr=[4.922126713408392e-06, 4.922126713408392e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 759|ppo_ep: 1|act_loss: -0.0772705078125|cri_loss: -0.036865234375|unsuper_loss: 0.0
+average reward score: 5.9296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.73s |Gather latency=0.00s (0.00%) |Generate time=1.63s (59.60%) |Training time=0.45s (16.36%) |Others=0.66 (24.04%)|CurSamplesPerSec=11.73 |AvgSamplesPerSec=14.39
+epoch: 0|step: 760|ppo_ep: 1|act_loss: -0.0902099609375|cri_loss: -0.041015625|unsuper_loss: 0.0
+average reward score: 5.8359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.71%) |Training time=0.45s (20.74%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.39
+epoch: 0|step: 761|ppo_ep: 1|act_loss: -0.000972747802734375|cri_loss: 0.000545501708984375|unsuper_loss: 0.0
+average reward score: 5.15625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.30%) |Training time=0.44s (20.10%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.39
+epoch: 0|step: 762|ppo_ep: 1|act_loss: 0.005298614501953125|cri_loss: 0.0035610198974609375|unsuper_loss: 0.0
+average reward score: 5.82421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.82s (78.13%) |Training time=0.41s (17.61%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.39
+epoch: 0|step: 763|ppo_ep: 1|act_loss: 0.0191497802734375|cri_loss: 0.0120086669921875|unsuper_loss: 0.0
+average reward score: 5.7734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.77%) |Training time=0.45s (20.67%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.39
+epoch: 0|step: 764|ppo_ep: 1|act_loss: 0.07623291015625|cri_loss: 0.041717529296875|unsuper_loss: 0.0
+average reward score: 5.55859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.91%) |Training time=0.45s (20.53%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.39
+epoch: 0|step: 765|ppo_ep: 1|act_loss: 0.0992431640625|cri_loss: 0.052703857421875|unsuper_loss: 0.0
+average reward score: 5.57421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.45%) |Training time=0.46s (20.99%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.39
+epoch: 0|step: 766|ppo_ep: 1|act_loss: -0.0380859375|cri_loss: -0.0171661376953125|unsuper_loss: 0.0
+average reward score: 5.703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.95%) |Training time=0.44s (20.48%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.39
+epoch: 0|step: 767|ppo_ep: 1|act_loss: 0.01441192626953125|cri_loss: 0.010528564453125|unsuper_loss: 0.0
+average reward score: 5.453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.59%) |Training time=0.45s (20.85%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.39
+epoch: 0|step: 768|ppo_ep: 1|act_loss: -0.042572021484375|cri_loss: -0.017120361328125|unsuper_loss: 0.0
+average reward score: 7.19140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.26%) |Training time=0.46s (21.21%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.39
+[2023-04-14 09:15:54,304] [INFO] [logging.py:96:log_dist] [Rank 0] step=770, skipped=11, lr=[9.49553621809577e-06, 9.49553621809577e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:15:55,082] [INFO] [timer.py:199:stop] epoch=0/micro_step=770/global_step=770, RunningAvgSamplesPerSec=107.16824767200629, CurrSamplesPerSec=30.633630233362283, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:15:55,175] [INFO] [logging.py:96:log_dist] [Rank 0] step=770, skipped=10, lr=[4.919725184968307e-06, 4.919725184968307e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 769|ppo_ep: 1|act_loss: -0.01451873779296875|cri_loss: -0.0052490234375|unsuper_loss: 0.0
+average reward score: 6.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.93s |Gather latency=0.00s (0.00%) |Generate time=1.62s (55.38%) |Training time=1.21s (41.26%) |Others=0.10 (3.36%)|CurSamplesPerSec=10.93 |AvgSamplesPerSec=14.38
+epoch: 0|step: 770|ppo_ep: 1|act_loss: 0.0462646484375|cri_loss: 0.0270233154296875|unsuper_loss: 0.0
+average reward score: 5.74609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.77%) |Training time=0.45s (20.67%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.38
+epoch: 0|step: 771|ppo_ep: 1|act_loss: 0.0252532958984375|cri_loss: 0.0135498046875|unsuper_loss: 0.0
+average reward score: 6.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.69%) |Training time=0.45s (20.76%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.38
+epoch: 0|step: 772|ppo_ep: 1|act_loss: -0.01418304443359375|cri_loss: -0.0051727294921875|unsuper_loss: 0.0
+average reward score: 5.8671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.65%) |Training time=0.45s (20.81%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.38
+epoch: 0|step: 773|ppo_ep: 1|act_loss: -0.01824951171875|cri_loss: -0.00795745849609375|unsuper_loss: 0.0
+average reward score: 6.390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.76%) |Training time=0.45s (20.69%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.38
+epoch: 0|step: 774|ppo_ep: 1|act_loss: 0.12939453125|cri_loss: 0.07391357421875|unsuper_loss: 0.0
+average reward score: 5.640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.51%) |Training time=0.45s (20.94%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.38
+epoch: 0|step: 775|ppo_ep: 1|act_loss: -0.010955810546875|cri_loss: -0.00473785400390625|unsuper_loss: 0.0
+average reward score: 6.5234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.23%) |Training time=0.44s (20.20%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.38
+epoch: 0|step: 776|ppo_ep: 1|act_loss: -0.0279388427734375|cri_loss: -0.01334381103515625|unsuper_loss: 0.0
+average reward score: 5.8671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.51s |Gather latency=0.00s (0.00%) |Generate time=1.85s (73.64%) |Training time=0.45s (17.73%) |Others=0.22 (8.63%)|CurSamplesPerSec=12.74 |AvgSamplesPerSec=14.38
+epoch: 0|step: 777|ppo_ep: 1|act_loss: 0.01385498046875|cri_loss: 0.0074615478515625|unsuper_loss: 0.0
+average reward score: 7.5390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.60%) |Training time=0.45s (20.84%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.38
+[2023-04-14 09:16:15,024] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 778|ppo_ep: 1|act_loss: -0.020172119140625|cri_loss: -0.00988006591796875|unsuper_loss: 0.0
+average reward score: 6.88671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.22%) |Training time=0.45s (20.70%) |Others=0.09 (4.08%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.38
+[2023-04-14 09:16:17,078] [INFO] [logging.py:96:log_dist] [Rank 0] step=780, skipped=11, lr=[9.490838965557847e-06, 9.490838965557847e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:16:17,096] [INFO] [timer.py:199:stop] epoch=0/micro_step=780/global_step=780, RunningAvgSamplesPerSec=107.25929385459395, CurrSamplesPerSec=115.33516940086052, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:16:17,181] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+[2023-04-14 09:16:17,182] [INFO] [logging.py:96:log_dist] [Rank 0] step=780, skipped=12, lr=[4.917778136851294e-06, 4.917778136851294e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 779|ppo_ep: 1|act_loss: -0.0114898681640625|cri_loss: -0.00531005859375|unsuper_loss: 0.0
+average reward score: 6.54296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.73%) |Training time=0.46s (21.14%) |Others=0.09 (4.13%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.38
+epoch: 0|step: 780|ppo_ep: 1|act_loss: -0.04376220703125|cri_loss: -0.021148681640625|unsuper_loss: 0.0
+average reward score: 6.203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.35%) |Training time=0.46s (21.02%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.38
+epoch: 0|step: 781|ppo_ep: 1|act_loss: -0.00690460205078125|cri_loss: -0.003047943115234375|unsuper_loss: 0.0
+average reward score: 6.9140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.59%) |Training time=0.45s (20.85%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.38
+epoch: 0|step: 782|ppo_ep: 1|act_loss: 0.05810546875|cri_loss: 0.031005859375|unsuper_loss: 0.0
+average reward score: 6.40625
+-------------------------------------------------------------------------------------
+|E2E latency=2.90s |Gather latency=0.00s (0.00%) |Generate time=1.62s (55.99%) |Training time=0.45s (15.46%) |Others=0.83 (28.55%)|CurSamplesPerSec=11.05 |AvgSamplesPerSec=14.38
+epoch: 0|step: 783|ppo_ep: 1|act_loss: 0.006511688232421875|cri_loss: 0.00395965576171875|unsuper_loss: 0.0
+average reward score: 6.203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.71%) |Training time=0.45s (20.72%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.38
+epoch: 0|step: 784|ppo_ep: 1|act_loss: -0.01220703125|cri_loss: -0.005767822265625|unsuper_loss: 0.0
+average reward score: 6.125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.60%) |Training time=0.45s (20.84%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.38
+epoch: 0|step: 785|ppo_ep: 1|act_loss: -0.0089111328125|cri_loss: -0.0036163330078125|unsuper_loss: 0.0
+average reward score: 6.1015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.94%) |Training time=0.44s (20.49%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.38
+epoch: 0|step: 786|ppo_ep: 1|act_loss: -0.0023040771484375|cri_loss: -0.0006747245788574219|unsuper_loss: 0.0
+average reward score: 6.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.71%) |Training time=0.45s (20.73%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.38
+epoch: 0|step: 787|ppo_ep: 1|act_loss: 0.01407623291015625|cri_loss: 0.00878143310546875|unsuper_loss: 0.0
+average reward score: 5.70703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.60%) |Training time=0.45s (20.85%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.38
+epoch: 0|step: 788|ppo_ep: 1|act_loss: 0.0009098052978515625|cri_loss: 0.0007901191711425781|unsuper_loss: 0.0
+average reward score: 6.359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.46s |Gather latency=0.00s (0.00%) |Generate time=1.62s (65.68%) |Training time=0.45s (18.26%) |Others=0.40 (16.06%)|CurSamplesPerSec=13.00 |AvgSamplesPerSec=14.38
+[2023-04-14 09:16:39,783] [INFO] [logging.py:96:log_dist] [Rank 0] step=790, skipped=11, lr=[9.486072554030733e-06, 9.486072554030733e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:16:39,801] [INFO] [timer.py:199:stop] epoch=0/micro_step=790/global_step=790, RunningAvgSamplesPerSec=107.30907836345817, CurrSamplesPerSec=113.32836398086337, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:16:39,895] [INFO] [logging.py:96:log_dist] [Rank 0] step=790, skipped=12, lr=[4.915312075301798e-06, 4.915312075301798e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 789|ppo_ep: 1|act_loss: 0.01332855224609375|cri_loss: 0.007904052734375|unsuper_loss: 0.0
+average reward score: 6.0234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.94%) |Training time=0.45s (20.51%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.38
+epoch: 0|step: 790|ppo_ep: 1|act_loss: 0.06280517578125|cri_loss: 0.036529541015625|unsuper_loss: 0.0
+average reward score: 5.82421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.14%) |Training time=0.44s (20.28%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.38
+epoch: 0|step: 791|ppo_ep: 1|act_loss: -0.03814697265625|cri_loss: -0.018585205078125|unsuper_loss: 0.0
+average reward score: 5.67578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.84s (78.81%) |Training time=0.40s (16.97%) |Others=0.10 (4.22%)|CurSamplesPerSec=13.68 |AvgSamplesPerSec=14.38
+epoch: 0|step: 792|ppo_ep: 1|act_loss: 0.05023193359375|cri_loss: 0.0278167724609375|unsuper_loss: 0.0
+average reward score: 4.94921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.94%) |Training time=0.44s (20.50%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.38
+epoch: 0|step: 793|ppo_ep: 1|act_loss: 0.00034809112548828125|cri_loss: 0.002155303955078125|unsuper_loss: 0.0
+average reward score: 5.09375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.10%) |Training time=0.44s (20.33%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.38
+epoch: 0|step: 794|ppo_ep: 1|act_loss: -0.011444091796875|cri_loss: -0.00421142578125|unsuper_loss: 0.0
+average reward score: 4.73046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.58s |Gather latency=0.00s (0.00%) |Generate time=1.62s (62.67%) |Training time=0.45s (17.42%) |Others=0.51 (19.91%)|CurSamplesPerSec=12.39 |AvgSamplesPerSec=14.38
+epoch: 0|step: 795|ppo_ep: 1|act_loss: 0.06146240234375|cri_loss: 0.0360107421875|unsuper_loss: 0.0
+average reward score: 4.29296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.81%) |Training time=0.45s (20.62%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.38
+epoch: 0|step: 796|ppo_ep: 1|act_loss: 0.07598876953125|cri_loss: 0.0418701171875|unsuper_loss: 0.0
+average reward score: 4.9453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.89%) |Training time=0.45s (20.56%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.38
+epoch: 0|step: 797|ppo_ep: 1|act_loss: -0.06427001953125|cri_loss: -0.0310516357421875|unsuper_loss: 0.0
+average reward score: 5.421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.82%) |Training time=0.45s (20.62%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.38
+epoch: 0|step: 798|ppo_ep: 1|act_loss: 0.00328826904296875|cri_loss: 0.0026397705078125|unsuper_loss: 0.0
+average reward score: 5.4375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.72%) |Training time=0.45s (20.73%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.38
+[2023-04-14 09:17:02,019] [INFO] [logging.py:96:log_dist] [Rank 0] step=800, skipped=11, lr=[9.481237054164141e-06, 9.481237054164141e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:17:02,037] [INFO] [timer.py:199:stop] epoch=0/micro_step=800/global_step=800, RunningAvgSamplesPerSec=107.40037872515786, CurrSamplesPerSec=111.78615850382913, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:17:02,130] [INFO] [logging.py:96:log_dist] [Rank 0] step=800, skipped=12, lr=[4.912810213000723e-06, 4.912810213000723e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 799|ppo_ep: 1|act_loss: -0.02484130859375|cri_loss: -0.0116119384765625|unsuper_loss: 0.0
+average reward score: 5.140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.69%) |Training time=0.45s (20.76%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.38
+epoch: 0|step: 800|ppo_ep: 1|act_loss: 0.037261962890625|cri_loss: 0.021484375|unsuper_loss: 0.0
+average reward score: 4.92578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.85%) |Training time=0.45s (20.59%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.38
+epoch: 0|step: 801|ppo_ep: 1|act_loss: -0.002933502197265625|cri_loss: -0.000164031982421875|unsuper_loss: 0.0
+average reward score: 5.44921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.67%) |Training time=0.45s (20.76%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.38
+epoch: 0|step: 802|ppo_ep: 1|act_loss: 0.0721435546875|cri_loss: 0.041412353515625|unsuper_loss: 0.0
+average reward score: 5.71484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.59%) |Training time=0.45s (20.86%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.38
+epoch: 0|step: 803|ppo_ep: 1|act_loss: 0.01995849609375|cri_loss: 0.01092529296875|unsuper_loss: 0.0
+average reward score: 5.3515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.73%) |Training time=0.45s (20.72%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.38
+epoch: 0|step: 804|ppo_ep: 1|act_loss: -0.00691986083984375|cri_loss: -0.0017642974853515625|unsuper_loss: 0.0
+average reward score: 5.37890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.62%) |Training time=0.46s (20.87%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.38
+epoch: 0|step: 805|ppo_ep: 1|act_loss: -0.005359649658203125|cri_loss: -0.001361846923828125|unsuper_loss: 0.0
+average reward score: 5.765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.38s |Gather latency=0.00s (0.00%) |Generate time=1.78s (74.75%) |Training time=0.49s (20.79%) |Others=0.11 (4.46%)|CurSamplesPerSec=13.45 |AvgSamplesPerSec=14.38
+epoch: 0|step: 806|ppo_ep: 1|act_loss: 0.0159759521484375|cri_loss: 0.0096588134765625|unsuper_loss: 0.0
+average reward score: 5.68359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.67s (76.42%) |Training time=0.42s (19.06%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.38
+epoch: 0|step: 807|ppo_ep: 1|act_loss: -0.0023345947265625|cri_loss: 0.000698089599609375|unsuper_loss: 0.0
+average reward score: 5.48046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.79%) |Training time=0.45s (20.67%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.38
+epoch: 0|step: 808|ppo_ep: 1|act_loss: 0.035400390625|cri_loss: 0.019683837890625|unsuper_loss: 0.0
+average reward score: 6.15625
+-------------------------------------------------------------------------------------
+|E2E latency=2.76s |Gather latency=0.00s (0.00%) |Generate time=1.62s (58.90%) |Training time=0.44s (16.06%) |Others=0.69 (25.05%)|CurSamplesPerSec=11.60 |AvgSamplesPerSec=14.38
+[2023-04-14 09:17:24,516] [INFO] [logging.py:96:log_dist] [Rank 0] step=810, skipped=11, lr=[9.476332537631846e-06, 9.476332537631846e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:17:24,534] [INFO] [timer.py:199:stop] epoch=0/micro_step=810/global_step=810, RunningAvgSamplesPerSec=107.47037359827968, CurrSamplesPerSec=114.3687949064038, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:17:24,627] [INFO] [logging.py:96:log_dist] [Rank 0] step=810, skipped=12, lr=[4.910272587031704e-06, 4.910272587031704e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 809|ppo_ep: 1|act_loss: 0.0015316009521484375|cri_loss: 0.0021305084228515625|unsuper_loss: 0.0
+average reward score: 5.58984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.01%) |Training time=0.44s (20.45%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.38
+epoch: 0|step: 810|ppo_ep: 1|act_loss: 0.000553131103515625|cri_loss: 0.0010042190551757812|unsuper_loss: 0.0
+average reward score: 5.8359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.02%) |Training time=0.44s (20.42%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.38
+epoch: 0|step: 811|ppo_ep: 1|act_loss: -0.03125|cri_loss: -0.01335906982421875|unsuper_loss: 0.0
+average reward score: 5.8359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.07%) |Training time=0.44s (20.36%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.38
+epoch: 0|step: 812|ppo_ep: 1|act_loss: -0.0394287109375|cri_loss: -0.018096923828125|unsuper_loss: 0.0
+average reward score: 5.6953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.23%) |Training time=0.44s (20.22%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.38
+epoch: 0|step: 813|ppo_ep: 1|act_loss: 0.00807952880859375|cri_loss: 0.004505157470703125|unsuper_loss: 0.0
+average reward score: 5.90234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.37%) |Training time=0.43s (20.07%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.38
+epoch: 0|step: 814|ppo_ep: 1|act_loss: 0.003143310546875|cri_loss: 0.0019350051879882812|unsuper_loss: 0.0
+average reward score: 5.6171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.41%) |Training time=0.44s (20.07%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.38
+epoch: 0|step: 815|ppo_ep: 1|act_loss: 0.00887298583984375|cri_loss: 0.00640106201171875|unsuper_loss: 0.0
+average reward score: 5.6328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.88%) |Training time=0.45s (20.60%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.38
+epoch: 0|step: 816|ppo_ep: 1|act_loss: -0.01953125|cri_loss: -0.009552001953125|unsuper_loss: 0.0
+average reward score: 5.63671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.18%) |Training time=0.44s (20.27%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.38
+epoch: 0|step: 817|ppo_ep: 1|act_loss: 0.0401611328125|cri_loss: 0.02154541015625|unsuper_loss: 0.0
+average reward score: 5.50390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.94%) |Training time=0.44s (20.51%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.38
+epoch: 0|step: 818|ppo_ep: 1|act_loss: 0.004489898681640625|cri_loss: 0.0026702880859375|unsuper_loss: 0.0
+average reward score: 5.6015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.90%) |Training time=0.45s (20.55%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.38
+[2023-04-14 09:17:46,179] [INFO] [logging.py:96:log_dist] [Rank 0] step=820, skipped=11, lr=[9.471359077130615e-06, 9.471359077130615e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:17:46,197] [INFO] [timer.py:199:stop] epoch=0/micro_step=820/global_step=820, RunningAvgSamplesPerSec=107.5542575745028, CurrSamplesPerSec=112.59810387019539, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:17:46,290] [INFO] [logging.py:96:log_dist] [Rank 0] step=820, skipped=12, lr=[4.907699235008478e-06, 4.907699235008478e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 819|ppo_ep: 1|act_loss: -0.003543853759765625|cri_loss: -0.0015096664428710938|unsuper_loss: 0.0
+average reward score: 5.57421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.89%) |Training time=0.45s (20.60%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.38
+epoch: 0|step: 820|ppo_ep: 1|act_loss: -0.02679443359375|cri_loss: -0.01294708251953125|unsuper_loss: 0.0
+average reward score: 5.2109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.05%) |Training time=0.44s (19.68%) |Others=0.16 (7.28%)|CurSamplesPerSec=14.30 |AvgSamplesPerSec=14.38
+epoch: 0|step: 821|ppo_ep: 1|act_loss: 0.0236663818359375|cri_loss: 0.012542724609375|unsuper_loss: 0.0
+average reward score: 5.87109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.72s (76.86%) |Training time=0.42s (18.71%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.28 |AvgSamplesPerSec=14.38
+epoch: 0|step: 822|ppo_ep: 1|act_loss: 0.0013227462768554688|cri_loss: 0.0012483596801757812|unsuper_loss: 0.0
+average reward score: 5.328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.81%) |Training time=0.45s (20.66%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.38
+epoch: 0|step: 823|ppo_ep: 1|act_loss: 0.02459716796875|cri_loss: 0.01287078857421875|unsuper_loss: 0.0
+average reward score: 5.5390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.78%) |Training time=0.45s (20.69%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.38
+epoch: 0|step: 824|ppo_ep: 1|act_loss: -0.045379638671875|cri_loss: -0.0219268798828125|unsuper_loss: 0.0
+average reward score: 6.2109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.80%) |Training time=0.45s (20.67%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.38
+epoch: 0|step: 825|ppo_ep: 1|act_loss: -0.005863189697265625|cri_loss: -0.00244140625|unsuper_loss: 0.0
+average reward score: 5.37109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.60%) |Training time=0.45s (20.87%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.38
+epoch: 0|step: 826|ppo_ep: 1|act_loss: -0.01385498046875|cri_loss: -0.006084442138671875|unsuper_loss: 0.0
+average reward score: 5.3203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.67%) |Training time=0.45s (20.70%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39
+epoch: 0|step: 827|ppo_ep: 1|act_loss: -0.01480865478515625|cri_loss: -0.00696563720703125|unsuper_loss: 0.0
+average reward score: 5.6328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.63s (70.82%) |Training time=0.45s (19.77%) |Others=0.22 (9.42%)|CurSamplesPerSec=13.94 |AvgSamplesPerSec=14.38
+epoch: 0|step: 828|ppo_ep: 1|act_loss: 0.0229034423828125|cri_loss: 0.01232147216796875|unsuper_loss: 0.0
+average reward score: 6.640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.72%) |Training time=0.45s (20.75%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.38
+[2023-04-14 09:18:08,145] [INFO] [logging.py:96:log_dist] [Rank 0] step=830, skipped=11, lr=[9.466316746379131e-06, 9.466316746379131e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:18:08,163] [INFO] [timer.py:199:stop] epoch=0/micro_step=830/global_step=830, RunningAvgSamplesPerSec=107.61573678676814, CurrSamplesPerSec=112.13863919103744, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:18:08,256] [INFO] [logging.py:96:log_dist] [Rank 0] step=830, skipped=12, lr=[4.905090195074332e-06, 4.905090195074332e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 829|ppo_ep: 1|act_loss: -0.01229095458984375|cri_loss: -0.0051116943359375|unsuper_loss: 0.0
+average reward score: 5.21875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.81%) |Training time=0.45s (20.67%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.39
+epoch: 0|step: 830|ppo_ep: 1|act_loss: 0.00736236572265625|cri_loss: 0.0042724609375|unsuper_loss: 0.0
+average reward score: 6.08203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.79%) |Training time=0.45s (20.68%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39
+epoch: 0|step: 831|ppo_ep: 1|act_loss: 0.0172576904296875|cri_loss: 0.01012420654296875|unsuper_loss: 0.0
+average reward score: 5.984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.88%) |Training time=0.45s (20.58%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39
+epoch: 0|step: 832|ppo_ep: 1|act_loss: -0.016693115234375|cri_loss: -0.00738525390625|unsuper_loss: 0.0
+average reward score: 5.65625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.80%) |Training time=0.45s (20.67%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.39
+epoch: 0|step: 833|ppo_ep: 1|act_loss: 0.01226806640625|cri_loss: 0.006694793701171875|unsuper_loss: 0.0
+average reward score: 5.84375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.89%) |Training time=0.45s (20.47%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.39
+epoch: 0|step: 834|ppo_ep: 1|act_loss: 0.035400390625|cri_loss: 0.018402099609375|unsuper_loss: 0.0
+average reward score: 6.38671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.74s (76.01%) |Training time=0.45s (19.69%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.96 |AvgSamplesPerSec=14.39
+epoch: 0|step: 835|ppo_ep: 1|act_loss: -0.01434326171875|cri_loss: -0.006683349609375|unsuper_loss: 0.0
+average reward score: 6.0
+-------------------------------------------------------------------------------------
+|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.63s (72.83%) |Training time=0.49s (21.85%) |Others=0.12 (5.32%)|CurSamplesPerSec=14.30 |AvgSamplesPerSec=14.39
+epoch: 0|step: 836|ppo_ep: 1|act_loss: -0.04925537109375|cri_loss: -0.023651123046875|unsuper_loss: 0.0
+average reward score: 5.76953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.86%) |Training time=0.43s (19.59%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.39
+epoch: 0|step: 837|ppo_ep: 1|act_loss: -0.054840087890625|cri_loss: -0.0267333984375|unsuper_loss: 0.0
+average reward score: 6.25390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.11%) |Training time=0.44s (20.33%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.39
+epoch: 0|step: 838|ppo_ep: 1|act_loss: -0.016571044921875|cri_loss: -0.0059051513671875|unsuper_loss: 0.0
+average reward score: 5.93359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.45s |Gather latency=0.00s (0.00%) |Generate time=1.62s (66.08%) |Training time=0.45s (18.42%) |Others=0.38 (15.50%)|CurSamplesPerSec=13.05 |AvgSamplesPerSec=14.39
+[2023-04-14 09:18:30,351] [INFO] [logging.py:96:log_dist] [Rank 0] step=840, skipped=11, lr=[9.461205620116899e-06, 9.461205620116899e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:18:30,370] [INFO] [timer.py:199:stop] epoch=0/micro_step=840/global_step=840, RunningAvgSamplesPerSec=107.66078239168438, CurrSamplesPerSec=111.81325820119396, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:18:30,463] [INFO] [logging.py:96:log_dist] [Rank 0] step=840, skipped=12, lr=[4.902445505901531e-06, 4.902445505901531e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 839|ppo_ep: 1|act_loss: -0.023773193359375|cri_loss: -0.0101318359375|unsuper_loss: 0.0
+average reward score: 6.68359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.75%) |Training time=0.45s (20.71%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.39
+epoch: 0|step: 840|ppo_ep: 1|act_loss: -0.01178741455078125|cri_loss: -0.005176544189453125|unsuper_loss: 0.0
+average reward score: 6.53515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.40%) |Training time=0.46s (21.07%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.39
+epoch: 0|step: 841|ppo_ep: 1|act_loss: 0.014923095703125|cri_loss: 0.00815582275390625|unsuper_loss: 0.0
+average reward score: 5.390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.68%) |Training time=0.45s (20.78%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39
+epoch: 0|step: 842|ppo_ep: 1|act_loss: -0.0009603500366210938|cri_loss: 0.0004177093505859375|unsuper_loss: 0.0
+average reward score: 5.6171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.67%) |Training time=0.45s (20.80%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39
+epoch: 0|step: 843|ppo_ep: 1|act_loss: 0.0574951171875|cri_loss: 0.0333251953125|unsuper_loss: 0.0
+average reward score: 5.05078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.71%) |Training time=0.45s (20.76%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39
+epoch: 0|step: 844|ppo_ep: 1|act_loss: -0.001766204833984375|cri_loss: 0.002040863037109375|unsuper_loss: 0.0
+average reward score: 6.13671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.44%) |Training time=0.46s (21.03%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.39
+epoch: 0|step: 845|ppo_ep: 1|act_loss: -0.02178955078125|cri_loss: -0.00971221923828125|unsuper_loss: 0.0
+average reward score: 5.23046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.60%) |Training time=0.45s (20.87%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39
+epoch: 0|step: 846|ppo_ep: 1|act_loss: -0.05322265625|cri_loss: -0.024993896484375|unsuper_loss: 0.0
+average reward score: 5.63671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.01%) |Training time=0.44s (20.46%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.39
+epoch: 0|step: 847|ppo_ep: 1|act_loss: -0.031646728515625|cri_loss: -0.01514434814453125|unsuper_loss: 0.0
+average reward score: 5.7109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.40%) |Training time=0.46s (21.09%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.39
+epoch: 0|step: 848|ppo_ep: 1|act_loss: -0.044219970703125|cri_loss: -0.0208587646484375|unsuper_loss: 0.0
+average reward score: 5.25390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.47%) |Training time=0.46s (21.00%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39
+[2023-04-14 09:18:52,073] [INFO] [logging.py:96:log_dist] [Rank 0] step=850, skipped=11, lr=[9.456025774103137e-06, 9.456025774103137e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:18:52,091] [INFO] [timer.py:199:stop] epoch=0/micro_step=850/global_step=850, RunningAvgSamplesPerSec=107.68979385696973, CurrSamplesPerSec=109.89481785825113, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:18:52,184] [INFO] [logging.py:96:log_dist] [Rank 0] step=850, skipped=12, lr=[4.899765206690747e-06, 4.899765206690747e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 849|ppo_ep: 1|act_loss: 0.01556396484375|cri_loss: 0.0083770751953125|unsuper_loss: 0.0
+average reward score: 5.890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.55%) |Training time=0.45s (20.91%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.39
+epoch: 0|step: 850|ppo_ep: 1|act_loss: 0.027984619140625|cri_loss: 0.01593017578125|unsuper_loss: 0.0
+average reward score: 5.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.79%) |Training time=0.45s (20.67%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.39
+epoch: 0|step: 851|ppo_ep: 1|act_loss: -0.01447296142578125|cri_loss: -0.00623321533203125|unsuper_loss: 0.0
+average reward score: 5.75390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.65s (73.45%) |Training time=0.43s (18.96%) |Others=0.17 (7.60%)|CurSamplesPerSec=14.24 |AvgSamplesPerSec=14.39
+epoch: 0|step: 852|ppo_ep: 1|act_loss: -0.02020263671875|cri_loss: -0.00873565673828125|unsuper_loss: 0.0
+average reward score: 5.43359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.95%) |Training time=0.44s (20.19%) |Others=0.11 (4.85%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.39
+epoch: 0|step: 853|ppo_ep: 1|act_loss: -0.04766845703125|cri_loss: -0.0230560302734375|unsuper_loss: 0.0
+average reward score: 5.13671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.05%) |Training time=0.44s (20.41%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.39
+epoch: 0|step: 854|ppo_ep: 1|act_loss: -0.03314208984375|cri_loss: -0.0153350830078125|unsuper_loss: 0.0
+average reward score: 5.53125
+-------------------------------------------------------------------------------------
+|E2E latency=2.40s |Gather latency=0.00s (0.00%) |Generate time=1.62s (67.54%) |Training time=0.45s (18.83%) |Others=0.33 (13.63%)|CurSamplesPerSec=13.32 |AvgSamplesPerSec=14.39
+epoch: 0|step: 855|ppo_ep: 1|act_loss: 0.0093231201171875|cri_loss: 0.00567626953125|unsuper_loss: 0.0
+average reward score: 5.6171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.58%) |Training time=0.45s (20.90%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39
+epoch: 0|step: 856|ppo_ep: 1|act_loss: -0.0193023681640625|cri_loss: -0.00839996337890625|unsuper_loss: 0.0
+average reward score: 5.46875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.54%) |Training time=0.45s (20.93%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39
+epoch: 0|step: 857|ppo_ep: 1|act_loss: 0.003940582275390625|cri_loss: 0.003002166748046875|unsuper_loss: 0.0
+average reward score: 5.39453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.66%) |Training time=0.45s (20.80%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.39
+epoch: 0|step: 858|ppo_ep: 1|act_loss: 0.01480865478515625|cri_loss: 0.00885009765625|unsuper_loss: 0.0
+average reward score: 5.4609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.67%) |Training time=0.45s (20.79%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.39
+[2023-04-14 09:19:14,107] [INFO] [logging.py:96:log_dist] [Rank 0] step=860, skipped=11, lr=[9.450777285115664e-06, 9.450777285115664e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:19:14,125] [INFO] [timer.py:199:stop] epoch=0/micro_step=860/global_step=860, RunningAvgSamplesPerSec=107.74103031128162, CurrSamplesPerSec=110.24423676217167, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:19:14,218] [INFO] [logging.py:96:log_dist] [Rank 0] step=860, skipped=12, lr=[4.897049337170483e-06, 4.897049337170483e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 859|ppo_ep: 1|act_loss: 0.00782012939453125|cri_loss: 0.005535125732421875|unsuper_loss: 0.0
+average reward score: 5.484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.63%) |Training time=0.45s (20.86%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.39
+epoch: 0|step: 860|ppo_ep: 1|act_loss: 0.06195068359375|cri_loss: 0.034088134765625|unsuper_loss: 0.0
+average reward score: 5.1875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.87%) |Training time=0.45s (20.58%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.39
+epoch: 0|step: 861|ppo_ep: 1|act_loss: -0.027069091796875|cri_loss: -0.012939453125|unsuper_loss: 0.0
+average reward score: 5.078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.97%) |Training time=0.45s (20.53%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.39
+epoch: 0|step: 862|ppo_ep: 1|act_loss: -0.0242156982421875|cri_loss: -0.010772705078125|unsuper_loss: 0.0
+average reward score: 5.69921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.53%) |Training time=0.46s (20.97%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.39
+epoch: 0|step: 863|ppo_ep: 1|act_loss: -0.0100555419921875|cri_loss: -0.00096893310546875|unsuper_loss: 0.0
+average reward score: 5.2421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.65s (73.54%) |Training time=0.49s (22.04%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.30 |AvgSamplesPerSec=14.39
+epoch: 0|step: 864|ppo_ep: 1|act_loss: 0.04193115234375|cri_loss: 0.028228759765625|unsuper_loss: 0.0
+average reward score: 5.171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.51%) |Training time=0.45s (20.95%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39
+epoch: 0|step: 865|ppo_ep: 1|act_loss: 0.020294189453125|cri_loss: 0.01458740234375|unsuper_loss: 0.0
+average reward score: 5.43359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.66%) |Training time=0.45s (20.81%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.39
+epoch: 0|step: 866|ppo_ep: 1|act_loss: -0.03167724609375|cri_loss: -0.014373779296875|unsuper_loss: 0.0
+average reward score: 5.01953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.82s (77.79%) |Training time=0.42s (17.89%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.39
+epoch: 0|step: 867|ppo_ep: 1|act_loss: 0.0034465789794921875|cri_loss: 0.00566864013671875|unsuper_loss: 0.0
+average reward score: 5.29296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.94%) |Training time=0.45s (20.51%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.39
+epoch: 0|step: 868|ppo_ep: 1|act_loss: 0.0083160400390625|cri_loss: 0.008819580078125|unsuper_loss: 0.0
+average reward score: 5.12890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.58%) |Training time=0.45s (20.89%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39
+[2023-04-14 09:19:36,087] [INFO] [logging.py:96:log_dist] [Rank 0] step=870, skipped=11, lr=[9.445460230949745e-06, 9.445460230949745e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:19:36,491] [INFO] [timer.py:199:stop] epoch=0/micro_step=870/global_step=870, RunningAvgSamplesPerSec=107.61950179826276, CurrSamplesPerSec=47.67854553463432, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:19:36,584] [INFO] [logging.py:96:log_dist] [Rank 0] step=870, skipped=12, lr=[4.894297937596475e-06, 4.894297937596475e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 869|ppo_ep: 1|act_loss: -0.000762939453125|cri_loss: 0.0043182373046875|unsuper_loss: 0.0
+average reward score: 5.33984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.55s |Gather latency=0.00s (0.00%) |Generate time=1.62s (63.49%) |Training time=0.83s (32.66%) |Others=0.10 (3.85%)|CurSamplesPerSec=12.53 |AvgSamplesPerSec=14.39
+epoch: 0|step: 870|ppo_ep: 1|act_loss: 0.06591796875|cri_loss: 0.047149658203125|unsuper_loss: 0.0
+average reward score: 5.08984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.79%) |Training time=0.45s (20.68%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.39
+epoch: 0|step: 871|ppo_ep: 1|act_loss: -0.0975341796875|cri_loss: -0.04486083984375|unsuper_loss: 0.0
+average reward score: 5.6953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.50%) |Training time=0.46s (20.98%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.39
+epoch: 0|step: 872|ppo_ep: 1|act_loss: 0.01934814453125|cri_loss: 0.0106658935546875|unsuper_loss: 0.0
+average reward score: 5.3984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.51%) |Training time=0.46s (20.95%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.39
+epoch: 0|step: 873|ppo_ep: 1|act_loss: 0.0455322265625|cri_loss: 0.02703857421875|unsuper_loss: 0.0
+average reward score: 5.56640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.73%) |Training time=0.45s (20.76%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.39
+epoch: 0|step: 874|ppo_ep: 1|act_loss: 0.01427459716796875|cri_loss: 0.00791168212890625|unsuper_loss: 0.0
+average reward score: 5.6015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.98%) |Training time=0.45s (20.49%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.39
+epoch: 0|step: 875|ppo_ep: 1|act_loss: -0.04949951171875|cri_loss: -0.0236358642578125|unsuper_loss: 0.0
+average reward score: 5.3671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.65%) |Training time=0.45s (20.81%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.39
+epoch: 0|step: 876|ppo_ep: 1|act_loss: -0.004947662353515625|cri_loss: -0.00028228759765625|unsuper_loss: 0.0
+average reward score: 5.3359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.73%) |Training time=0.45s (20.76%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.39
+epoch: 0|step: 877|ppo_ep: 1|act_loss: 0.0782470703125|cri_loss: 0.044403076171875|unsuper_loss: 0.0
+average reward score: 5.5234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.86%) |Training time=0.45s (20.61%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.39
+epoch: 0|step: 878|ppo_ep: 1|act_loss: 0.06304931640625|cri_loss: 0.037445068359375|unsuper_loss: 0.0
+average reward score: 5.4609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.64%) |Training time=0.45s (20.83%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.39
+[2023-04-14 09:19:58,214] [INFO] [logging.py:96:log_dist] [Rank 0] step=880, skipped=11, lr=[9.440074690416949e-06, 9.440074690416949e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:19:58,232] [INFO] [timer.py:199:stop] epoch=0/micro_step=880/global_step=880, RunningAvgSamplesPerSec=107.65370035299243, CurrSamplesPerSec=109.40803398863513, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:19:58,325] [INFO] [logging.py:96:log_dist] [Rank 0] step=880, skipped=12, lr=[4.891511048751102e-06, 4.891511048751102e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 879|ppo_ep: 1|act_loss: -0.0906982421875|cri_loss: -0.03826904296875|unsuper_loss: 0.0
+average reward score: 5.4375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.55%) |Training time=0.46s (20.94%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.39
+[2023-04-14 09:20:00,493] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 880|ppo_ep: 1|act_loss: 0.0443115234375|cri_loss: 0.02581787109375|unsuper_loss: 0.0
+average reward score: 5.79296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.84%) |Training time=0.46s (21.08%) |Others=0.09 (4.09%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.39
+[2023-04-14 09:20:02,656] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 881|ppo_ep: 1|act_loss: -0.0081787109375|cri_loss: -5.340576171875e-05|unsuper_loss: 0.0
+average reward score: 5.12890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.09%) |Training time=0.43s (19.84%) |Others=0.09 (4.07%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.39
+epoch: 0|step: 882|ppo_ep: 1|act_loss: -0.0518798828125|cri_loss: -0.01336669921875|unsuper_loss: 0.0
+average reward score: 5.46875
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.22%) |Training time=0.43s (18.55%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.39
+epoch: 0|step: 883|ppo_ep: 1|act_loss: -0.050872802734375|cri_loss: -0.01409912109375|unsuper_loss: 0.0
+average reward score: 5.2734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.36%) |Training time=0.46s (21.03%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.39
+epoch: 0|step: 884|ppo_ep: 1|act_loss: 0.0120086669921875|cri_loss: 0.01505279541015625|unsuper_loss: 0.0
+average reward score: 5.78125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.54%) |Training time=0.45s (20.91%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.39
+epoch: 0|step: 885|ppo_ep: 1|act_loss: 0.0875244140625|cri_loss: 0.05120849609375|unsuper_loss: 0.0
+average reward score: 4.94140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.13%) |Training time=0.44s (20.34%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.40
+[2023-04-14 09:20:13,670] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
+epoch: 0|step: 886|ppo_ep: 1|act_loss: -0.1021728515625|cri_loss: -0.035919189453125|unsuper_loss: 0.0
+average reward score: 5.3671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.50%) |Training time=0.44s (20.34%) |Others=0.09 (4.16%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.40
+epoch: 0|step: 887|ppo_ep: 1|act_loss: -0.0577392578125|cri_loss: -0.0184173583984375|unsuper_loss: 0.0
+average reward score: 5.171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.99%) |Training time=0.45s (20.47%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.40
+epoch: 0|step: 888|ppo_ep: 1|act_loss: -0.0115966796875|cri_loss: 0.014251708984375|unsuper_loss: 0.0
+average reward score: 5.46484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.65s (71.35%) |Training time=0.44s (18.94%) |Others=0.22 (9.70%)|CurSamplesPerSec=13.82 |AvgSamplesPerSec=14.40
+[2023-04-14 09:20:20,226] [INFO] [logging.py:96:log_dist] [Rank 0] step=890, skipped=11, lr=[9.43462074334398e-06, 9.43462074334398e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:20:20,244] [INFO] [timer.py:199:stop] epoch=0/micro_step=890/global_step=890, RunningAvgSamplesPerSec=107.7239576261761, CurrSamplesPerSec=116.03634172425211, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:20:20,337] [INFO] [logging.py:96:log_dist] [Rank 0] step=890, skipped=15, lr=[4.889539132542428e-06, 4.889539132542428e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 889|ppo_ep: 1|act_loss: 0.06048583984375|cri_loss: 0.04217529296875|unsuper_loss: 0.0
+average reward score: 5.3984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.33%) |Training time=0.44s (20.16%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.40
+[2023-04-14 09:20:22,384] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 890|ppo_ep: 1|act_loss: -0.027069091796875|cri_loss: 0.01715087890625|unsuper_loss: 0.0
+average reward score: 6.2734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.25%) |Training time=0.41s (19.15%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.40
+epoch: 0|step: 891|ppo_ep: 1|act_loss: -0.1314697265625|cri_loss: -0.045501708984375|unsuper_loss: 0.0
+average reward score: 5.5390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.14%) |Training time=0.44s (20.33%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.40
+epoch: 0|step: 892|ppo_ep: 1|act_loss: -0.0782470703125|cri_loss: -0.028076171875|unsuper_loss: 0.0
+average reward score: 5.4453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.67s (74.39%) |Training time=0.44s (19.66%) |Others=0.13 (5.96%)|CurSamplesPerSec=14.28 |AvgSamplesPerSec=14.40
+epoch: 0|step: 893|ppo_ep: 1|act_loss: -0.04217529296875|cri_loss: 0.0130615234375|unsuper_loss: 0.0
+average reward score: 5.78125
+-------------------------------------------------------------------------------------
+|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.70s (76.07%) |Training time=0.44s (19.51%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.32 |AvgSamplesPerSec=14.40
+epoch: 0|step: 894|ppo_ep: 1|act_loss: 0.11956787109375|cri_loss: 0.0692138671875|unsuper_loss: 0.0
+average reward score: 5.44140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.30%) |Training time=0.44s (20.15%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.40
+epoch: 0|step: 895|ppo_ep: 1|act_loss: 0.0298919677734375|cri_loss: 0.025909423828125|unsuper_loss: 0.0
+average reward score: 5.58984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.14%) |Training time=0.44s (20.33%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.40
+epoch: 0|step: 896|ppo_ep: 1|act_loss: 0.169189453125|cri_loss: 0.094482421875|unsuper_loss: 0.0
+average reward score: 5.49609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.81s (77.34%) |Training time=0.43s (18.45%) |Others=0.10 (4.21%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.40
+[2023-04-14 09:20:37,862] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, reducing to 4096
+epoch: 0|step: 897|ppo_ep: 1|act_loss: 0.0097503662109375|cri_loss: 0.008026123046875|unsuper_loss: 0.0
+average reward score: 5.234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.64s (76.43%) |Training time=0.41s (19.00%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.40
+epoch: 0|step: 898|ppo_ep: 1|act_loss: -0.071533203125|cri_loss: -0.0288238525390625|unsuper_loss: 0.0
+average reward score: 5.23828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.79%) |Training time=0.43s (19.67%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.40
+[2023-04-14 09:20:42,190] [INFO] [logging.py:96:log_dist] [Rank 0] step=900, skipped=13, lr=[9.43020838726756e-06, 9.43020838726756e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:20:42,208] [INFO] [timer.py:199:stop] epoch=0/micro_step=900/global_step=900, RunningAvgSamplesPerSec=107.83954100227736, CurrSamplesPerSec=118.38544986262222, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:20:42,302] [INFO] [logging.py:96:log_dist] [Rank 0] step=900, skipped=15, lr=[4.886692007019939e-06, 4.886692007019939e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 899|ppo_ep: 1|act_loss: -0.032318115234375|cri_loss: -0.01042938232421875|unsuper_loss: 0.0
+average reward score: 5.43359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.51%) |Training time=0.43s (19.90%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.40
+epoch: 0|step: 900|ppo_ep: 1|act_loss: -0.1773681640625|cri_loss: -0.0633544921875|unsuper_loss: 0.0
+average reward score: 5.58984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.36%) |Training time=0.44s (20.11%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.40
+epoch: 0|step: 901|ppo_ep: 1|act_loss: -0.02392578125|cri_loss: 0.00042724609375|unsuper_loss: 0.0
+average reward score: 5.8125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.78%) |Training time=0.43s (19.68%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.40
+epoch: 0|step: 902|ppo_ep: 1|act_loss: 0.1036376953125|cri_loss: 0.0618896484375|unsuper_loss: 0.0
+average reward score: 5.5234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.45%) |Training time=0.43s (20.00%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.40
+epoch: 0|step: 903|ppo_ep: 1|act_loss: 0.25048828125|cri_loss: 0.17138671875|unsuper_loss: 0.0
+average reward score: 5.203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.30%) |Training time=0.44s (20.15%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.40
+epoch: 0|step: 904|ppo_ep: 1|act_loss: 0.1358642578125|cri_loss: 0.0823974609375|unsuper_loss: 0.0
+average reward score: 5.6796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.49%) |Training time=0.43s (19.97%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.40
+epoch: 0|step: 905|ppo_ep: 1|act_loss: -0.014801025390625|cri_loss: 0.017059326171875|unsuper_loss: 0.0
+average reward score: 6.02734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.28%) |Training time=0.44s (20.18%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.40
+epoch: 0|step: 906|ppo_ep: 1|act_loss: -0.05535888671875|cri_loss: -0.0138092041015625|unsuper_loss: 0.0
+average reward score: 5.35546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.49%) |Training time=0.43s (19.97%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.40
+epoch: 0|step: 907|ppo_ep: 1|act_loss: -0.06268310546875|cri_loss: -0.0184478759765625|unsuper_loss: 0.0
+average reward score: 6.30859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.26%) |Training time=0.44s (20.21%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.40
+epoch: 0|step: 908|ppo_ep: 1|act_loss: 0.07330322265625|cri_loss: 0.0513916015625|unsuper_loss: 0.0
+average reward score: 5.71875
+-------------------------------------------------------------------------------------
+|E2E latency=2.40s |Gather latency=0.00s (0.00%) |Generate time=1.64s (68.21%) |Training time=0.44s (18.19%) |Others=0.33 (13.60%)|CurSamplesPerSec=13.34 |AvgSamplesPerSec=14.40
+[2023-04-14 09:21:04,148] [INFO] [logging.py:96:log_dist] [Rank 0] step=910, skipped=13, lr=[9.424631512821333e-06, 9.424631512821333e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:21:04,166] [INFO] [timer.py:199:stop] epoch=0/micro_step=910/global_step=910, RunningAvgSamplesPerSec=107.93576925943141, CurrSamplesPerSec=117.37038319604036, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:21:04,259] [INFO] [logging.py:96:log_dist] [Rank 0] step=910, skipped=15, lr=[4.883809504964325e-06, 4.883809504964325e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 909|ppo_ep: 1|act_loss: -0.1029052734375|cri_loss: -0.020233154296875|unsuper_loss: 0.0
+average reward score: 5.65234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.45%) |Training time=0.44s (20.03%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.40
+epoch: 0|step: 910|ppo_ep: 1|act_loss: 0.165283203125|cri_loss: 0.11480712890625|unsuper_loss: 0.0
+average reward score: 5.48046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.15%) |Training time=0.44s (20.32%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.40
+epoch: 0|step: 911|ppo_ep: 1|act_loss: 0.1444091796875|cri_loss: 0.0936279296875|unsuper_loss: 0.0
+average reward score: 5.84375
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.82s (77.78%) |Training time=0.42s (17.99%) |Others=0.10 (4.22%)|CurSamplesPerSec=13.65 |AvgSamplesPerSec=14.40
+epoch: 0|step: 912|ppo_ep: 1|act_loss: 0.0100555419921875|cri_loss: 0.03411865234375|unsuper_loss: 0.0
+average reward score: 5.578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.38%) |Training time=0.44s (20.09%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.40
+epoch: 0|step: 913|ppo_ep: 1|act_loss: 0.0283355712890625|cri_loss: 0.022918701171875|unsuper_loss: 0.0
+average reward score: 5.59375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.51%) |Training time=0.46s (20.97%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.40
+[2023-04-14 09:21:15,175] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 4096, reducing to 2048
+epoch: 0|step: 914|ppo_ep: 1|act_loss: -0.191162109375|cri_loss: -0.0084228515625|unsuper_loss: 0.0
+average reward score: 5.13671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.52%) |Training time=0.43s (19.88%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.40
+[2023-04-14 09:21:17,321] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 2048, reducing to 1024
+epoch: 0|step: 915|ppo_ep: 1|act_loss: 0.1402587890625|cri_loss: 0.149658203125|unsuper_loss: 0.0
+average reward score: 5.3359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.54%) |Training time=0.43s (19.88%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.40
+epoch: 0|step: 916|ppo_ep: 1|act_loss: -0.060089111328125|cri_loss: -0.01513671875|unsuper_loss: 0.0
+average reward score: 4.9765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.59%) |Training time=0.46s (20.92%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.40
+epoch: 0|step: 917|ppo_ep: 1|act_loss: 0.202880859375|cri_loss: 0.1185302734375|unsuper_loss: 0.0
+average reward score: 5.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.68%) |Training time=0.45s (20.78%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.40
+epoch: 0|step: 918|ppo_ep: 1|act_loss: 0.038116455078125|cri_loss: 0.027862548828125|unsuper_loss: 0.0
+average reward score: 5.015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.59%) |Training time=0.45s (20.88%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.40
+[2023-04-14 09:21:26,014] [INFO] [logging.py:96:log_dist] [Rank 0] step=920, skipped=15, lr=[9.420120921365356e-06, 9.420120921365356e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:21:26,032] [INFO] [timer.py:199:stop] epoch=0/micro_step=920/global_step=920, RunningAvgSamplesPerSec=108.00241603015057, CurrSamplesPerSec=111.10003311039004, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:21:26,125] [INFO] [logging.py:96:log_dist] [Rank 0] step=920, skipped=15, lr=[4.880891669101221e-06, 4.880891669101221e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 919|ppo_ep: 1|act_loss: -0.0063018798828125|cri_loss: 0.0113983154296875|unsuper_loss: 0.0
+average reward score: 5.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.71%) |Training time=0.45s (20.75%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.40
+epoch: 0|step: 920|ppo_ep: 1|act_loss: 0.07421875|cri_loss: 0.04022216796875|unsuper_loss: 0.0
+average reward score: 5.41015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.53%) |Training time=0.46s (20.94%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.40
+epoch: 0|step: 921|ppo_ep: 1|act_loss: 0.251953125|cri_loss: 0.1390380859375|unsuper_loss: 0.0
+average reward score: 4.82421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.66s (74.98%) |Training time=0.45s (20.53%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.40
+epoch: 0|step: 922|ppo_ep: 1|act_loss: 0.0997314453125|cri_loss: 0.060546875|unsuper_loss: 0.0
+average reward score: 5.390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.73s (75.79%) |Training time=0.45s (19.83%) |Others=0.10 (4.38%)|CurSamplesPerSec=14.05 |AvgSamplesPerSec=14.40
+epoch: 0|step: 923|ppo_ep: 1|act_loss: 0.0804443359375|cri_loss: 0.051025390625|unsuper_loss: 0.0
+average reward score: 5.59765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.73s |Gather latency=0.00s (0.00%) |Generate time=1.63s (59.51%) |Training time=0.45s (16.48%) |Others=0.66 (24.01%)|CurSamplesPerSec=11.71 |AvgSamplesPerSec=14.40
+epoch: 0|step: 924|ppo_ep: 1|act_loss: -0.0679931640625|cri_loss: -0.022064208984375|unsuper_loss: 0.0
+average reward score: 5.72265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.62%) |Training time=0.45s (20.85%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.40
+epoch: 0|step: 925|ppo_ep: 1|act_loss: -0.064208984375|cri_loss: -0.0222625732421875|unsuper_loss: 0.0
+average reward score: 5.40625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.48%) |Training time=0.46s (20.98%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.40
+epoch: 0|step: 926|ppo_ep: 1|act_loss: -0.00994873046875|cri_loss: 0.00093841552734375|unsuper_loss: 0.0
+average reward score: 6.06640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.81s (76.77%) |Training time=0.45s (19.00%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.61 |AvgSamplesPerSec=14.40
+epoch: 0|step: 927|ppo_ep: 1|act_loss: -0.01502227783203125|cri_loss: -0.00019073486328125|unsuper_loss: 0.0
+average reward score: 5.6796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.46%) |Training time=0.43s (19.95%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.40
+epoch: 0|step: 928|ppo_ep: 1|act_loss: -0.0238800048828125|cri_loss: -0.0068359375|unsuper_loss: 0.0
+average reward score: 5.67578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.31%) |Training time=0.43s (20.13%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.40
+[2023-04-14 09:21:48,591] [INFO] [logging.py:96:log_dist] [Rank 0] step=930, skipped=15, lr=[9.414421387372385e-06, 9.414421387372385e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:21:48,609] [INFO] [timer.py:199:stop] epoch=0/micro_step=930/global_step=930, RunningAvgSamplesPerSec=108.04897948065681, CurrSamplesPerSec=117.29724807363407, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:21:48,702] [INFO] [logging.py:96:log_dist] [Rank 0] step=930, skipped=15, lr=[4.877938542679992e-06, 4.877938542679992e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 929|ppo_ep: 1|act_loss: -0.038818359375|cri_loss: -0.0157012939453125|unsuper_loss: 0.0
+average reward score: 5.97265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.26%) |Training time=0.44s (20.20%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.40
+epoch: 0|step: 930|ppo_ep: 1|act_loss: 0.05596923828125|cri_loss: 0.030120849609375|unsuper_loss: 0.0
+average reward score: 5.47265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.93%) |Training time=0.44s (20.53%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.40
+epoch: 0|step: 931|ppo_ep: 1|act_loss: 0.02618408203125|cri_loss: 0.01433563232421875|unsuper_loss: 0.0
+average reward score: 5.89453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.10%) |Training time=0.44s (20.36%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.40
+epoch: 0|step: 932|ppo_ep: 1|act_loss: 0.002109527587890625|cri_loss: 0.001583099365234375|unsuper_loss: 0.0
+average reward score: 5.8515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.03%) |Training time=0.44s (20.42%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.40
+epoch: 0|step: 933|ppo_ep: 1|act_loss: 0.03338623046875|cri_loss: 0.0186920166015625|unsuper_loss: 0.0
+average reward score: 5.828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.77%) |Training time=0.45s (20.69%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.40
+epoch: 0|step: 934|ppo_ep: 1|act_loss: 0.0928955078125|cri_loss: 0.05352783203125|unsuper_loss: 0.0
+average reward score: 5.23828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.92%) |Training time=0.45s (20.54%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.40
+epoch: 0|step: 935|ppo_ep: 1|act_loss: 0.0657958984375|cri_loss: 0.034393310546875|unsuper_loss: 0.0
+average reward score: 4.85546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.11%) |Training time=0.44s (20.34%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.40
+epoch: 0|step: 936|ppo_ep: 1|act_loss: 0.0772705078125|cri_loss: 0.042449951171875|unsuper_loss: 0.0
+average reward score: 5.3515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.01%) |Training time=0.44s (20.45%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.40
+epoch: 0|step: 937|ppo_ep: 1|act_loss: -0.01788330078125|cri_loss: -0.008026123046875|unsuper_loss: 0.0
+average reward score: 5.3515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.80%) |Training time=0.45s (20.65%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.40
+epoch: 0|step: 938|ppo_ep: 1|act_loss: -0.053955078125|cri_loss: -0.024993896484375|unsuper_loss: 0.0
+average reward score: 5.55859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.99%) |Training time=0.44s (20.46%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.40
+[2023-04-14 09:22:10,262] [INFO] [logging.py:96:log_dist] [Rank 0] step=940, skipped=15, lr=[9.408653827083077e-06, 9.408653827083077e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:22:10,280] [INFO] [timer.py:199:stop] epoch=0/micro_step=940/global_step=940, RunningAvgSamplesPerSec=108.10580988208648, CurrSamplesPerSec=113.72648917961666, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:22:10,374] [INFO] [logging.py:96:log_dist] [Rank 0] step=940, skipped=15, lr=[4.874950169473097e-06, 4.874950169473097e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 939|ppo_ep: 1|act_loss: -0.029998779296875|cri_loss: -0.0115203857421875|unsuper_loss: 0.0
+average reward score: 5.453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.93%) |Training time=0.44s (20.46%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.40
+epoch: 0|step: 940|ppo_ep: 1|act_loss: -0.0288848876953125|cri_loss: -0.0118560791015625|unsuper_loss: 0.0
+average reward score: 5.890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.74%) |Training time=0.45s (20.72%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.40
+epoch: 0|step: 941|ppo_ep: 1|act_loss: -0.04986572265625|cri_loss: -0.02099609375|unsuper_loss: 0.0
+average reward score: 5.5234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.79s (76.98%) |Training time=0.44s (18.78%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.74 |AvgSamplesPerSec=14.40
+epoch: 0|step: 942|ppo_ep: 1|act_loss: -0.00787353515625|cri_loss: -0.002758026123046875|unsuper_loss: 0.0
+average reward score: 5.05859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.30%) |Training time=0.44s (20.16%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.40
+epoch: 0|step: 943|ppo_ep: 1|act_loss: -0.0435791015625|cri_loss: -0.0201873779296875|unsuper_loss: 0.0
+average reward score: 6.0078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.14%) |Training time=0.44s (20.34%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.40
+epoch: 0|step: 944|ppo_ep: 1|act_loss: 0.05010986328125|cri_loss: 0.0257720947265625|unsuper_loss: 0.0
+average reward score: 4.5703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.45s |Gather latency=0.00s (0.00%) |Generate time=1.62s (66.19%) |Training time=0.44s (18.12%) |Others=0.38 (15.68%)|CurSamplesPerSec=13.04 |AvgSamplesPerSec=14.40
+epoch: 0|step: 945|ppo_ep: 1|act_loss: 0.0153350830078125|cri_loss: 0.00891876220703125|unsuper_loss: 0.0
+average reward score: 4.94140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.84%) |Training time=0.45s (20.62%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.40
+epoch: 0|step: 946|ppo_ep: 1|act_loss: 0.058349609375|cri_loss: 0.033203125|unsuper_loss: 0.0
+average reward score: 4.88671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.88%) |Training time=0.45s (20.58%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.40
+epoch: 0|step: 947|ppo_ep: 1|act_loss: 0.108642578125|cri_loss: 0.0631103515625|unsuper_loss: 0.0
+average reward score: 4.83203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.35%) |Training time=0.43s (20.08%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.40
+epoch: 0|step: 948|ppo_ep: 1|act_loss: 0.010772705078125|cri_loss: 0.00787353515625|unsuper_loss: 0.0
+average reward score: 4.84375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.70%) |Training time=0.43s (19.76%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.40
+[2023-04-14 09:22:32,403] [INFO] [logging.py:96:log_dist] [Rank 0] step=950, skipped=15, lr=[9.402818325986586e-06, 9.402818325986586e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:22:32,421] [INFO] [timer.py:199:stop] epoch=0/micro_step=950/global_step=950, RunningAvgSamplesPerSec=108.17638441637497, CurrSamplesPerSec=115.0792441441233, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:22:32,514] [INFO] [logging.py:96:log_dist] [Rank 0] step=950, skipped=15, lr=[4.871926593775433e-06, 4.871926593775433e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 949|ppo_ep: 1|act_loss: -0.05145263671875|cri_loss: -0.0225830078125|unsuper_loss: 0.0
+average reward score: 4.84375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.19%) |Training time=0.44s (20.27%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.40
+epoch: 0|step: 950|ppo_ep: 1|act_loss: 0.0074462890625|cri_loss: 0.004741668701171875|unsuper_loss: 0.0
+average reward score: 4.765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.64s (70.07%) |Training time=0.46s (19.71%) |Others=0.24 (10.22%)|CurSamplesPerSec=13.65 |AvgSamplesPerSec=14.40
+epoch: 0|step: 951|ppo_ep: 1|act_loss: -0.062255859375|cri_loss: -0.0283966064453125|unsuper_loss: 0.0
+average reward score: 4.30859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.73s (76.23%) |Training time=0.44s (19.42%) |Others=0.10 (4.35%)|CurSamplesPerSec=14.10 |AvgSamplesPerSec=14.40
+epoch: 0|step: 952|ppo_ep: 1|act_loss: -0.0128021240234375|cri_loss: -0.003631591796875|unsuper_loss: 0.0
+average reward score: 4.62890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.73%) |Training time=0.45s (20.74%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.40
+epoch: 0|step: 953|ppo_ep: 1|act_loss: -0.0440673828125|cri_loss: -0.0195465087890625|unsuper_loss: 0.0
+average reward score: 4.96484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.51%) |Training time=0.46s (20.96%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.40
+epoch: 0|step: 954|ppo_ep: 1|act_loss: -0.02337646484375|cri_loss: -0.0098724365234375|unsuper_loss: 0.0
+average reward score: 4.578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.07%) |Training time=0.44s (20.36%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.40
+epoch: 0|step: 955|ppo_ep: 1|act_loss: 0.00646209716796875|cri_loss: 0.005374908447265625|unsuper_loss: 0.0
+average reward score: 4.6484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.64s (72.41%) |Training time=0.44s (19.38%) |Others=0.19 (8.21%)|CurSamplesPerSec=14.15 |AvgSamplesPerSec=14.40
+epoch: 0|step: 956|ppo_ep: 1|act_loss: -0.00202178955078125|cri_loss: 0.002307891845703125|unsuper_loss: 0.0
+average reward score: 4.7109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.90%) |Training time=0.43s (19.56%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.40
+epoch: 0|step: 957|ppo_ep: 1|act_loss: 0.0703125|cri_loss: 0.038421630859375|unsuper_loss: 0.0
+average reward score: 4.63671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.01%) |Training time=0.42s (19.45%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.40
+epoch: 0|step: 958|ppo_ep: 1|act_loss: 0.0372314453125|cri_loss: 0.0194244384765625|unsuper_loss: 0.0
+average reward score: 4.56640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.19%) |Training time=0.42s (19.25%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.40
+[2023-04-14 09:22:54,486] [INFO] [logging.py:96:log_dist] [Rank 0] step=960, skipped=15, lr=[9.396914970579121e-06, 9.396914970579121e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:22:54,504] [INFO] [timer.py:199:stop] epoch=0/micro_step=960/global_step=960, RunningAvgSamplesPerSec=108.26503201681297, CurrSamplesPerSec=133.41457227490508, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:22:54,597] [INFO] [logging.py:96:log_dist] [Rank 0] step=960, skipped=15, lr=[4.86886786040369e-06, 4.86886786040369e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 959|ppo_ep: 1|act_loss: 0.00933074951171875|cri_loss: 0.005535125732421875|unsuper_loss: 0.0
+average reward score: 4.99609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.67s (76.89%) |Training time=0.40s (18.57%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.40
+epoch: 0|step: 960|ppo_ep: 1|act_loss: 0.065673828125|cri_loss: 0.034149169921875|unsuper_loss: 0.0
+average reward score: 5.2265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.00%) |Training time=0.45s (20.47%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.40
+epoch: 0|step: 961|ppo_ep: 1|act_loss: 0.04327392578125|cri_loss: 0.023590087890625|unsuper_loss: 0.0
+average reward score: 4.6796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.43%) |Training time=0.43s (20.02%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.41
+epoch: 0|step: 962|ppo_ep: 1|act_loss: -0.0791015625|cri_loss: -0.03839111328125|unsuper_loss: 0.0
+average reward score: 5.015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.31%) |Training time=0.44s (20.15%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.41
+epoch: 0|step: 963|ppo_ep: 1|act_loss: 0.05120849609375|cri_loss: 0.027435302734375|unsuper_loss: 0.0
+average reward score: 5.18359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.79%) |Training time=0.43s (19.67%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41
+epoch: 0|step: 964|ppo_ep: 1|act_loss: 0.0233154296875|cri_loss: 0.014129638671875|unsuper_loss: 0.0
+average reward score: 5.0546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.50%) |Training time=0.43s (19.95%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41
+epoch: 0|step: 965|ppo_ep: 1|act_loss: 0.08660888671875|cri_loss: 0.04522705078125|unsuper_loss: 0.0
+average reward score: 4.3359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.68%) |Training time=0.45s (20.78%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.41
+epoch: 0|step: 966|ppo_ep: 1|act_loss: -0.00670623779296875|cri_loss: -0.0023937225341796875|unsuper_loss: 0.0
+average reward score: 5.03125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.71%) |Training time=0.45s (20.76%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.41
+epoch: 0|step: 967|ppo_ep: 1|act_loss: -0.03643798828125|cri_loss: -0.016998291015625|unsuper_loss: 0.0
+average reward score: 5.109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.44s |Gather latency=0.00s (0.00%) |Generate time=1.63s (66.53%) |Training time=0.45s (18.34%) |Others=0.37 (15.13%)|CurSamplesPerSec=13.09 |AvgSamplesPerSec=14.41
+epoch: 0|step: 968|ppo_ep: 1|act_loss: -0.0234375|cri_loss: -0.0086212158203125|unsuper_loss: 0.0
+average reward score: 5.3125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.90%) |Training time=0.44s (20.54%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41
+[2023-04-14 09:23:16,482] [INFO] [logging.py:96:log_dist] [Rank 0] step=970, skipped=15, lr=[9.390943848362648e-06, 9.390943848362648e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:23:16,499] [INFO] [timer.py:199:stop] epoch=0/micro_step=970/global_step=970, RunningAvgSamplesPerSec=108.32570871091323, CurrSamplesPerSec=111.58485938589877, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:23:16,592] [INFO] [logging.py:96:log_dist] [Rank 0] step=970, skipped=15, lr=[4.8657740146956724e-06, 4.8657740146956724e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 969|ppo_ep: 1|act_loss: -0.00354766845703125|cri_loss: -0.0011920928955078125|unsuper_loss: 0.0
+average reward score: 5.51171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.65%) |Training time=0.45s (20.83%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41
+epoch: 0|step: 970|ppo_ep: 1|act_loss: -0.0154571533203125|cri_loss: -0.00266265869140625|unsuper_loss: 0.0
+average reward score: 5.08984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.65s (74.54%) |Training time=0.46s (20.98%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.46 |AvgSamplesPerSec=14.41
+epoch: 0|step: 971|ppo_ep: 1|act_loss: -0.0240478515625|cri_loss: -0.01140594482421875|unsuper_loss: 0.0
+average reward score: 5.2421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.38%) |Training time=0.41s (19.03%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.41
+epoch: 0|step: 972|ppo_ep: 1|act_loss: -0.014373779296875|cri_loss: -0.00548553466796875|unsuper_loss: 0.0
+average reward score: 5.0625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.47%) |Training time=0.43s (19.96%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.41
+epoch: 0|step: 973|ppo_ep: 1|act_loss: -0.0204620361328125|cri_loss: -0.00933837890625|unsuper_loss: 0.0
+average reward score: 6.5625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.12%) |Training time=0.44s (20.34%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.41
+epoch: 0|step: 974|ppo_ep: 1|act_loss: 0.0244140625|cri_loss: 0.0131072998046875|unsuper_loss: 0.0
+average reward score: 5.09765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.16%) |Training time=0.44s (20.33%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41
+epoch: 0|step: 975|ppo_ep: 1|act_loss: 0.022674560546875|cri_loss: 0.013916015625|unsuper_loss: 0.0
+average reward score: 6.2734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.12%) |Training time=0.44s (20.33%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41
+epoch: 0|step: 976|ppo_ep: 1|act_loss: -0.0102996826171875|cri_loss: -0.003696441650390625|unsuper_loss: 0.0
+average reward score: 6.1328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.92%) |Training time=0.45s (20.54%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41
+epoch: 0|step: 977|ppo_ep: 1|act_loss: -0.0123748779296875|cri_loss: -0.00479888916015625|unsuper_loss: 0.0
+average reward score: 5.69140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.66%) |Training time=0.45s (20.81%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41
+epoch: 0|step: 978|ppo_ep: 1|act_loss: -0.020172119140625|cri_loss: -0.00809478759765625|unsuper_loss: 0.0
+average reward score: 5.91015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.85%) |Training time=0.45s (20.62%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.41
+[2023-04-14 09:23:38,256] [INFO] [logging.py:96:log_dist] [Rank 0] step=980, skipped=15, lr=[9.384905047843602e-06, 9.384905047843602e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:23:38,616] [INFO] [timer.py:199:stop] epoch=0/micro_step=980/global_step=980, RunningAvgSamplesPerSec=108.26986922621812, CurrSamplesPerSec=51.679806183969674, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:23:38,710] [INFO] [logging.py:96:log_dist] [Rank 0] step=980, skipped=15, lr=[4.862645102509638e-06, 4.862645102509638e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 979|ppo_ep: 1|act_loss: -0.041595458984375|cri_loss: -0.0178680419921875|unsuper_loss: 0.0
+average reward score: 5.859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.53s |Gather latency=0.00s (0.00%) |Generate time=1.64s (64.81%) |Training time=0.79s (31.28%) |Others=0.10 (3.91%)|CurSamplesPerSec=12.67 |AvgSamplesPerSec=14.41
+epoch: 0|step: 980|ppo_ep: 1|act_loss: 0.01123046875|cri_loss: 0.0067138671875|unsuper_loss: 0.0
+average reward score: 6.11328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.73s (75.98%) |Training time=0.45s (19.68%) |Others=0.10 (4.34%)|CurSamplesPerSec=14.08 |AvgSamplesPerSec=14.41
+epoch: 0|step: 981|ppo_ep: 1|act_loss: 0.071533203125|cri_loss: 0.037384033203125|unsuper_loss: 0.0
+average reward score: 5.9609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.54%) |Training time=0.45s (20.92%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.41
+epoch: 0|step: 982|ppo_ep: 1|act_loss: -0.0034942626953125|cri_loss: -8.7738037109375e-05|unsuper_loss: 0.0
+average reward score: 5.921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.84%) |Training time=0.45s (20.61%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41
+epoch: 0|step: 983|ppo_ep: 1|act_loss: -0.00067138671875|cri_loss: 3.4809112548828125e-05|unsuper_loss: 0.0
+average reward score: 5.76953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.72%) |Training time=0.45s (20.74%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41
+epoch: 0|step: 984|ppo_ep: 1|act_loss: -0.009124755859375|cri_loss: -0.004352569580078125|unsuper_loss: 0.0
+average reward score: 6.12890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.54%) |Training time=0.45s (20.91%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41
+epoch: 0|step: 985|ppo_ep: 1|act_loss: -0.0148773193359375|cri_loss: -0.00699615478515625|unsuper_loss: 0.0
+average reward score: 5.7890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.00%) |Training time=0.44s (18.75%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.41
+epoch: 0|step: 986|ppo_ep: 1|act_loss: -0.018524169921875|cri_loss: -0.0087890625|unsuper_loss: 0.0
+average reward score: 5.6640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.38%) |Training time=0.44s (20.06%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41
+epoch: 0|step: 987|ppo_ep: 1|act_loss: 0.069580078125|cri_loss: 0.036865234375|unsuper_loss: 0.0
+average reward score: 5.77734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.34%) |Training time=0.44s (20.12%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
+epoch: 0|step: 988|ppo_ep: 1|act_loss: 0.01471710205078125|cri_loss: 0.00804901123046875|unsuper_loss: 0.0
+average reward score: 6.09375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.91%) |Training time=0.45s (20.53%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41
+[2023-04-14 09:24:00,562] [INFO] [logging.py:96:log_dist] [Rank 0] step=990, skipped=15, lr=[9.378798658531574e-06, 9.378798658531574e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:24:00,580] [INFO] [timer.py:199:stop] epoch=0/micro_step=990/global_step=990, RunningAvgSamplesPerSec=108.31861477028136, CurrSamplesPerSec=114.56618951285925, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:24:00,673] [INFO] [logging.py:96:log_dist] [Rank 0] step=990, skipped=15, lr=[4.859481170223614e-06, 4.859481170223614e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 989|ppo_ep: 1|act_loss: 0.0231475830078125|cri_loss: 0.0121002197265625|unsuper_loss: 0.0
+average reward score: 6.42578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.06%) |Training time=0.44s (20.41%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
+epoch: 0|step: 990|ppo_ep: 1|act_loss: -0.007503509521484375|cri_loss: -0.003055572509765625|unsuper_loss: 0.0
+average reward score: 6.16015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.04%) |Training time=0.44s (20.39%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
+epoch: 0|step: 991|ppo_ep: 1|act_loss: 0.0267181396484375|cri_loss: 0.01448822021484375|unsuper_loss: 0.0
+average reward score: 6.234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.72%) |Training time=0.45s (20.65%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41
+epoch: 0|step: 992|ppo_ep: 1|act_loss: 0.029205322265625|cri_loss: 0.017303466796875|unsuper_loss: 0.0
+average reward score: 6.21875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.71%) |Training time=0.45s (20.74%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41
+epoch: 0|step: 993|ppo_ep: 1|act_loss: -0.031524658203125|cri_loss: -0.01493072509765625|unsuper_loss: 0.0
+average reward score: 5.78515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.63%) |Training time=0.45s (20.84%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.41
+epoch: 0|step: 994|ppo_ep: 1|act_loss: -0.037261962890625|cri_loss: -0.016632080078125|unsuper_loss: 0.0
+average reward score: 5.89453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.71%) |Training time=0.45s (20.74%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41
+epoch: 0|step: 995|ppo_ep: 1|act_loss: 0.0576171875|cri_loss: 0.0328369140625|unsuper_loss: 0.0
+average reward score: 6.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.40%) |Training time=0.46s (20.98%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41
+epoch: 0|step: 996|ppo_ep: 1|act_loss: 0.0141754150390625|cri_loss: 0.0078125|unsuper_loss: 0.0
+average reward score: 5.375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.63%) |Training time=0.45s (20.84%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41
+epoch: 0|step: 997|ppo_ep: 1|act_loss: 0.04144287109375|cri_loss: 0.0221099853515625|unsuper_loss: 0.0
+average reward score: 6.1640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.45%) |Training time=0.46s (21.01%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.41
+epoch: 0|step: 998|ppo_ep: 1|act_loss: 0.00325775146484375|cri_loss: 0.0024585723876953125|unsuper_loss: 0.0
+average reward score: 5.97265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.88%) |Training time=0.45s (20.62%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.41
+[2023-04-14 09:24:22,297] [INFO] [logging.py:96:log_dist] [Rank 0] step=1000, skipped=15, lr=[9.372624770937987e-06, 9.372624770937987e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:24:22,315] [INFO] [timer.py:199:stop] epoch=0/micro_step=1000/global_step=1000, RunningAvgSamplesPerSec=108.34636245517726, CurrSamplesPerSec=111.06482200944505, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:24:22,408] [INFO] [logging.py:96:log_dist] [Rank 0] step=1000, skipped=15, lr=[4.856282264734708e-06, 4.856282264734708e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 999|ppo_ep: 1|act_loss: 0.003055572509765625|cri_loss: 0.0018558502197265625|unsuper_loss: 0.0
+average reward score: 6.09375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.69%) |Training time=0.45s (20.79%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1000|ppo_ep: 1|act_loss: -0.008026123046875|cri_loss: -0.003406524658203125|unsuper_loss: 0.0
+average reward score: 5.75390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.79s (76.68%) |Training time=0.44s (19.02%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.68 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1001|ppo_ep: 1|act_loss: -0.03692626953125|cri_loss: -0.017974853515625|unsuper_loss: 0.0
+average reward score: 5.84375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.69%) |Training time=0.43s (19.77%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1002|ppo_ep: 1|act_loss: -0.041015625|cri_loss: -0.0196990966796875|unsuper_loss: 0.0
+average reward score: 5.515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.52%) |Training time=0.43s (19.95%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1003|ppo_ep: 1|act_loss: 0.01285552978515625|cri_loss: 0.006832122802734375|unsuper_loss: 0.0
+average reward score: 5.49609375
+-------------------------------------------------------------------------------------
+|E2E latency=3.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (51.14%) |Training time=0.44s (14.02%) |Others=1.10 (34.84%)|CurSamplesPerSec=10.12 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1004|ppo_ep: 1|act_loss: 0.014739990234375|cri_loss: 0.0085296630859375|unsuper_loss: 0.0
+average reward score: 5.42578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.93%) |Training time=0.44s (20.51%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1005|ppo_ep: 1|act_loss: 0.0325927734375|cri_loss: 0.019317626953125|unsuper_loss: 0.0
+average reward score: 5.9453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.77%) |Training time=0.45s (20.66%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1006|ppo_ep: 1|act_loss: 0.00997161865234375|cri_loss: 0.00556182861328125|unsuper_loss: 0.0
+average reward score: 5.7265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.56%) |Training time=0.45s (20.89%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1007|ppo_ep: 1|act_loss: 0.01336669921875|cri_loss: 0.00836944580078125|unsuper_loss: 0.0
+average reward score: 5.4453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.01%) |Training time=0.44s (20.44%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1008|ppo_ep: 1|act_loss: -0.04754638671875|cri_loss: -0.018310546875|unsuper_loss: 0.0
+average reward score: 5.71875
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.46%) |Training time=0.46s (21.05%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.41
+[2023-04-14 09:24:45,254] [INFO] [logging.py:96:log_dist] [Rank 0] step=1010, skipped=15, lr=[9.366383476574745e-06, 9.366383476574745e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:24:45,272] [INFO] [timer.py:199:stop] epoch=0/micro_step=1010/global_step=1010, RunningAvgSamplesPerSec=108.39448732121316, CurrSamplesPerSec=110.60107949535652, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:24:45,365] [INFO] [logging.py:96:log_dist] [Rank 0] step=1010, skipped=15, lr=[4.853048433458417e-06, 4.853048433458417e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1009|ppo_ep: 1|act_loss: -0.010223388671875|cri_loss: -0.004230499267578125|unsuper_loss: 0.0
+average reward score: 5.6328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.72s (75.75%) |Training time=0.45s (19.93%) |Others=0.10 (4.32%)|CurSamplesPerSec=14.10 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1010|ppo_ep: 1|act_loss: -0.05865478515625|cri_loss: -0.0288543701171875|unsuper_loss: 0.0
+average reward score: 5.46875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.53%) |Training time=0.45s (20.93%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1011|ppo_ep: 1|act_loss: -0.03656005859375|cri_loss: -0.017578125|unsuper_loss: 0.0
+average reward score: 5.47265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.98%) |Training time=0.44s (20.47%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1012|ppo_ep: 1|act_loss: -0.009002685546875|cri_loss: -0.003559112548828125|unsuper_loss: 0.0
+average reward score: 5.359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.94%) |Training time=0.44s (20.52%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1013|ppo_ep: 1|act_loss: 0.029022216796875|cri_loss: 0.0154571533203125|unsuper_loss: 0.0
+average reward score: 5.85546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.88%) |Training time=0.45s (20.57%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1014|ppo_ep: 1|act_loss: 0.0226593017578125|cri_loss: 0.01175689697265625|unsuper_loss: 0.0
+average reward score: 5.578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.72%) |Training time=0.45s (20.73%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1015|ppo_ep: 1|act_loss: -0.0012493133544921875|cri_loss: 0.0007171630859375|unsuper_loss: 0.0
+average reward score: 5.66015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.00%) |Training time=0.44s (18.78%) |Others=0.10 (4.22%)|CurSamplesPerSec=13.70 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1016|ppo_ep: 1|act_loss: 0.038787841796875|cri_loss: 0.020965576171875|unsuper_loss: 0.0
+average reward score: 5.43359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.66s (75.84%) |Training time=0.43s (19.62%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1017|ppo_ep: 1|act_loss: -0.019256591796875|cri_loss: -0.00925445556640625|unsuper_loss: 0.0
+average reward score: 5.65625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.90%) |Training time=0.44s (20.55%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1018|ppo_ep: 1|act_loss: -0.06585693359375|cri_loss: -0.031982421875|unsuper_loss: 0.0
+average reward score: 6.078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.62s (76.81%) |Training time=0.39s (18.33%) |Others=0.10 (4.85%)|CurSamplesPerSec=15.17 |AvgSamplesPerSec=14.41
+[2023-04-14 09:25:07,744] [INFO] [logging.py:96:log_dist] [Rank 0] step=1020, skipped=15, lr=[9.360074867952892e-06, 9.360074867952892e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:25:07,762] [INFO] [timer.py:199:stop] epoch=0/micro_step=1020/global_step=1020, RunningAvgSamplesPerSec=108.46349582136385, CurrSamplesPerSec=110.18052442942728, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:25:07,855] [INFO] [logging.py:96:log_dist] [Rank 0] step=1020, skipped=15, lr=[4.849779724327923e-06, 4.849779724327923e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1019|ppo_ep: 1|act_loss: 0.060546875|cri_loss: 0.03167724609375|unsuper_loss: 0.0
+average reward score: 5.390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.60%) |Training time=0.45s (20.88%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1020|ppo_ep: 1|act_loss: 0.036865234375|cri_loss: 0.019134521484375|unsuper_loss: 0.0
+average reward score: 5.140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.89%) |Training time=0.45s (20.57%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1021|ppo_ep: 1|act_loss: 0.0103302001953125|cri_loss: 0.00662994384765625|unsuper_loss: 0.0
+average reward score: 5.359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.70%) |Training time=0.45s (20.77%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1022|ppo_ep: 1|act_loss: -0.0287017822265625|cri_loss: -0.01360321044921875|unsuper_loss: 0.0
+average reward score: 5.421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.06%) |Training time=0.44s (20.39%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1023|ppo_ep: 1|act_loss: 8.0108642578125e-05|cri_loss: 0.00118255615234375|unsuper_loss: 0.0
+average reward score: 4.91015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.97%) |Training time=0.44s (20.47%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1024|ppo_ep: 1|act_loss: -0.00025081634521484375|cri_loss: 0.0006818771362304688|unsuper_loss: 0.0
+average reward score: 5.42578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.55%) |Training time=0.45s (20.64%) |Others=0.11 (4.81%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1025|ppo_ep: 1|act_loss: 0.051239013671875|cri_loss: 0.027984619140625|unsuper_loss: 0.0
+average reward score: 5.265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.83%) |Training time=0.45s (20.62%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1026|ppo_ep: 1|act_loss: -0.001129150390625|cri_loss: 0.001171112060546875|unsuper_loss: 0.0
+average reward score: 5.23828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.72%) |Training time=0.45s (20.74%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1027|ppo_ep: 1|act_loss: 0.031158447265625|cri_loss: 0.0161895751953125|unsuper_loss: 0.0
+average reward score: 5.578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.87%) |Training time=0.45s (20.59%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1028|ppo_ep: 1|act_loss: 0.01092529296875|cri_loss: 0.006145477294921875|unsuper_loss: 0.0
+average reward score: 5.0703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.57%) |Training time=0.45s (20.90%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
+[2023-04-14 09:25:29,433] [INFO] [logging.py:96:log_dist] [Rank 0] step=1030, skipped=15, lr=[9.353699038581223e-06, 9.353699038581223e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:25:29,451] [INFO] [timer.py:199:stop] epoch=0/micro_step=1030/global_step=1030, RunningAvgSamplesPerSec=108.50163632858461, CurrSamplesPerSec=113.407363407382, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:25:29,544] [INFO] [logging.py:96:log_dist] [Rank 0] step=1030, skipped=15, lr=[4.84647618579338e-06, 4.84647618579338e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1029|ppo_ep: 1|act_loss: -0.00655364990234375|cri_loss: -0.00279998779296875|unsuper_loss: 0.0
+average reward score: 5.3125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.93%) |Training time=0.44s (20.54%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1030|ppo_ep: 1|act_loss: 0.0004353523254394531|cri_loss: 0.001064300537109375|unsuper_loss: 0.0
+average reward score: 5.28515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.73%) |Training time=0.45s (20.70%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1031|ppo_ep: 1|act_loss: -0.03240966796875|cri_loss: -0.015289306640625|unsuper_loss: 0.0
+average reward score: 5.19140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.19%) |Training time=0.43s (18.57%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.74 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1032|ppo_ep: 1|act_loss: 0.00957489013671875|cri_loss: 0.005382537841796875|unsuper_loss: 0.0
+average reward score: 5.7421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.78%) |Training time=0.45s (20.63%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1033|ppo_ep: 1|act_loss: 0.00380706787109375|cri_loss: 0.003082275390625|unsuper_loss: 0.0
+average reward score: 5.140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.66%) |Training time=0.45s (20.78%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1034|ppo_ep: 1|act_loss: 0.040985107421875|cri_loss: 0.024444580078125|unsuper_loss: 0.0
+average reward score: 5.2421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.51s |Gather latency=0.00s (0.00%) |Generate time=1.62s (64.50%) |Training time=0.45s (17.83%) |Others=0.44 (17.66%)|CurSamplesPerSec=12.76 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1035|ppo_ep: 1|act_loss: 0.0794677734375|cri_loss: 0.04132080078125|unsuper_loss: 0.0
+average reward score: 5.3125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.74%) |Training time=0.45s (20.69%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1036|ppo_ep: 1|act_loss: 0.020751953125|cri_loss: 0.0122833251953125|unsuper_loss: 0.0
+average reward score: 5.078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.38%) |Training time=0.43s (20.07%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1037|ppo_ep: 1|act_loss: -0.04425048828125|cri_loss: -0.0213623046875|unsuper_loss: 0.0
+average reward score: 5.4609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.59%) |Training time=0.46s (20.91%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1038|ppo_ep: 1|act_loss: -0.0184478759765625|cri_loss: -0.0088348388671875|unsuper_loss: 0.0
+average reward score: 5.7734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.72s (75.96%) |Training time=0.45s (19.69%) |Others=0.10 (4.35%)|CurSamplesPerSec=14.14 |AvgSamplesPerSec=14.41
+[2023-04-14 09:25:51,701] [INFO] [logging.py:96:log_dist] [Rank 0] step=1040, skipped=15, lr=[9.347256082964908e-06, 9.347256082964908e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:25:51,719] [INFO] [timer.py:199:stop] epoch=0/micro_step=1040/global_step=1040, RunningAvgSamplesPerSec=108.5441294190187, CurrSamplesPerSec=113.64030904049277, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:25:51,812] [INFO] [logging.py:96:log_dist] [Rank 0] step=1040, skipped=15, lr=[4.843137866821196e-06, 4.843137866821196e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1039|ppo_ep: 1|act_loss: -0.0318603515625|cri_loss: -0.015228271484375|unsuper_loss: 0.0
+average reward score: 6.421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.90%) |Training time=0.44s (20.58%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1040|ppo_ep: 1|act_loss: -0.003589630126953125|cri_loss: -0.0008382797241210938|unsuper_loss: 0.0
+average reward score: 5.859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.07%) |Training time=0.44s (20.36%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1041|ppo_ep: 1|act_loss: 0.020965576171875|cri_loss: 0.0125579833984375|unsuper_loss: 0.0
+average reward score: 6.23828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.95%) |Training time=0.44s (20.49%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1042|ppo_ep: 1|act_loss: 0.06640625|cri_loss: 0.03594970703125|unsuper_loss: 0.0
+average reward score: 6.8046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.68%) |Training time=0.43s (19.77%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1043|ppo_ep: 1|act_loss: 0.0677490234375|cri_loss: 0.035980224609375|unsuper_loss: 0.0
+average reward score: 6.69140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.70%) |Training time=0.43s (19.74%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1044|ppo_ep: 1|act_loss: 0.028533935546875|cri_loss: 0.015899658203125|unsuper_loss: 0.0
+average reward score: 6.125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.34%) |Training time=0.43s (20.09%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1045|ppo_ep: 1|act_loss: -0.02740478515625|cri_loss: -0.011199951171875|unsuper_loss: 0.0
+average reward score: 6.6171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.11%) |Training time=0.43s (18.61%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.74 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1046|ppo_ep: 1|act_loss: -0.00704193115234375|cri_loss: -0.0029010772705078125|unsuper_loss: 0.0
+average reward score: 6.53515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.64%) |Training time=0.43s (19.75%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1047|ppo_ep: 1|act_loss: -0.041900634765625|cri_loss: -0.0191650390625|unsuper_loss: 0.0
+average reward score: 5.9765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (76.03%) |Training time=0.42s (19.39%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1048|ppo_ep: 1|act_loss: -0.02435302734375|cri_loss: -0.01096343994140625|unsuper_loss: 0.0
+average reward score: 6.5234375
+-------------------------------------------------------------------------------------
+|E2E latency=3.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (51.33%) |Training time=0.42s (13.24%) |Others=1.13 (35.43%)|CurSamplesPerSec=10.03 |AvgSamplesPerSec=14.41
+[2023-04-14 09:26:14,502] [INFO] [logging.py:96:log_dist] [Rank 0] step=1050, skipped=15, lr=[9.340746096604097e-06, 9.340746096604097e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:26:14,520] [INFO] [timer.py:199:stop] epoch=0/micro_step=1050/global_step=1050, RunningAvgSamplesPerSec=108.63875987318835, CurrSamplesPerSec=119.30008719666715, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:26:14,613] [INFO] [logging.py:96:log_dist] [Rank 0] step=1050, skipped=15, lr=[4.839764816893315e-06, 4.839764816893315e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1049|ppo_ep: 1|act_loss: -0.0160675048828125|cri_loss: -0.0062408447265625|unsuper_loss: 0.0
+average reward score: 6.0625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.49%) |Training time=0.43s (19.96%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1050|ppo_ep: 1|act_loss: 0.043060302734375|cri_loss: 0.02301025390625|unsuper_loss: 0.0
+average reward score: 6.46875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.75%) |Training time=0.43s (19.67%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1051|ppo_ep: 1|act_loss: 0.0732421875|cri_loss: 0.038848876953125|unsuper_loss: 0.0
+average reward score: 6.47265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.35%) |Training time=0.42s (19.48%) |Others=0.11 (5.17%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1052|ppo_ep: 1|act_loss: 0.10516357421875|cri_loss: 0.055908203125|unsuper_loss: 0.0
+average reward score: 6.5546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.51%) |Training time=0.43s (19.92%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1053|ppo_ep: 1|act_loss: 0.01715087890625|cri_loss: 0.0106201171875|unsuper_loss: 0.0
+average reward score: 7.18359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.70%) |Training time=0.43s (19.74%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1054|ppo_ep: 1|act_loss: -0.00988006591796875|cri_loss: -0.0038089752197265625|unsuper_loss: 0.0
+average reward score: 6.22265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.75%) |Training time=0.43s (19.70%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1055|ppo_ep: 1|act_loss: -0.012725830078125|cri_loss: -0.0054473876953125|unsuper_loss: 0.0
+average reward score: 5.83984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.56%) |Training time=0.43s (19.87%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1056|ppo_ep: 1|act_loss: -0.029205322265625|cri_loss: -0.0108184814453125|unsuper_loss: 0.0
+average reward score: 6.203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.64%) |Training time=0.43s (19.80%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1057|ppo_ep: 1|act_loss: -0.03485107421875|cri_loss: -0.0145416259765625|unsuper_loss: 0.0
+average reward score: 5.52734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.77%) |Training time=0.43s (19.69%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1058|ppo_ep: 1|act_loss: -0.027923583984375|cri_loss: -0.0127105712890625|unsuper_loss: 0.0
+average reward score: 6.32421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.77%) |Training time=0.43s (19.68%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
+[2023-04-14 09:26:36,137] [INFO] [logging.py:96:log_dist] [Rank 0] step=1060, skipped=15, lr=[9.334169175992489e-06, 9.334169175992489e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:26:36,479] [INFO] [timer.py:199:stop] epoch=0/micro_step=1060/global_step=1060, RunningAvgSamplesPerSec=108.62859228172402, CurrSamplesPerSec=53.83196378092723, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:26:36,653] [INFO] [logging.py:96:log_dist] [Rank 0] step=1060, skipped=15, lr=[4.836357086006471e-06, 4.836357086006471e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1059|ppo_ep: 1|act_loss: 0.0212249755859375|cri_loss: 0.01244354248046875|unsuper_loss: 0.0
+average reward score: 6.14453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.58s |Gather latency=0.00s (0.00%) |Generate time=1.63s (63.28%) |Training time=0.76s (29.42%) |Others=0.19 (7.30%)|CurSamplesPerSec=12.41 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1060|ppo_ep: 1|act_loss: 0.059539794921875|cri_loss: 0.03155517578125|unsuper_loss: 0.0
+average reward score: 5.90234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.67s (75.82%) |Training time=0.42s (19.23%) |Others=0.11 (4.94%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1061|ppo_ep: 1|act_loss: 0.025146484375|cri_loss: 0.0146636962890625|unsuper_loss: 0.0
+average reward score: 5.3984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.44%) |Training time=0.43s (19.97%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1062|ppo_ep: 1|act_loss: 0.04669189453125|cri_loss: 0.0294342041015625|unsuper_loss: 0.0
+average reward score: 5.4375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.02%) |Training time=0.44s (20.41%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1063|ppo_ep: 1|act_loss: -0.025238037109375|cri_loss: -0.00970458984375|unsuper_loss: 0.0
+average reward score: 5.28515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.94%) |Training time=0.44s (20.52%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1064|ppo_ep: 1|act_loss: -0.0404052734375|cri_loss: -0.018768310546875|unsuper_loss: 0.0
+average reward score: 5.91015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.58%) |Training time=0.43s (19.87%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1065|ppo_ep: 1|act_loss: -0.181884765625|cri_loss: -0.07672119140625|unsuper_loss: 0.0
+average reward score: 5.13671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.63%) |Training time=0.43s (19.79%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1066|ppo_ep: 1|act_loss: 0.025543212890625|cri_loss: 0.014892578125|unsuper_loss: 0.0
+average reward score: 5.42578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.36%) |Training time=0.44s (20.13%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1067|ppo_ep: 1|act_loss: 0.03558349609375|cri_loss: 0.0216064453125|unsuper_loss: 0.0
+average reward score: 5.6171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.73s (76.62%) |Training time=0.43s (19.02%) |Others=0.10 (4.36%)|CurSamplesPerSec=14.15 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1068|ppo_ep: 1|act_loss: 0.0838623046875|cri_loss: 0.0458984375|unsuper_loss: 0.0
+average reward score: 4.89453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.66%) |Training time=0.43s (19.77%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41
+[2023-04-14 09:26:58,334] [INFO] [logging.py:96:log_dist] [Rank 0] step=1070, skipped=15, lr=[9.327525418615915e-06, 9.327525418615915e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:26:58,352] [INFO] [timer.py:199:stop] epoch=0/micro_step=1070/global_step=1070, RunningAvgSamplesPerSec=108.71501038930042, CurrSamplesPerSec=122.18717841341514, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:26:58,445] [INFO] [logging.py:96:log_dist] [Rank 0] step=1070, skipped=15, lr=[4.8329147246714595e-06, 4.8329147246714595e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1069|ppo_ep: 1|act_loss: 0.06964111328125|cri_loss: 0.0382080078125|unsuper_loss: 0.0
+average reward score: 5.17578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.82%) |Training time=0.42s (19.64%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1070|ppo_ep: 1|act_loss: -0.0236053466796875|cri_loss: -0.008270263671875|unsuper_loss: 0.0
+average reward score: 5.52734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.72%) |Training time=0.43s (19.72%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1071|ppo_ep: 1|act_loss: 0.009796142578125|cri_loss: 0.010284423828125|unsuper_loss: 0.0
+average reward score: 5.6015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.72%) |Training time=0.42s (19.70%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1072|ppo_ep: 1|act_loss: 0.021942138671875|cri_loss: 0.013916015625|unsuper_loss: 0.0
+average reward score: 5.07421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.89%) |Training time=0.44s (20.56%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1073|ppo_ep: 1|act_loss: 0.016815185546875|cri_loss: 0.0126800537109375|unsuper_loss: 0.0
+average reward score: 5.17578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.25%) |Training time=0.44s (20.19%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1074|ppo_ep: 1|act_loss: 0.0816650390625|cri_loss: 0.04913330078125|unsuper_loss: 0.0
+average reward score: 4.76171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.67s (74.00%) |Training time=0.48s (21.30%) |Others=0.11 (4.70%)|CurSamplesPerSec=14.19 |AvgSamplesPerSec=14.41
+[2023-04-14 09:27:11,505] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 1075|ppo_ep: 1|act_loss: 0.052154541015625|cri_loss: 0.035186767578125|unsuper_loss: 0.0
+average reward score: 4.91796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.31%) |Training time=0.44s (20.20%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1076|ppo_ep: 1|act_loss: -0.02850341796875|cri_loss: -0.00995635986328125|unsuper_loss: 0.0
+average reward score: 4.4765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.93%) |Training time=0.45s (20.55%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1077|ppo_ep: 1|act_loss: 0.0109100341796875|cri_loss: 0.01220703125|unsuper_loss: 0.0
+average reward score: 4.75
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.29%) |Training time=0.43s (20.15%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.41
+[2023-04-14 09:27:17,989] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
+epoch: 0|step: 1078|ppo_ep: 1|act_loss: -0.045440673828125|cri_loss: 0.000762939453125|unsuper_loss: 0.0
+average reward score: 4.140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.52%) |Training time=0.44s (20.28%) |Others=0.09 (4.20%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.41
+[2023-04-14 09:27:20,053] [INFO] [logging.py:96:log_dist] [Rank 0] step=1080, skipped=15, lr=[9.320814922950886e-06, 9.320814922950886e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:27:20,071] [INFO] [timer.py:199:stop] epoch=0/micro_step=1080/global_step=1080, RunningAvgSamplesPerSec=108.79082765447629, CurrSamplesPerSec=116.2762955904011, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:27:20,164] [INFO] [logging.py:96:log_dist] [Rank 0] step=1080, skipped=17, lr=[4.830135935951734e-06, 4.830135935951734e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1079|ppo_ep: 1|act_loss: 0.034088134765625|cri_loss: 0.0189666748046875|unsuper_loss: 0.0
+average reward score: 4.1953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.72s |Gather latency=0.00s (0.00%) |Generate time=1.64s (60.37%) |Training time=0.44s (16.12%) |Others=0.64 (23.51%)|CurSamplesPerSec=11.78 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1080|ppo_ep: 1|act_loss: 0.00751495361328125|cri_loss: 0.01019287109375|unsuper_loss: 0.0
+average reward score: 4.2421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.20%) |Training time=0.44s (20.22%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1081|ppo_ep: 1|act_loss: -0.002838134765625|cri_loss: 0.00644683837890625|unsuper_loss: 0.0
+average reward score: 4.43359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.08%) |Training time=0.44s (20.37%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1082|ppo_ep: 1|act_loss: 0.0215911865234375|cri_loss: 0.01245880126953125|unsuper_loss: 0.0
+average reward score: 3.99609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.76%) |Training time=0.45s (20.71%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1083|ppo_ep: 1|act_loss: 0.0736083984375|cri_loss: 0.04473876953125|unsuper_loss: 0.0
+average reward score: 3.4453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.03%) |Training time=0.44s (20.42%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1084|ppo_ep: 1|act_loss: 0.0174407958984375|cri_loss: 0.01013946533203125|unsuper_loss: 0.0
+average reward score: 4.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.98%) |Training time=0.44s (20.44%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1085|ppo_ep: 1|act_loss: 0.0565185546875|cri_loss: 0.04034423828125|unsuper_loss: 0.0
+average reward score: 3.533203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.75%) |Training time=0.45s (20.70%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1086|ppo_ep: 1|act_loss: -0.1109619140625|cri_loss: -0.04583740234375|unsuper_loss: 0.0
+average reward score: 3.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.62s (68.75%) |Training time=0.44s (18.86%) |Others=0.29 (12.39%)|CurSamplesPerSec=13.57 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1087|ppo_ep: 1|act_loss: 0.01435089111328125|cri_loss: 0.008758544921875|unsuper_loss: 0.0
+average reward score: 3.759765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.85%) |Training time=0.45s (20.61%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1088|ppo_ep: 1|act_loss: 0.047698974609375|cri_loss: 0.025726318359375|unsuper_loss: 0.0
+average reward score: 4.625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.96%) |Training time=0.44s (20.49%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
+[2023-04-14 09:27:42,603] [INFO] [logging.py:96:log_dist] [Rank 0] step=1090, skipped=15, lr=[9.314037788463137e-06, 9.314037788463137e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:27:42,621] [INFO] [timer.py:199:stop] epoch=0/micro_step=1090/global_step=1090, RunningAvgSamplesPerSec=108.83461521557098, CurrSamplesPerSec=114.59602382121284, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:27:42,714] [INFO] [logging.py:96:log_dist] [Rank 0] step=1090, skipped=17, lr=[4.826631368735207e-06, 4.826631368735207e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1089|ppo_ep: 1|act_loss: -0.01056671142578125|cri_loss: -0.00135040283203125|unsuper_loss: 0.0
+average reward score: 4.0546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.79s (76.83%) |Training time=0.44s (18.96%) |Others=0.10 (4.21%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1090|ppo_ep: 1|act_loss: 0.05499267578125|cri_loss: 0.038818359375|unsuper_loss: 0.0
+average reward score: 4.03125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.65%) |Training time=0.45s (20.84%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1091|ppo_ep: 1|act_loss: 0.348388671875|cri_loss: 0.212890625|unsuper_loss: 0.0
+average reward score: 3.84765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.17%) |Training time=0.44s (20.31%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1092|ppo_ep: 1|act_loss: 0.2303466796875|cri_loss: 0.137939453125|unsuper_loss: 0.0
+average reward score: 3.955078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.25%) |Training time=0.44s (20.05%) |Others=0.15 (6.70%)|CurSamplesPerSec=14.44 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1093|ppo_ep: 1|act_loss: 0.0892333984375|cri_loss: 0.049407958984375|unsuper_loss: 0.0
+average reward score: 3.763671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.05%) |Training time=0.44s (20.41%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1094|ppo_ep: 1|act_loss: 0.0048675537109375|cri_loss: 0.007244110107421875|unsuper_loss: 0.0
+average reward score: 3.6640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.00%) |Training time=0.44s (20.46%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1095|ppo_ep: 1|act_loss: -0.173828125|cri_loss: -0.07159423828125|unsuper_loss: 0.0
+average reward score: 4.0390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.98%) |Training time=0.44s (20.48%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1096|ppo_ep: 1|act_loss: -0.06512451171875|cri_loss: -0.02496337890625|unsuper_loss: 0.0
+average reward score: 4.2734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.74s (76.39%) |Training time=0.44s (19.29%) |Others=0.10 (4.33%)|CurSamplesPerSec=14.03 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1097|ppo_ep: 1|act_loss: -0.01050567626953125|cri_loss: 0.0022735595703125|unsuper_loss: 0.0
+average reward score: 4.17578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.37%) |Training time=0.43s (20.08%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1098|ppo_ep: 1|act_loss: 0.01104736328125|cri_loss: 0.01215362548828125|unsuper_loss: 0.0
+average reward score: 4.64453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.62s (76.93%) |Training time=0.39s (18.41%) |Others=0.10 (4.66%)|CurSamplesPerSec=15.16 |AvgSamplesPerSec=14.41
+[2023-04-14 09:28:05,186] [INFO] [logging.py:96:log_dist] [Rank 0] step=1100, skipped=15, lr=[9.307194115606148e-06, 9.307194115606148e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:28:05,204] [INFO] [timer.py:199:stop] epoch=0/micro_step=1100/global_step=1100, RunningAvgSamplesPerSec=108.9002611488009, CurrSamplesPerSec=115.39545924679588, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:28:05,296] [INFO] [logging.py:96:log_dist] [Rank 0] step=1100, skipped=17, lr=[4.823092315229102e-06, 4.823092315229102e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1099|ppo_ep: 1|act_loss: -0.0202178955078125|cri_loss: -0.003326416015625|unsuper_loss: 0.0
+average reward score: 3.9765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.13%) |Training time=0.44s (20.34%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1100|ppo_ep: 1|act_loss: -0.01149749755859375|cri_loss: -0.00286865234375|unsuper_loss: 0.0
+average reward score: 4.5859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.96%) |Training time=0.44s (20.50%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1101|ppo_ep: 1|act_loss: -0.02362060546875|cri_loss: -0.00521087646484375|unsuper_loss: 0.0
+average reward score: 4.79296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.05%) |Training time=0.44s (20.40%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1102|ppo_ep: 1|act_loss: 0.00652313232421875|cri_loss: 0.00743865966796875|unsuper_loss: 0.0
+average reward score: 4.76171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.87%) |Training time=0.45s (20.58%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1103|ppo_ep: 1|act_loss: 0.023468017578125|cri_loss: 0.01531982421875|unsuper_loss: 0.0
+average reward score: 5.0234375
+-------------------------------------------------------------------------------------
+|E2E latency=3.39s |Gather latency=0.00s (0.00%) |Generate time=1.78s (52.71%) |Training time=0.44s (12.95%) |Others=1.16 (34.34%)|CurSamplesPerSec=9.45 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1104|ppo_ep: 1|act_loss: 0.036376953125|cri_loss: 0.01971435546875|unsuper_loss: 0.0
+average reward score: 4.43359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.45%) |Training time=0.43s (19.78%) |Others=0.10 (4.77%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1105|ppo_ep: 1|act_loss: 0.07080078125|cri_loss: 0.03900146484375|unsuper_loss: 0.0
+average reward score: 4.6796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.01%) |Training time=0.42s (19.47%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1106|ppo_ep: 1|act_loss: 0.011138916015625|cri_loss: 0.0069122314453125|unsuper_loss: 0.0
+average reward score: 4.63671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.34%) |Training time=0.44s (20.12%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1107|ppo_ep: 1|act_loss: 0.0400390625|cri_loss: 0.022705078125|unsuper_loss: 0.0
+average reward score: 4.8671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.37%) |Training time=0.43s (20.06%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1108|ppo_ep: 1|act_loss: -0.109130859375|cri_loss: -0.045318603515625|unsuper_loss: 0.0
+average reward score: 4.1796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.87s |Gather latency=0.00s (0.00%) |Generate time=1.63s (56.75%) |Training time=0.44s (15.15%) |Others=0.81 (28.10%)|CurSamplesPerSec=11.13 |AvgSamplesPerSec=14.41
+[2023-04-14 09:28:28,786] [INFO] [logging.py:96:log_dist] [Rank 0] step=1110, skipped=15, lr=[9.300284005819661e-06, 9.300284005819661e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:28:28,804] [INFO] [timer.py:199:stop] epoch=0/micro_step=1110/global_step=1110, RunningAvgSamplesPerSec=108.96558639565792, CurrSamplesPerSec=116.06514008993427, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:28:28,897] [INFO] [logging.py:96:log_dist] [Rank 0] step=1110, skipped=17, lr=[4.8195188278907305e-06, 4.8195188278907305e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1109|ppo_ep: 1|act_loss: -0.0684814453125|cri_loss: -0.0257110595703125|unsuper_loss: 0.0
+average reward score: 4.12109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.19%) |Training time=0.44s (20.28%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1110|ppo_ep: 1|act_loss: 0.0562744140625|cri_loss: 0.031768798828125|unsuper_loss: 0.0
+average reward score: 5.16796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.98%) |Training time=0.44s (20.47%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1111|ppo_ep: 1|act_loss: -0.0019283294677734375|cri_loss: 0.0|unsuper_loss: 0.0
+average reward score: 4.421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.26%) |Training time=0.44s (20.20%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1112|ppo_ep: 1|act_loss: 0.04559326171875|cri_loss: 0.0241241455078125|unsuper_loss: 0.0
+average reward score: 5.22265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.81%) |Training time=0.43s (19.66%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1113|ppo_ep: 1|act_loss: 0.0626220703125|cri_loss: 0.03411865234375|unsuper_loss: 0.0
+average reward score: 4.84765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.02%) |Training time=0.44s (20.44%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1114|ppo_ep: 1|act_loss: 0.01316070556640625|cri_loss: 0.00732421875|unsuper_loss: 0.0
+average reward score: 4.48828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.56s |Gather latency=0.00s (0.00%) |Generate time=1.62s (63.50%) |Training time=0.44s (17.30%) |Others=0.49 (19.20%)|CurSamplesPerSec=12.51 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1115|ppo_ep: 1|act_loss: -0.030792236328125|cri_loss: -0.014434814453125|unsuper_loss: 0.0
+average reward score: 3.97265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.05%) |Training time=0.44s (20.38%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1116|ppo_ep: 1|act_loss: 0.01446533203125|cri_loss: 0.0109100341796875|unsuper_loss: 0.0
+average reward score: 4.43359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.04%) |Training time=0.44s (20.42%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1117|ppo_ep: 1|act_loss: -0.0129547119140625|cri_loss: -0.00620269775390625|unsuper_loss: 0.0
+average reward score: 5.46875
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.27%) |Training time=0.43s (18.50%) |Others=0.10 (4.22%)|CurSamplesPerSec=13.77 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1118|ppo_ep: 1|act_loss: 0.0660400390625|cri_loss: 0.03564453125|unsuper_loss: 0.0
+average reward score: 5.03125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.07%) |Training time=0.44s (20.37%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41
+[2023-04-14 09:28:50,989] [INFO] [logging.py:96:log_dist] [Rank 0] step=1120, skipped=15, lr=[9.293307561528172e-06, 9.293307561528172e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:28:51,007] [INFO] [timer.py:199:stop] epoch=0/micro_step=1120/global_step=1120, RunningAvgSamplesPerSec=109.03085977626249, CurrSamplesPerSec=126.14459975996218, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:28:51,103] [INFO] [logging.py:96:log_dist] [Rank 0] step=1120, skipped=17, lr=[4.815910959687795e-06, 4.815910959687795e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1119|ppo_ep: 1|act_loss: 0.044677734375|cri_loss: 0.0248870849609375|unsuper_loss: 0.0
+average reward score: 4.21875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.08%) |Training time=0.42s (19.25%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1120|ppo_ep: 1|act_loss: 0.0006799697875976562|cri_loss: 0.001979827880859375|unsuper_loss: 0.0
+average reward score: 4.3984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.89%) |Training time=0.42s (19.56%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1121|ppo_ep: 1|act_loss: 0.0147705078125|cri_loss: 0.00927734375|unsuper_loss: 0.0
+average reward score: 5.06640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.70%) |Training time=0.45s (20.75%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1122|ppo_ep: 1|act_loss: 0.0072174072265625|cri_loss: 0.00495147705078125|unsuper_loss: 0.0
+average reward score: 4.875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.66%) |Training time=0.45s (20.79%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1123|ppo_ep: 1|act_loss: 0.047943115234375|cri_loss: 0.0294342041015625|unsuper_loss: 0.0
+average reward score: 4.54296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.74%) |Training time=0.45s (20.43%) |Others=0.11 (4.83%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1124|ppo_ep: 1|act_loss: 0.00019073486328125|cri_loss: 0.0015573501586914062|unsuper_loss: 0.0
+average reward score: 4.015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.65s (71.87%) |Training time=0.45s (19.42%) |Others=0.20 (8.71%)|CurSamplesPerSec=13.91 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1125|ppo_ep: 1|act_loss: 0.04345703125|cri_loss: 0.0232696533203125|unsuper_loss: 0.0
+average reward score: 4.109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.62s (76.99%) |Training time=0.39s (18.33%) |Others=0.10 (4.68%)|CurSamplesPerSec=15.18 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1126|ppo_ep: 1|act_loss: -0.046051025390625|cri_loss: -0.02191162109375|unsuper_loss: 0.0
+average reward score: 4.7890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.09%) |Training time=0.44s (20.34%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1127|ppo_ep: 1|act_loss: -0.0043487548828125|cri_loss: -0.0015249252319335938|unsuper_loss: 0.0
+average reward score: 5.03125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.11%) |Training time=0.44s (20.32%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1128|ppo_ep: 1|act_loss: 0.04193115234375|cri_loss: 0.023284912109375|unsuper_loss: 0.0
+average reward score: 4.8125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.19%) |Training time=0.44s (20.25%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
+[2023-04-14 09:29:12,723] [INFO] [logging.py:96:log_dist] [Rank 0] step=1130, skipped=15, lr=[9.286264886139418e-06, 9.286264886139418e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:29:12,741] [INFO] [timer.py:199:stop] epoch=0/micro_step=1130/global_step=1130, RunningAvgSamplesPerSec=109.09936493188563, CurrSamplesPerSec=115.27771474509555, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:29:12,834] [INFO] [logging.py:96:log_dist] [Rank 0] step=1130, skipped=17, lr=[4.812268764097606e-06, 4.812268764097606e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1129|ppo_ep: 1|act_loss: 0.0244140625|cri_loss: 0.0134124755859375|unsuper_loss: 0.0
+average reward score: 4.63671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.05%) |Training time=0.44s (20.41%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1130|ppo_ep: 1|act_loss: 0.0638427734375|cri_loss: 0.0350341796875|unsuper_loss: 0.0
+average reward score: 4.0625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.02%) |Training time=0.44s (20.41%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1131|ppo_ep: 1|act_loss: 0.012664794921875|cri_loss: 0.00897216796875|unsuper_loss: 0.0
+average reward score: 4.8203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.42%) |Training time=0.43s (20.02%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1132|ppo_ep: 1|act_loss: -0.006801605224609375|cri_loss: -0.00240325927734375|unsuper_loss: 0.0
+average reward score: 3.84375
+-------------------------------------------------------------------------------------
+|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.80s (76.72%) |Training time=0.44s (18.77%) |Others=0.11 (4.51%)|CurSamplesPerSec=13.64 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1133|ppo_ep: 1|act_loss: -0.061920166015625|cri_loss: -0.02935791015625|unsuper_loss: 0.0
+average reward score: 4.25
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.95%) |Training time=0.44s (20.39%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1134|ppo_ep: 1|act_loss: -0.033355712890625|cri_loss: -0.0149993896484375|unsuper_loss: 0.0
+average reward score: 4.8828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (76.02%) |Training time=0.42s (19.42%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1135|ppo_ep: 1|act_loss: -0.023468017578125|cri_loss: -0.01123046875|unsuper_loss: 0.0
+average reward score: 4.31640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.51s |Gather latency=0.00s (0.00%) |Generate time=1.65s (65.98%) |Training time=0.40s (16.15%) |Others=0.45 (17.87%)|CurSamplesPerSec=12.77 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1136|ppo_ep: 1|act_loss: -0.0093231201171875|cri_loss: -0.004039764404296875|unsuper_loss: 0.0
+average reward score: 5.02734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.92%) |Training time=0.44s (20.52%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1137|ppo_ep: 1|act_loss: 0.023101806640625|cri_loss: 0.0122833251953125|unsuper_loss: 0.0
+average reward score: 4.63671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.99%) |Training time=0.44s (20.45%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1138|ppo_ep: 1|act_loss: 0.02978515625|cri_loss: 0.0157318115234375|unsuper_loss: 0.0
+average reward score: 4.2109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.90%) |Training time=0.45s (20.55%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
+[2023-04-14 09:29:34,886] [INFO] [logging.py:96:log_dist] [Rank 0] step=1140, skipped=15, lr=[9.279156084042835e-06, 9.279156084042835e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:29:34,904] [INFO] [timer.py:199:stop] epoch=0/micro_step=1140/global_step=1140, RunningAvgSamplesPerSec=109.17206823492205, CurrSamplesPerSec=121.0444594753028, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:29:34,997] [INFO] [logging.py:96:log_dist] [Rank 0] step=1140, skipped=17, lr=[4.80859229510629e-06, 4.80859229510629e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1139|ppo_ep: 1|act_loss: 0.02178955078125|cri_loss: 0.011505126953125|unsuper_loss: 0.0
+average reward score: 3.923828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.76%) |Training time=0.43s (19.72%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1140|ppo_ep: 1|act_loss: -0.01971435546875|cri_loss: -0.00472259521484375|unsuper_loss: 0.0
+average reward score: 4.578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.19%) |Training time=0.44s (20.26%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1141|ppo_ep: 1|act_loss: -0.04974365234375|cri_loss: -0.022308349609375|unsuper_loss: 0.0
+average reward score: 4.73828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.89%) |Training time=0.45s (20.58%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1142|ppo_ep: 1|act_loss: 0.0038242340087890625|cri_loss: 0.003997802734375|unsuper_loss: 0.0
+average reward score: 4.66796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.12%) |Training time=0.44s (20.32%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1143|ppo_ep: 1|act_loss: 0.006988525390625|cri_loss: 0.00522613525390625|unsuper_loss: 0.0
+average reward score: 4.69140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.05%) |Training time=0.44s (20.41%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1144|ppo_ep: 1|act_loss: 0.0173187255859375|cri_loss: 0.0114898681640625|unsuper_loss: 0.0
+average reward score: 4.8515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.82%) |Training time=0.45s (20.63%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1145|ppo_ep: 1|act_loss: 0.0137481689453125|cri_loss: 0.0083465576171875|unsuper_loss: 0.0
+average reward score: 5.01171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.84%) |Training time=0.45s (20.61%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1146|ppo_ep: 1|act_loss: 0.04644775390625|cri_loss: 0.025482177734375|unsuper_loss: 0.0
+average reward score: 5.1015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.91%) |Training time=0.44s (20.54%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1147|ppo_ep: 1|act_loss: -0.000690460205078125|cri_loss: 0.0012369155883789062|unsuper_loss: 0.0
+average reward score: 4.29296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.80s (76.88%) |Training time=0.44s (18.92%) |Others=0.10 (4.20%)|CurSamplesPerSec=13.66 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1148|ppo_ep: 1|act_loss: 0.0494384765625|cri_loss: 0.0260009765625|unsuper_loss: 0.0
+average reward score: 4.12109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.21%) |Training time=0.44s (20.23%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.41
+[2023-04-14 09:29:56,703] [INFO] [logging.py:96:log_dist] [Rank 0] step=1150, skipped=15, lr=[9.27198126060802e-06, 9.27198126060802e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:29:56,721] [INFO] [timer.py:199:stop] epoch=0/micro_step=1150/global_step=1150, RunningAvgSamplesPerSec=109.21557534240085, CurrSamplesPerSec=115.28613124391221, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:29:56,814] [INFO] [logging.py:96:log_dist] [Rank 0] step=1150, skipped=17, lr=[4.804881607207983e-06, 4.804881607207983e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1149|ppo_ep: 1|act_loss: 0.0153045654296875|cri_loss: 0.01132965087890625|unsuper_loss: 0.0
+average reward score: 5.625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.15%) |Training time=0.44s (20.32%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1150|ppo_ep: 1|act_loss: -0.0416259765625|cri_loss: -0.018768310546875|unsuper_loss: 0.0
+average reward score: 4.6796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.93s |Gather latency=0.00s (0.00%) |Generate time=1.64s (55.98%) |Training time=0.42s (14.40%) |Others=0.87 (29.62%)|CurSamplesPerSec=10.94 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1151|ppo_ep: 1|act_loss: -0.05712890625|cri_loss: -0.0264434814453125|unsuper_loss: 0.0
+average reward score: 3.96875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.46%) |Training time=0.43s (19.94%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1152|ppo_ep: 1|act_loss: -0.078369140625|cri_loss: -0.036956787109375|unsuper_loss: 0.0
+average reward score: 4.48046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.08%) |Training time=0.44s (20.37%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1153|ppo_ep: 1|act_loss: -0.02105712890625|cri_loss: -0.0081329345703125|unsuper_loss: 0.0
+average reward score: 4.91015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.01%) |Training time=0.45s (20.49%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1154|ppo_ep: 1|act_loss: 0.0235443115234375|cri_loss: 0.01214599609375|unsuper_loss: 0.0
+average reward score: 4.66796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.40%) |Training time=0.43s (20.04%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1155|ppo_ep: 1|act_loss: 0.0322265625|cri_loss: 0.0175933837890625|unsuper_loss: 0.0
+average reward score: 4.71484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.46%) |Training time=0.43s (19.98%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1156|ppo_ep: 1|act_loss: 0.02813720703125|cri_loss: 0.01535797119140625|unsuper_loss: 0.0
+average reward score: 3.984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.18%) |Training time=0.44s (20.24%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1157|ppo_ep: 1|act_loss: 0.008514404296875|cri_loss: 0.005710601806640625|unsuper_loss: 0.0
+average reward score: 4.1953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.39%) |Training time=0.43s (20.02%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1158|ppo_ep: 1|act_loss: 0.0053863525390625|cri_loss: 0.0035552978515625|unsuper_loss: 0.0
+average reward score: 4.2421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.96%) |Training time=0.43s (19.94%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.41
+[2023-04-14 09:30:19,101] [INFO] [logging.py:96:log_dist] [Rank 0] step=1160, skipped=15, lr=[9.264740522183165e-06, 9.264740522183165e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:30:19,119] [INFO] [timer.py:199:stop] epoch=0/micro_step=1160/global_step=1160, RunningAvgSamplesPerSec=109.29043728671321, CurrSamplesPerSec=118.15315274075587, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:30:19,212] [INFO] [logging.py:96:log_dist] [Rank 0] step=1160, skipped=17, lr=[4.801136755404034e-06, 4.801136755404034e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1159|ppo_ep: 1|act_loss: -0.0248260498046875|cri_loss: -0.01092529296875|unsuper_loss: 0.0
+average reward score: 4.16796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.39%) |Training time=0.43s (20.07%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1160|ppo_ep: 1|act_loss: -0.0252685546875|cri_loss: -0.01198577880859375|unsuper_loss: 0.0
+average reward score: 4.75
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.42%) |Training time=0.44s (20.06%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1161|ppo_ep: 1|act_loss: -0.025543212890625|cri_loss: -0.01092529296875|unsuper_loss: 0.0
+average reward score: 4.53125
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.79s (77.13%) |Training time=0.43s (18.60%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.80 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1162|ppo_ep: 1|act_loss: -0.08428955078125|cri_loss: -0.0318603515625|unsuper_loss: 0.0
+average reward score: 5.06640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.12%) |Training time=0.44s (20.33%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1163|ppo_ep: 1|act_loss: 0.057861328125|cri_loss: 0.0304412841796875|unsuper_loss: 0.0
+average reward score: 5.62890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.46%) |Training time=0.43s (19.98%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1164|ppo_ep: 1|act_loss: 0.05133056640625|cri_loss: 0.028472900390625|unsuper_loss: 0.0
+average reward score: 4.40234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.63s (69.99%) |Training time=0.44s (18.87%) |Others=0.26 (11.14%)|CurSamplesPerSec=13.75 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1165|ppo_ep: 1|act_loss: 0.0540771484375|cri_loss: 0.027984619140625|unsuper_loss: 0.0
+average reward score: 5.75390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.60%) |Training time=0.43s (19.83%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1166|ppo_ep: 1|act_loss: 0.00930023193359375|cri_loss: 0.005130767822265625|unsuper_loss: 0.0
+average reward score: 4.8671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.08%) |Training time=0.44s (20.37%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1167|ppo_ep: 1|act_loss: -0.0136566162109375|cri_loss: -0.006427764892578125|unsuper_loss: 0.0
+average reward score: 4.359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.96%) |Training time=0.44s (20.49%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1168|ppo_ep: 1|act_loss: -0.025115966796875|cri_loss: -0.0105133056640625|unsuper_loss: 0.0
+average reward score: 4.28515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.92%) |Training time=0.45s (20.54%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41
+[2023-04-14 09:30:41,075] [INFO] [logging.py:96:log_dist] [Rank 0] step=1170, skipped=15, lr=[9.25743397609348e-06, 9.25743397609348e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:30:41,093] [INFO] [timer.py:199:stop] epoch=0/micro_step=1170/global_step=1170, RunningAvgSamplesPerSec=109.34554743622506, CurrSamplesPerSec=113.94323795455607, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:30:41,186] [INFO] [logging.py:96:log_dist] [Rank 0] step=1170, skipped=17, lr=[4.797357795202179e-06, 4.797357795202179e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1169|ppo_ep: 1|act_loss: -0.04736328125|cri_loss: -0.018341064453125|unsuper_loss: 0.0
+average reward score: 3.822265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.62s (71.43%) |Training time=0.44s (19.54%) |Others=0.21 (9.03%)|CurSamplesPerSec=14.09 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1170|ppo_ep: 1|act_loss: -0.05035400390625|cri_loss: -0.021392822265625|unsuper_loss: 0.0
+average reward score: 4.90625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.10%) |Training time=0.44s (20.35%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1171|ppo_ep: 1|act_loss: 0.00125885009765625|cri_loss: 0.0021820068359375|unsuper_loss: 0.0
+average reward score: 4.43359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.96%) |Training time=0.44s (20.50%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1172|ppo_ep: 1|act_loss: -0.00449371337890625|cri_loss: -0.0005588531494140625|unsuper_loss: 0.0
+average reward score: 3.71484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.86%) |Training time=0.45s (20.60%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1173|ppo_ep: 1|act_loss: 0.022857666015625|cri_loss: 0.01275634765625|unsuper_loss: 0.0
+average reward score: 3.71484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.98%) |Training time=0.44s (20.47%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1174|ppo_ep: 1|act_loss: 0.087158203125|cri_loss: 0.047210693359375|unsuper_loss: 0.0
+average reward score: 4.0546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.03%) |Training time=0.44s (20.41%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1175|ppo_ep: 1|act_loss: -0.0106964111328125|cri_loss: -0.0047454833984375|unsuper_loss: 0.0
+average reward score: 4.234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.31%) |Training time=0.44s (20.16%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1176|ppo_ep: 1|act_loss: -0.036376953125|cri_loss: -0.0170440673828125|unsuper_loss: 0.0
+average reward score: 5.74609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.12%) |Training time=0.46s (19.58%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1177|ppo_ep: 1|act_loss: -0.05767822265625|cri_loss: -0.0277862548828125|unsuper_loss: 0.0
+average reward score: 4.23828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.65%) |Training time=0.45s (20.82%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1178|ppo_ep: 1|act_loss: 0.023590087890625|cri_loss: 0.014129638671875|unsuper_loss: 0.0
+average reward score: 4.44140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.05%) |Training time=0.44s (20.40%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41
+[2023-04-14 09:31:03,012] [INFO] [logging.py:96:log_dist] [Rank 0] step=1180, skipped=15, lr=[9.250061730639604e-06, 9.250061730639604e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:31:03,276] [INFO] [timer.py:199:stop] epoch=0/micro_step=1180/global_step=1180, RunningAvgSamplesPerSec=109.3098238350795, CurrSamplesPerSec=63.37054074560561, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:31:03,369] [INFO] [logging.py:96:log_dist] [Rank 0] step=1180, skipped=17, lr=[4.793544782615725e-06, 4.793544782615725e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1179|ppo_ep: 1|act_loss: 0.005401611328125|cri_loss: 0.0032062530517578125|unsuper_loss: 0.0
+average reward score: 3.94921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.41s |Gather latency=0.00s (0.00%) |Generate time=1.64s (68.15%) |Training time=0.67s (27.78%) |Others=0.10 (4.08%)|CurSamplesPerSec=13.29 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1180|ppo_ep: 1|act_loss: -0.025115966796875|cri_loss: -0.011627197265625|unsuper_loss: 0.0
+average reward score: 4.640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.60%) |Training time=0.43s (19.84%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1181|ppo_ep: 1|act_loss: 0.06866455078125|cri_loss: 0.036651611328125|unsuper_loss: 0.0
+average reward score: 4.6171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.16%) |Training time=0.44s (20.30%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1182|ppo_ep: 1|act_loss: -0.0316162109375|cri_loss: -0.014556884765625|unsuper_loss: 0.0
+average reward score: 4.6015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.66s (75.45%) |Training time=0.44s (20.06%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1183|ppo_ep: 1|act_loss: 0.024444580078125|cri_loss: 0.0137939453125|unsuper_loss: 0.0
+average reward score: 5.03125
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.74s (77.02%) |Training time=0.42s (18.62%) |Others=0.10 (4.36%)|CurSamplesPerSec=14.14 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1184|ppo_ep: 1|act_loss: -0.04498291015625|cri_loss: -0.02166748046875|unsuper_loss: 0.0
+average reward score: 4.625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.60%) |Training time=0.43s (19.86%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1185|ppo_ep: 1|act_loss: 0.0167236328125|cri_loss: 0.009552001953125|unsuper_loss: 0.0
+average reward score: 4.8203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.49s |Gather latency=0.00s (0.00%) |Generate time=1.64s (65.59%) |Training time=0.43s (17.23%) |Others=0.43 (17.18%)|CurSamplesPerSec=12.83 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1186|ppo_ep: 1|act_loss: -0.0032024383544921875|cri_loss: -0.0007781982421875|unsuper_loss: 0.0
+average reward score: 5.01171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.92%) |Training time=0.42s (19.54%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1187|ppo_ep: 1|act_loss: -0.017578125|cri_loss: -0.00304412841796875|unsuper_loss: 0.0
+average reward score: 3.634765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.42%) |Training time=0.41s (19.05%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1188|ppo_ep: 1|act_loss: -0.009918212890625|cri_loss: -0.0033702850341796875|unsuper_loss: 0.0
+average reward score: 4.265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.60%) |Training time=0.43s (19.85%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41
+[2023-04-14 09:31:25,384] [INFO] [logging.py:96:log_dist] [Rank 0] step=1190, skipped=15, lr=[9.242623895096e-06, 9.242623895096e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:31:25,403] [INFO] [timer.py:199:stop] epoch=0/micro_step=1190/global_step=1190, RunningAvgSamplesPerSec=109.38797397111014, CurrSamplesPerSec=113.55204980063334, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:31:25,497] [INFO] [logging.py:96:log_dist] [Rank 0] step=1190, skipped=17, lr=[4.789697774162718e-06, 4.789697774162718e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1189|ppo_ep: 1|act_loss: -0.028289794921875|cri_loss: -0.01258087158203125|unsuper_loss: 0.0
+average reward score: 4.91796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.85%) |Training time=0.44s (20.53%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1190|ppo_ep: 1|act_loss: -0.020721435546875|cri_loss: -0.0089569091796875|unsuper_loss: 0.0
+average reward score: 4.421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.98%) |Training time=0.44s (20.45%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1191|ppo_ep: 1|act_loss: -0.03985595703125|cri_loss: -0.018280029296875|unsuper_loss: 0.0
+average reward score: 4.02734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.79s (77.01%) |Training time=0.44s (18.72%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.77 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1192|ppo_ep: 1|act_loss: -0.05194091796875|cri_loss: -0.02423095703125|unsuper_loss: 0.0
+average reward score: 4.234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.96%) |Training time=0.44s (20.50%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1193|ppo_ep: 1|act_loss: 0.035552978515625|cri_loss: 0.018402099609375|unsuper_loss: 0.0
+average reward score: 4.6328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.94%) |Training time=0.44s (20.51%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1194|ppo_ep: 1|act_loss: 0.11077880859375|cri_loss: 0.060760498046875|unsuper_loss: 0.0
+average reward score: 4.484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.97%) |Training time=0.42s (19.47%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1195|ppo_ep: 1|act_loss: 0.10888671875|cri_loss: 0.05926513671875|unsuper_loss: 0.0
+average reward score: 4.8203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.44%) |Training time=0.43s (20.00%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1196|ppo_ep: 1|act_loss: 0.04571533203125|cri_loss: 0.0241851806640625|unsuper_loss: 0.0
+average reward score: 4.48828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.14%) |Training time=0.44s (20.30%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1197|ppo_ep: 1|act_loss: 0.028533935546875|cri_loss: 0.0150604248046875|unsuper_loss: 0.0
+average reward score: 4.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.62s (76.97%) |Training time=0.39s (18.34%) |Others=0.10 (4.69%)|CurSamplesPerSec=15.18 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1198|ppo_ep: 1|act_loss: -0.020477294921875|cri_loss: -0.00858306884765625|unsuper_loss: 0.0
+average reward score: 4.16796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.67%) |Training time=0.43s (19.78%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
+[2023-04-14 09:31:47,505] [INFO] [logging.py:96:log_dist] [Rank 0] step=1200, skipped=15, lr=[9.235120579709336e-06, 9.235120579709336e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:31:47,520] [INFO] [timer.py:199:stop] epoch=0/micro_step=1200/global_step=1200, RunningAvgSamplesPerSec=109.46486512641026, CurrSamplesPerSec=118.27549262462009, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:31:47,613] [INFO] [logging.py:96:log_dist] [Rank 0] step=1200, skipped=17, lr=[4.7858168268651025e-06, 4.7858168268651025e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1199|ppo_ep: 1|act_loss: -0.04156494140625|cri_loss: -0.0186614990234375|unsuper_loss: 0.0
+average reward score: 4.33984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.45%) |Training time=0.43s (20.02%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1200|ppo_ep: 1|act_loss: -0.052703857421875|cri_loss: -0.02392578125|unsuper_loss: 0.0
+average reward score: 4.7421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.32%) |Training time=0.44s (20.14%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1201|ppo_ep: 1|act_loss: -0.0755615234375|cri_loss: -0.033935546875|unsuper_loss: 0.0
+average reward score: 4.234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.56%) |Training time=0.43s (19.91%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1202|ppo_ep: 1|act_loss: 0.0014781951904296875|cri_loss: 0.002246856689453125|unsuper_loss: 0.0
+average reward score: 4.328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.72%) |Training time=0.43s (19.74%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1203|ppo_ep: 1|act_loss: 0.00370025634765625|cri_loss: 0.0035266876220703125|unsuper_loss: 0.0
+average reward score: 4.2109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.52%) |Training time=0.43s (19.94%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1204|ppo_ep: 1|act_loss: 0.00565338134765625|cri_loss: 0.003631591796875|unsuper_loss: 0.0
+average reward score: 4.109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.22%) |Training time=0.44s (20.26%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1205|ppo_ep: 1|act_loss: 0.00897216796875|cri_loss: 0.00499725341796875|unsuper_loss: 0.0
+average reward score: 4.83984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.52%) |Training time=0.43s (19.94%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1206|ppo_ep: 1|act_loss: 0.01291656494140625|cri_loss: 0.007625579833984375|unsuper_loss: 0.0
+average reward score: 4.3203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.91s |Gather latency=0.00s (0.00%) |Generate time=1.81s (62.01%) |Training time=0.42s (14.56%) |Others=0.68 (23.43%)|CurSamplesPerSec=10.98 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1207|ppo_ep: 1|act_loss: 0.04498291015625|cri_loss: 0.024261474609375|unsuper_loss: 0.0
+average reward score: 4.453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.68%) |Training time=0.43s (19.77%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1208|ppo_ep: 1|act_loss: 0.0108795166015625|cri_loss: 0.00605010986328125|unsuper_loss: 0.0
+average reward score: 5.44921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.43%) |Training time=0.43s (20.03%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41
+[2023-04-14 09:32:09,937] [INFO] [logging.py:96:log_dist] [Rank 0] step=1210, skipped=15, lr=[9.22755189569684e-06, 9.22755189569684e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:32:09,955] [INFO] [timer.py:199:stop] epoch=0/micro_step=1210/global_step=1210, RunningAvgSamplesPerSec=109.54153886070056, CurrSamplesPerSec=129.74929212839035, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:32:10,048] [INFO] [logging.py:96:log_dist] [Rank 0] step=1210, skipped=17, lr=[4.7819019982478805e-06, 4.7819019982478805e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1209|ppo_ep: 1|act_loss: 0.0146484375|cri_loss: 0.008087158203125|unsuper_loss: 0.0
+average reward score: 4.421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.45%) |Training time=0.41s (18.99%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1210|ppo_ep: 1|act_loss: -0.0238800048828125|cri_loss: -0.01039886474609375|unsuper_loss: 0.0
+average reward score: 5.46484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.82%) |Training time=0.43s (19.64%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1211|ppo_ep: 1|act_loss: 0.01238250732421875|cri_loss: 0.006862640380859375|unsuper_loss: 0.0
+average reward score: 4.66796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.66s (74.33%) |Training time=0.44s (19.62%) |Others=0.14 (6.05%)|CurSamplesPerSec=14.33 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1212|ppo_ep: 1|act_loss: -0.0153656005859375|cri_loss: -0.004589080810546875|unsuper_loss: 0.0
+average reward score: 4.5546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.67s |Gather latency=0.00s (0.00%) |Generate time=1.69s (63.17%) |Training time=0.44s (16.36%) |Others=0.55 (20.47%)|CurSamplesPerSec=11.98 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1213|ppo_ep: 1|act_loss: 0.00885772705078125|cri_loss: 0.005619049072265625|unsuper_loss: 0.0
+average reward score: 4.19921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.27%) |Training time=0.43s (20.14%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1214|ppo_ep: 1|act_loss: 0.0186920166015625|cri_loss: 0.0106964111328125|unsuper_loss: 0.0
+average reward score: 5.26171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.51%) |Training time=0.43s (19.95%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1215|ppo_ep: 1|act_loss: -0.0203704833984375|cri_loss: -0.00943756103515625|unsuper_loss: 0.0
+average reward score: 4.1484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.17%) |Training time=0.44s (20.18%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1216|ppo_ep: 1|act_loss: -0.0011844635009765625|cri_loss: 0.00022029876708984375|unsuper_loss: 0.0
+average reward score: 4.6171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.05%) |Training time=0.44s (20.30%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1217|ppo_ep: 1|act_loss: -0.01849365234375|cri_loss: -0.00757598876953125|unsuper_loss: 0.0
+average reward score: 4.64453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.75%) |Training time=0.45s (20.69%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1218|ppo_ep: 1|act_loss: 0.00743865966796875|cri_loss: 0.00434112548828125|unsuper_loss: 0.0
+average reward score: 4.1328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.56%) |Training time=0.44s (20.17%) |Others=0.11 (5.27%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.41
+[2023-04-14 09:32:32,197] [INFO] [logging.py:96:log_dist] [Rank 0] step=1220, skipped=15, lr=[9.219917955244674e-06, 9.219917955244674e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:32:32,214] [INFO] [timer.py:199:stop] epoch=0/micro_step=1220/global_step=1220, RunningAvgSamplesPerSec=109.59446571481158, CurrSamplesPerSec=116.17172795170941, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:32:32,308] [INFO] [logging.py:96:log_dist] [Rank 0] step=1220, skipped=17, lr=[4.777953346338256e-06, 4.777953346338256e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1219|ppo_ep: 1|act_loss: 0.03564453125|cri_loss: 0.01837158203125|unsuper_loss: 0.0
+average reward score: 4.1484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.18%) |Training time=0.44s (20.26%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1220|ppo_ep: 1|act_loss: 0.0859375|cri_loss: 0.046600341796875|unsuper_loss: 0.0
+average reward score: 4.80859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.06%) |Training time=0.44s (20.37%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1221|ppo_ep: 1|act_loss: -0.004993438720703125|cri_loss: -0.002292633056640625|unsuper_loss: 0.0
+average reward score: 4.91015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.63s (72.70%) |Training time=0.45s (20.14%) |Others=0.16 (7.17%)|CurSamplesPerSec=14.30 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1222|ppo_ep: 1|act_loss: -0.022979736328125|cri_loss: -0.009552001953125|unsuper_loss: 0.0
+average reward score: 4.7421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.29%) |Training time=0.44s (20.18%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1223|ppo_ep: 1|act_loss: -0.041015625|cri_loss: -0.019622802734375|unsuper_loss: 0.0
+average reward score: 5.171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.00%) |Training time=0.44s (20.39%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1224|ppo_ep: 1|act_loss: -0.0154571533203125|cri_loss: -0.007038116455078125|unsuper_loss: 0.0
+average reward score: 4.015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (76.01%) |Training time=0.42s (19.41%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1225|ppo_ep: 1|act_loss: -0.04803466796875|cri_loss: -0.022705078125|unsuper_loss: 0.0
+average reward score: 4.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.55%) |Training time=0.43s (19.89%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1226|ppo_ep: 1|act_loss: 0.0100250244140625|cri_loss: 0.00542449951171875|unsuper_loss: 0.0
+average reward score: 4.296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.41%) |Training time=0.43s (20.05%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1227|ppo_ep: 1|act_loss: 0.06689453125|cri_loss: 0.03717041015625|unsuper_loss: 0.0
+average reward score: 4.55859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.51%) |Training time=0.43s (19.93%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1228|ppo_ep: 1|act_loss: 0.047698974609375|cri_loss: 0.024688720703125|unsuper_loss: 0.0
+average reward score: 4.2578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.46%) |Training time=0.43s (20.00%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.42
+[2023-04-14 09:32:53,944] [INFO] [logging.py:96:log_dist] [Rank 0] step=1230, skipped=15, lr=[9.212218871506249e-06, 9.212218871506249e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:32:53,962] [INFO] [timer.py:199:stop] epoch=0/micro_step=1230/global_step=1230, RunningAvgSamplesPerSec=109.6524556882401, CurrSamplesPerSec=118.64602981670637, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:32:54,055] [INFO] [logging.py:96:log_dist] [Rank 0] step=1230, skipped=17, lr=[4.773970929664774e-06, 4.773970929664774e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1229|ppo_ep: 1|act_loss: -0.00830078125|cri_loss: -0.0036220550537109375|unsuper_loss: 0.0
+average reward score: 4.5546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.53%) |Training time=0.43s (19.94%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1230|ppo_ep: 1|act_loss: -0.00394439697265625|cri_loss: -0.0006084442138671875|unsuper_loss: 0.0
+average reward score: 4.94921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.23%) |Training time=0.44s (20.22%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1231|ppo_ep: 1|act_loss: -0.1021728515625|cri_loss: -0.039215087890625|unsuper_loss: 0.0
+average reward score: 5.01171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.25%) |Training time=0.44s (20.12%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1232|ppo_ep: 1|act_loss: -0.007389068603515625|cri_loss: -0.00281524658203125|unsuper_loss: 0.0
+average reward score: 4.1796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.34%) |Training time=0.44s (20.14%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1233|ppo_ep: 1|act_loss: 0.00864410400390625|cri_loss: 0.004642486572265625|unsuper_loss: 0.0
+average reward score: 5.3125
+-------------------------------------------------------------------------------------
+|E2E latency=2.48s |Gather latency=0.00s (0.00%) |Generate time=1.64s (65.95%) |Training time=0.44s (17.58%) |Others=0.41 (16.47%)|CurSamplesPerSec=12.91 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1234|ppo_ep: 1|act_loss: 0.05426025390625|cri_loss: 0.03143310546875|unsuper_loss: 0.0
+average reward score: 4.265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.21%) |Training time=0.44s (20.25%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1235|ppo_ep: 1|act_loss: 0.03375244140625|cri_loss: 0.017425537109375|unsuper_loss: 0.0
+average reward score: 4.6484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.26%) |Training time=0.44s (20.18%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1236|ppo_ep: 1|act_loss: 0.01275634765625|cri_loss: 0.007781982421875|unsuper_loss: 0.0
+average reward score: 4.58984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.19%) |Training time=0.43s (18.57%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1237|ppo_ep: 1|act_loss: 0.0258026123046875|cri_loss: 0.0137481689453125|unsuper_loss: 0.0
+average reward score: 5.11328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.50%) |Training time=0.43s (19.96%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1238|ppo_ep: 1|act_loss: -0.0137786865234375|cri_loss: -0.006542205810546875|unsuper_loss: 0.0
+average reward score: 4.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.41%) |Training time=0.43s (20.04%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.42
+[2023-04-14 09:33:16,086] [INFO] [logging.py:96:log_dist] [Rank 0] step=1240, skipped=15, lr=[9.204454758600558e-06, 9.204454758600558e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:33:16,212] [INFO] [timer.py:199:stop] epoch=0/micro_step=1240/global_step=1240, RunningAvgSamplesPerSec=109.67958562836411, CurrSamplesPerSec=85.63239207564769, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:33:16,305] [INFO] [logging.py:96:log_dist] [Rank 0] step=1240, skipped=17, lr=[4.769954807256458e-06, 4.769954807256458e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1239|ppo_ep: 1|act_loss: -0.1005859375|cri_loss: -0.047332763671875|unsuper_loss: 0.0
+average reward score: 4.453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.64s (72.10%) |Training time=0.54s (23.57%) |Others=0.10 (4.32%)|CurSamplesPerSec=14.06 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1240|ppo_ep: 1|act_loss: 0.018646240234375|cri_loss: 0.01007843017578125|unsuper_loss: 0.0
+average reward score: 5.578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.67s (76.09%) |Training time=0.43s (19.45%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.54 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1241|ppo_ep: 1|act_loss: 0.0440673828125|cri_loss: 0.023956298828125|unsuper_loss: 0.0
+average reward score: 4.54296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.74s (76.14%) |Training time=0.45s (19.54%) |Others=0.10 (4.32%)|CurSamplesPerSec=14.01 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1242|ppo_ep: 1|act_loss: -0.012939453125|cri_loss: -0.00576019287109375|unsuper_loss: 0.0
+average reward score: 4.5859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.61%) |Training time=0.43s (19.84%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1243|ppo_ep: 1|act_loss: -0.013214111328125|cri_loss: -0.0034637451171875|unsuper_loss: 0.0
+average reward score: 4.40234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.59%) |Training time=0.43s (19.87%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1244|ppo_ep: 1|act_loss: -0.0278167724609375|cri_loss: -0.0135498046875|unsuper_loss: 0.0
+average reward score: 5.50390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.85%) |Training time=0.42s (19.60%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1245|ppo_ep: 1|act_loss: -0.0090789794921875|cri_loss: -0.00386810302734375|unsuper_loss: 0.0
+average reward score: 4.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.53%) |Training time=0.43s (19.83%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1246|ppo_ep: 1|act_loss: -0.0220794677734375|cri_loss: -0.01053619384765625|unsuper_loss: 0.0
+average reward score: 4.65625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.50%) |Training time=0.43s (19.83%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1247|ppo_ep: 1|act_loss: -0.0011386871337890625|cri_loss: 0.00011157989501953125|unsuper_loss: 0.0
+average reward score: 3.87109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.74%) |Training time=0.43s (19.68%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1248|ppo_ep: 1|act_loss: 0.03851318359375|cri_loss: 0.021759033203125|unsuper_loss: 0.0
+average reward score: 4.359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.88%) |Training time=0.42s (19.56%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.42
+[2023-04-14 09:33:38,001] [INFO] [logging.py:96:log_dist] [Rank 0] step=1250, skipped=15, lr=[9.196625731610486e-06, 9.196625731610486e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:33:38,019] [INFO] [timer.py:199:stop] epoch=0/micro_step=1250/global_step=1250, RunningAvgSamplesPerSec=109.75391174162782, CurrSamplesPerSec=119.57984018331966, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:33:38,112] [INFO] [logging.py:96:log_dist] [Rank 0] step=1250, skipped=17, lr=[4.765905038641933e-06, 4.765905038641933e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1249|ppo_ep: 1|act_loss: 0.003772735595703125|cri_loss: 0.0023212432861328125|unsuper_loss: 0.0
+average reward score: 4.2578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.63s |Gather latency=0.00s (0.00%) |Generate time=1.64s (62.29%) |Training time=0.43s (16.38%) |Others=0.56 (21.33%)|CurSamplesPerSec=12.18 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1250|ppo_ep: 1|act_loss: 0.05157470703125|cri_loss: 0.0281829833984375|unsuper_loss: 0.0
+average reward score: 3.8984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.64s (77.15%) |Training time=0.39s (18.21%) |Others=0.10 (4.64%)|CurSamplesPerSec=15.04 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1251|ppo_ep: 1|act_loss: -0.022918701171875|cri_loss: -0.0090789794921875|unsuper_loss: 0.0
+average reward score: 4.48828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.81s (77.81%) |Training time=0.42s (17.95%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1252|ppo_ep: 1|act_loss: 0.08087158203125|cri_loss: 0.043609619140625|unsuper_loss: 0.0
+average reward score: 3.96484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.87%) |Training time=0.42s (19.56%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1253|ppo_ep: 1|act_loss: 0.01342010498046875|cri_loss: 0.0078125|unsuper_loss: 0.0
+average reward score: 4.3046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.60%) |Training time=0.43s (19.84%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1254|ppo_ep: 1|act_loss: -0.002349853515625|cri_loss: -0.00026798248291015625|unsuper_loss: 0.0
+average reward score: 4.15234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.46%) |Training time=0.43s (19.98%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1255|ppo_ep: 1|act_loss: 0.020172119140625|cri_loss: 0.01102447509765625|unsuper_loss: 0.0
+average reward score: 4.0625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.01%) |Training time=0.44s (20.39%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1256|ppo_ep: 1|act_loss: 0.00833892822265625|cri_loss: 0.0047454833984375|unsuper_loss: 0.0
+average reward score: 4.07421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.31%) |Training time=0.44s (20.13%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1257|ppo_ep: 1|act_loss: 0.01318359375|cri_loss: 0.007080078125|unsuper_loss: 0.0
+average reward score: 4.17578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.22%) |Training time=0.44s (20.22%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1258|ppo_ep: 1|act_loss: 0.0224609375|cri_loss: 0.0117340087890625|unsuper_loss: 0.0
+average reward score: 4.3125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.92%) |Training time=0.44s (20.52%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42
+[2023-04-14 09:34:00,234] [INFO] [logging.py:96:log_dist] [Rank 0] step=1260, skipped=15, lr=[9.188731906581099e-06, 9.188731906581099e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:34:00,253] [INFO] [timer.py:199:stop] epoch=0/micro_step=1260/global_step=1260, RunningAvgSamplesPerSec=109.82973644022236, CurrSamplesPerSec=115.90207136793875, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:34:00,767] [INFO] [logging.py:96:log_dist] [Rank 0] step=1260, skipped=17, lr=[4.761821683848535e-06, 4.761821683848535e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1259|ppo_ep: 1|act_loss: -0.0025997161865234375|cri_loss: -9.34600830078125e-05|unsuper_loss: 0.0
+average reward score: 3.78125
+-------------------------------------------------------------------------------------
+|E2E latency=2.58s |Gather latency=0.00s (0.00%) |Generate time=1.62s (62.88%) |Training time=0.86s (33.31%) |Others=0.10 (3.81%)|CurSamplesPerSec=12.39 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1260|ppo_ep: 1|act_loss: -0.0119476318359375|cri_loss: -0.00537872314453125|unsuper_loss: 0.0
+average reward score: 4.02734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.03%) |Training time=0.44s (20.41%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1261|ppo_ep: 1|act_loss: -0.062286376953125|cri_loss: -0.030242919921875|unsuper_loss: 0.0
+average reward score: 4.45703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.00%) |Training time=0.44s (20.45%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1262|ppo_ep: 1|act_loss: -0.04656982421875|cri_loss: -0.022186279296875|unsuper_loss: 0.0
+average reward score: 3.5546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.83%) |Training time=0.45s (20.62%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1263|ppo_ep: 1|act_loss: -0.03692626953125|cri_loss: -0.0166778564453125|unsuper_loss: 0.0
+average reward score: 3.6015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.03%) |Training time=0.44s (20.41%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1264|ppo_ep: 1|act_loss: 0.04559326171875|cri_loss: 0.0249481201171875|unsuper_loss: 0.0
+average reward score: 4.08984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.97%) |Training time=0.44s (20.48%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1265|ppo_ep: 1|act_loss: 0.032470703125|cri_loss: 0.0174713134765625|unsuper_loss: 0.0
+average reward score: 3.62109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.05%) |Training time=0.50s (22.48%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.45 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1266|ppo_ep: 1|act_loss: -0.03363037109375|cri_loss: -0.01528167724609375|unsuper_loss: 0.0
+average reward score: 4.3828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.99%) |Training time=0.44s (20.46%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1267|ppo_ep: 1|act_loss: -0.1041259765625|cri_loss: -0.0447998046875|unsuper_loss: 0.0
+average reward score: 4.125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.04%) |Training time=0.44s (20.42%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1268|ppo_ep: 1|act_loss: -0.0225982666015625|cri_loss: -0.00713348388671875|unsuper_loss: 0.0
+average reward score: 3.890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.65%) |Training time=0.45s (20.79%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.42
+[2023-04-14 09:34:22,397] [INFO] [logging.py:96:log_dist] [Rank 0] step=1270, skipped=15, lr=[9.180773400517926e-06, 9.180773400517926e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:34:22,416] [INFO] [timer.py:199:stop] epoch=0/micro_step=1270/global_step=1270, RunningAvgSamplesPerSec=109.86249534800284, CurrSamplesPerSec=121.51866314532418, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:34:22,508] [INFO] [logging.py:96:log_dist] [Rank 0] step=1270, skipped=17, lr=[4.757704803401435e-06, 4.757704803401435e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1269|ppo_ep: 1|act_loss: 0.058837890625|cri_loss: 0.031768798828125|unsuper_loss: 0.0
+average reward score: 4.7109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.69s (76.27%) |Training time=0.43s (19.29%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1270|ppo_ep: 1|act_loss: 0.06817626953125|cri_loss: 0.036041259765625|unsuper_loss: 0.0
+average reward score: 4.24609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.73s (76.10%) |Training time=0.44s (19.54%) |Others=0.10 (4.36%)|CurSamplesPerSec=14.11 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1271|ppo_ep: 1|act_loss: 0.0235595703125|cri_loss: 0.0127410888671875|unsuper_loss: 0.0
+average reward score: 4.41796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.45s |Gather latency=0.00s (0.00%) |Generate time=1.62s (66.17%) |Training time=0.44s (18.14%) |Others=0.38 (15.69%)|CurSamplesPerSec=13.07 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1272|ppo_ep: 1|act_loss: 0.017425537109375|cri_loss: 0.0092315673828125|unsuper_loss: 0.0
+average reward score: 4.04296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.16%) |Training time=0.44s (20.29%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1273|ppo_ep: 1|act_loss: 0.0494384765625|cri_loss: 0.0261993408203125|unsuper_loss: 0.0
+average reward score: 4.09375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.29%) |Training time=0.44s (20.15%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1274|ppo_ep: 1|act_loss: -0.03271484375|cri_loss: -0.011749267578125|unsuper_loss: 0.0
+average reward score: 4.09765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.27%) |Training time=0.44s (20.15%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1275|ppo_ep: 1|act_loss: -0.03143310546875|cri_loss: -0.0136566162109375|unsuper_loss: 0.0
+average reward score: 5.04296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.49%) |Training time=0.43s (19.95%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1276|ppo_ep: 1|act_loss: -0.00504302978515625|cri_loss: -0.0022125244140625|unsuper_loss: 0.0
+average reward score: 4.6953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.41%) |Training time=0.43s (20.03%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1277|ppo_ep: 1|act_loss: -0.0264892578125|cri_loss: -0.011505126953125|unsuper_loss: 0.0
+average reward score: 4.92578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.32%) |Training time=0.44s (20.13%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1278|ppo_ep: 1|act_loss: 0.00826263427734375|cri_loss: 0.005523681640625|unsuper_loss: 0.0
+average reward score: 4.640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.55%) |Training time=0.43s (19.90%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42
+[2023-04-14 09:34:44,417] [INFO] [logging.py:96:log_dist] [Rank 0] step=1280, skipped=15, lr=[9.172750331385226e-06, 9.172750331385226e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:34:44,435] [INFO] [timer.py:199:stop] epoch=0/micro_step=1280/global_step=1280, RunningAvgSamplesPerSec=109.9137738235626, CurrSamplesPerSec=118.03625573723781, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:34:44,519] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+[2023-04-14 09:34:44,520] [INFO] [logging.py:96:log_dist] [Rank 0] step=1280, skipped=18, lr=[4.753970996989642e-06, 4.753970996989642e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1279|ppo_ep: 1|act_loss: -0.0079345703125|cri_loss: -0.003147125244140625|unsuper_loss: 0.0
+average reward score: 4.71875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.76%) |Training time=0.43s (20.16%) |Others=0.09 (4.07%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.42
+[2023-04-14 09:34:46,838] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 1280|ppo_ep: 1|act_loss: -0.0105133056640625|cri_loss: -0.004421234130859375|unsuper_loss: 0.0
+average reward score: 4.8671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.66%) |Training time=0.43s (18.54%) |Others=0.09 (3.80%)|CurSamplesPerSec=13.80 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1281|ppo_ep: 1|act_loss: 0.026824951171875|cri_loss: 0.01517486572265625|unsuper_loss: 0.0
+average reward score: 5.51953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.81%) |Training time=0.45s (20.63%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
+[2023-04-14 09:34:51,046] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 1282|ppo_ep: 1|act_loss: -0.035003662109375|cri_loss: -0.0055084228515625|unsuper_loss: 0.0
+average reward score: 4.12109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.86%) |Training time=0.42s (19.55%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1283|ppo_ep: 1|act_loss: -0.0618896484375|cri_loss: -0.0242919921875|unsuper_loss: 0.0
+average reward score: 4.171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.62s |Gather latency=0.00s (0.00%) |Generate time=1.62s (61.85%) |Training time=0.44s (16.90%) |Others=0.56 (21.25%)|CurSamplesPerSec=12.21 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1284|ppo_ep: 1|act_loss: 0.0521240234375|cri_loss: 0.031524658203125|unsuper_loss: 0.0
+average reward score: 4.33984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.32%) |Training time=0.41s (19.09%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1285|ppo_ep: 1|act_loss: -0.01483154296875|cri_loss: 0.003387451171875|unsuper_loss: 0.0
+average reward score: 4.33984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.55%) |Training time=0.45s (20.90%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1286|ppo_ep: 1|act_loss: 0.0240478515625|cri_loss: 0.0176544189453125|unsuper_loss: 0.0
+average reward score: 4.65234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.78%) |Training time=0.45s (20.66%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1287|ppo_ep: 1|act_loss: 0.038238525390625|cri_loss: 0.034881591796875|unsuper_loss: 0.0
+average reward score: 4.04296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.77%) |Training time=0.45s (20.66%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
+[2023-04-14 09:35:04,453] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, reducing to 4096
+epoch: 0|step: 1288|ppo_ep: 1|act_loss: 0.074951171875|cri_loss: 0.040374755859375|unsuper_loss: 0.0
+average reward score: 4.43359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.51%) |Training time=0.43s (19.90%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.42
+[2023-04-14 09:35:06,600] [INFO] [logging.py:96:log_dist] [Rank 0] step=1290, skipped=17, lr=[9.166285470551969e-06, 9.166285470551969e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:35:06,618] [INFO] [timer.py:199:stop] epoch=0/micro_step=1290/global_step=1290, RunningAvgSamplesPerSec=109.96179691661273, CurrSamplesPerSec=110.87710045939353, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:35:06,710] [INFO] [logging.py:96:log_dist] [Rank 0] step=1290, skipped=19, lr=[4.750210129048555e-06, 4.750210129048555e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1289|ppo_ep: 1|act_loss: 0.126953125|cri_loss: 0.0889892578125|unsuper_loss: 0.0
+average reward score: 4.3125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.60%) |Training time=0.45s (20.88%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1290|ppo_ep: 1|act_loss: 0.0721435546875|cri_loss: 0.037872314453125|unsuper_loss: 0.0
+average reward score: 4.87109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.65%) |Training time=0.45s (20.80%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1291|ppo_ep: 1|act_loss: -0.00913238525390625|cri_loss: 0.00107574462890625|unsuper_loss: 0.0
+average reward score: 4.83984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.71%) |Training time=0.45s (20.72%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1292|ppo_ep: 1|act_loss: 0.02349853515625|cri_loss: 0.01522064208984375|unsuper_loss: 0.0
+average reward score: 4.265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.94%) |Training time=0.44s (20.49%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1293|ppo_ep: 1|act_loss: -0.00255584716796875|cri_loss: 0.004985809326171875|unsuper_loss: 0.0
+average reward score: 4.85546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.63s (71.22%) |Training time=0.44s (19.20%) |Others=0.22 (9.58%)|CurSamplesPerSec=14.00 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1294|ppo_ep: 1|act_loss: 0.004199981689453125|cri_loss: 0.0028781890869140625|unsuper_loss: 0.0
+average reward score: 4.55078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.29%) |Training time=0.46s (20.78%) |Others=0.13 (5.93%)|CurSamplesPerSec=14.45 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1295|ppo_ep: 1|act_loss: -0.0011138916015625|cri_loss: 0.0023860931396484375|unsuper_loss: 0.0
+average reward score: 4.6328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.69s (75.54%) |Training time=0.44s (19.73%) |Others=0.11 (4.73%)|CurSamplesPerSec=14.29 |AvgSamplesPerSec=14.42
+[2023-04-14 09:35:21,992] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 4096, reducing to 2048
+[2023-04-14 09:35:22,078] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
+epoch: 0|step: 1296|ppo_ep: 1|act_loss: -0.000152587890625|cri_loss: 0.0015926361083984375|unsuper_loss: 0.0
+average reward score: 4.52734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.64s (76.39%) |Training time=0.42s (19.43%) |Others=0.09 (4.18%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1297|ppo_ep: 1|act_loss: -0.1217041015625|cri_loss: -0.04266357421875|unsuper_loss: 0.0
+average reward score: 4.83203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.76%) |Training time=0.45s (20.68%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1298|ppo_ep: 1|act_loss: -0.1729736328125|cri_loss: -0.057708740234375|unsuper_loss: 0.0
+average reward score: 3.98046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.65s (69.72%) |Training time=0.44s (18.58%) |Others=0.28 (11.70%)|CurSamplesPerSec=13.53 |AvgSamplesPerSec=14.42
+[2023-04-14 09:35:28,767] [INFO] [logging.py:96:log_dist] [Rank 0] step=1300, skipped=18, lr=[9.158963278694846e-06, 9.158963278694846e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:35:28,785] [INFO] [timer.py:199:stop] epoch=0/micro_step=1300/global_step=1300, RunningAvgSamplesPerSec=110.0024475916536, CurrSamplesPerSec=127.61274741908309, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:35:28,879] [INFO] [logging.py:96:log_dist] [Rank 0] step=1300, skipped=20, lr=[4.746422244731743e-06, 4.746422244731743e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1299|ppo_ep: 1|act_loss: 0.052490234375|cri_loss: 0.03717041015625|unsuper_loss: 0.0
+average reward score: 5.03125
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.75s (77.32%) |Training time=0.41s (18.29%) |Others=0.10 (4.38%)|CurSamplesPerSec=14.15 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1300|ppo_ep: 1|act_loss: 0.082763671875|cri_loss: 0.052001953125|unsuper_loss: 0.0
+average reward score: 3.94921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.73%) |Training time=0.45s (20.73%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1301|ppo_ep: 1|act_loss: 0.08441162109375|cri_loss: 0.04736328125|unsuper_loss: 0.0
+average reward score: 4.875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.97%) |Training time=0.44s (20.47%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1302|ppo_ep: 1|act_loss: 0.0318603515625|cri_loss: 0.0181427001953125|unsuper_loss: 0.0
+average reward score: 4.953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.76%) |Training time=0.45s (20.60%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1303|ppo_ep: 1|act_loss: 0.031951904296875|cri_loss: 0.020355224609375|unsuper_loss: 0.0
+average reward score: 4.77734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.87%) |Training time=0.45s (20.58%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1304|ppo_ep: 1|act_loss: -0.00981903076171875|cri_loss: -0.0034885406494140625|unsuper_loss: 0.0
+average reward score: 4.796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.75%) |Training time=0.45s (20.67%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1305|ppo_ep: 1|act_loss: -0.0030975341796875|cri_loss: -0.0007276535034179688|unsuper_loss: 0.0
+average reward score: 4.73828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.02%) |Training time=0.44s (20.42%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1306|ppo_ep: 1|act_loss: 0.209228515625|cri_loss: 0.11376953125|unsuper_loss: 0.0
+average reward score: 4.63671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.80%) |Training time=0.45s (20.65%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1307|ppo_ep: 1|act_loss: 2.288818359375e-05|cri_loss: 0.0047454833984375|unsuper_loss: 0.0
+average reward score: 4.25
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.84%) |Training time=0.44s (20.58%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1308|ppo_ep: 1|act_loss: -0.1138916015625|cri_loss: -0.0352783203125|unsuper_loss: 0.0
+average reward score: 4.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.95%) |Training time=0.44s (20.48%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
+[2023-04-14 09:35:50,565] [INFO] [logging.py:96:log_dist] [Rank 0] step=1310, skipped=18, lr=[9.150766485988878e-06, 9.150766485988878e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:35:50,950] [INFO] [timer.py:199:stop] epoch=0/micro_step=1310/global_step=1310, RunningAvgSamplesPerSec=109.92256089659742, CurrSamplesPerSec=49.722884509588546, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:35:51,044] [INFO] [logging.py:96:log_dist] [Rank 0] step=1310, skipped=20, lr=[4.742181853831721e-06, 4.742181853831721e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1309|ppo_ep: 1|act_loss: -0.1129150390625|cri_loss: -0.009521484375|unsuper_loss: 0.0
+average reward score: 4.7578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.69s |Gather latency=0.00s (0.00%) |Generate time=1.79s (66.42%) |Training time=0.81s (29.92%) |Others=0.10 (3.66%)|CurSamplesPerSec=11.88 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1310|ppo_ep: 1|act_loss: -0.14404296875|cri_loss: -0.050750732421875|unsuper_loss: 0.0
+average reward score: 5.12890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.92%) |Training time=0.44s (20.53%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1311|ppo_ep: 1|act_loss: 0.021026611328125|cri_loss: 0.028533935546875|unsuper_loss: 0.0
+average reward score: 5.12109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.80%) |Training time=0.45s (20.64%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1312|ppo_ep: 1|act_loss: 0.08160400390625|cri_loss: 0.04974365234375|unsuper_loss: 0.0
+average reward score: 4.71875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.04%) |Training time=0.44s (20.40%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1313|ppo_ep: 1|act_loss: 0.031280517578125|cri_loss: 0.0166015625|unsuper_loss: 0.0
+average reward score: 4.6484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.81%) |Training time=0.45s (20.64%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1314|ppo_ep: 1|act_loss: 0.0399169921875|cri_loss: 0.0246429443359375|unsuper_loss: 0.0
+average reward score: 4.23828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.48%) |Training time=0.43s (19.95%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1315|ppo_ep: 1|act_loss: -0.033721923828125|cri_loss: -0.014495849609375|unsuper_loss: 0.0
+average reward score: 4.4609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.71%) |Training time=0.45s (20.73%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1316|ppo_ep: 1|act_loss: 0.016845703125|cri_loss: 0.015380859375|unsuper_loss: 0.0
+average reward score: 4.87109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.61%) |Training time=0.43s (19.78%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1317|ppo_ep: 1|act_loss: -0.05865478515625|cri_loss: -0.0282440185546875|unsuper_loss: 0.0
+average reward score: 4.81640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.60%) |Training time=0.43s (19.83%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1318|ppo_ep: 1|act_loss: -0.00882720947265625|cri_loss: -0.00371551513671875|unsuper_loss: 0.0
+average reward score: 4.765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.46%) |Training time=0.43s (19.98%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.42
+[2023-04-14 09:36:12,565] [INFO] [logging.py:96:log_dist] [Rank 0] step=1320, skipped=18, lr=[9.142505574988243e-06, 9.142505574988243e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:36:12,583] [INFO] [timer.py:199:stop] epoch=0/micro_step=1320/global_step=1320, RunningAvgSamplesPerSec=109.96346053463445, CurrSamplesPerSec=112.99348394896265, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:36:12,676] [INFO] [logging.py:96:log_dist] [Rank 0] step=1320, skipped=20, lr=[4.737908228387656e-06, 4.737908228387656e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1319|ppo_ep: 1|act_loss: 0.0806884765625|cri_loss: 0.045318603515625|unsuper_loss: 0.0
+average reward score: 4.796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.42s |Gather latency=0.00s (0.00%) |Generate time=1.62s (67.12%) |Training time=0.45s (18.45%) |Others=0.35 (14.44%)|CurSamplesPerSec=13.24 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1320|ppo_ep: 1|act_loss: 0.0528564453125|cri_loss: 0.027801513671875|unsuper_loss: 0.0
+average reward score: 4.83203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.92%) |Training time=0.44s (20.52%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1321|ppo_ep: 1|act_loss: 0.047027587890625|cri_loss: 0.02423095703125|unsuper_loss: 0.0
+average reward score: 5.3046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.88%) |Training time=0.44s (20.56%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1322|ppo_ep: 1|act_loss: 0.0877685546875|cri_loss: 0.050018310546875|unsuper_loss: 0.0
+average reward score: 5.2265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.92%) |Training time=0.44s (20.53%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1323|ppo_ep: 1|act_loss: 0.02587890625|cri_loss: 0.01342010498046875|unsuper_loss: 0.0
+average reward score: 4.7421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.74%) |Training time=0.44s (20.08%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1324|ppo_ep: 1|act_loss: 0.005809783935546875|cri_loss: 0.00439453125|unsuper_loss: 0.0
+average reward score: 5.30078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.62s |Gather latency=0.00s (0.00%) |Generate time=1.80s (68.66%) |Training time=0.44s (16.72%) |Others=0.38 (14.63%)|CurSamplesPerSec=12.22 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1325|ppo_ep: 1|act_loss: 0.014617919921875|cri_loss: 0.0078277587890625|unsuper_loss: 0.0
+average reward score: 4.8046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.12%) |Training time=0.44s (20.29%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1326|ppo_ep: 1|act_loss: 0.04364013671875|cri_loss: 0.0245361328125|unsuper_loss: 0.0
+average reward score: 4.88671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.52%) |Training time=0.45s (20.56%) |Others=0.11 (4.92%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1327|ppo_ep: 1|act_loss: -0.030120849609375|cri_loss: -0.01197052001953125|unsuper_loss: 0.0
+average reward score: 4.8828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.13%) |Training time=0.44s (20.34%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1328|ppo_ep: 1|act_loss: 0.016876220703125|cri_loss: 0.01165771484375|unsuper_loss: 0.0
+average reward score: 4.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.74s (76.74%) |Training time=0.43s (18.87%) |Others=0.10 (4.39%)|CurSamplesPerSec=14.14 |AvgSamplesPerSec=14.42
+[2023-04-14 09:36:35,048] [INFO] [logging.py:96:log_dist] [Rank 0] step=1330, skipped=18, lr=[9.134180668139572e-06, 9.134180668139572e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:36:35,574] [INFO] [timer.py:199:stop] epoch=0/micro_step=1330/global_step=1330, RunningAvgSamplesPerSec=109.86232216762367, CurrSamplesPerSec=41.83281054421599, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:36:35,667] [INFO] [logging.py:96:log_dist] [Rank 0] step=1330, skipped=20, lr=[4.733601431744987e-06, 4.733601431744987e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1329|ppo_ep: 1|act_loss: 0.00733184814453125|cri_loss: 0.004749298095703125|unsuper_loss: 0.0
+average reward score: 4.1796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.67s |Gather latency=0.00s (0.00%) |Generate time=1.64s (61.50%) |Training time=0.93s (34.79%) |Others=0.10 (3.71%)|CurSamplesPerSec=12.00 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1330|ppo_ep: 1|act_loss: 0.0728759765625|cri_loss: 0.03802490234375|unsuper_loss: 0.0
+average reward score: 5.4375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.93%) |Training time=0.44s (20.51%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1331|ppo_ep: 1|act_loss: 0.013946533203125|cri_loss: 0.00748443603515625|unsuper_loss: 0.0
+average reward score: 4.7109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.00%) |Training time=0.44s (20.44%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1332|ppo_ep: 1|act_loss: -0.0016345977783203125|cri_loss: 0.0003871917724609375|unsuper_loss: 0.0
+average reward score: 4.7421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.33%) |Training time=0.43s (20.11%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1333|ppo_ep: 1|act_loss: -0.047210693359375|cri_loss: -0.0224609375|unsuper_loss: 0.0
+average reward score: 4.4765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.00%) |Training time=0.44s (20.38%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1334|ppo_ep: 1|act_loss: -0.0223236083984375|cri_loss: -0.0106201171875|unsuper_loss: 0.0
+average reward score: 4.67578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.76%) |Training time=0.45s (20.69%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1335|ppo_ep: 1|act_loss: -0.031768798828125|cri_loss: -0.0149383544921875|unsuper_loss: 0.0
+average reward score: 4.6484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.94%) |Training time=0.44s (20.49%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1336|ppo_ep: 1|act_loss: -0.0111541748046875|cri_loss: -0.005096435546875|unsuper_loss: 0.0
+average reward score: 4.22265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.62s (68.90%) |Training time=0.44s (18.63%) |Others=0.29 (12.47%)|CurSamplesPerSec=13.58 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1337|ppo_ep: 1|act_loss: 0.04083251953125|cri_loss: 0.023773193359375|unsuper_loss: 0.0
+average reward score: 4.734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.96%) |Training time=0.44s (20.47%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1338|ppo_ep: 1|act_loss: 0.0168609619140625|cri_loss: 0.009246826171875|unsuper_loss: 0.0
+average reward score: 4.48828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.96%) |Training time=0.44s (20.48%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
+[2023-04-14 09:36:57,425] [INFO] [logging.py:96:log_dist] [Rank 0] step=1340, skipped=18, lr=[9.125791888838067e-06, 9.125791888838067e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:36:57,437] [INFO] [timer.py:199:stop] epoch=0/micro_step=1340/global_step=1340, RunningAvgSamplesPerSec=109.8848984580583, CurrSamplesPerSec=100.39286272603175, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:36:57,549] [INFO] [logging.py:96:log_dist] [Rank 0] step=1340, skipped=20, lr=[4.729261527740829e-06, 4.729261527740829e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1339|ppo_ep: 1|act_loss: 0.03173828125|cri_loss: 0.01629638671875|unsuper_loss: 0.0
+average reward score: 4.37890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.09%) |Training time=0.49s (22.06%) |Others=0.11 (4.85%)|CurSamplesPerSec=14.35 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1340|ppo_ep: 1|act_loss: 0.06378173828125|cri_loss: 0.037933349609375|unsuper_loss: 0.0
+average reward score: 4.14453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.80%) |Training time=0.45s (20.65%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1341|ppo_ep: 1|act_loss: 0.00018310546875|cri_loss: 0.0010709762573242188|unsuper_loss: 0.0
+average reward score: 4.68359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.92%) |Training time=0.44s (20.53%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1342|ppo_ep: 1|act_loss: 0.042938232421875|cri_loss: 0.0228118896484375|unsuper_loss: 0.0
+average reward score: 5.01171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.00%) |Training time=0.44s (20.46%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1343|ppo_ep: 1|act_loss: -0.014617919921875|cri_loss: -0.00555419921875|unsuper_loss: 0.0
+average reward score: 3.982421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.58%) |Training time=0.43s (19.86%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1344|ppo_ep: 1|act_loss: 0.02569580078125|cri_loss: 0.014404296875|unsuper_loss: 0.0
+average reward score: 4.734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.67s (76.90%) |Training time=0.40s (18.48%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1345|ppo_ep: 1|act_loss: 0.1107177734375|cri_loss: 0.06866455078125|unsuper_loss: 0.0
+average reward score: 4.34375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.83%) |Training time=0.45s (20.60%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1346|ppo_ep: 1|act_loss: -0.05108642578125|cri_loss: -0.02252197265625|unsuper_loss: 0.0
+average reward score: 4.9453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.98%) |Training time=0.47s (21.53%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1347|ppo_ep: 1|act_loss: -0.043212890625|cri_loss: -0.020538330078125|unsuper_loss: 0.0
+average reward score: 5.09375
+-------------------------------------------------------------------------------------
+|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.62s (76.96%) |Training time=0.39s (18.37%) |Others=0.10 (4.68%)|CurSamplesPerSec=15.20 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1348|ppo_ep: 1|act_loss: 0.0045166015625|cri_loss: 0.00312042236328125|unsuper_loss: 0.0
+average reward score: 4.6953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.55%) |Training time=0.45s (20.92%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.42
+[2023-04-14 09:37:19,283] [INFO] [logging.py:96:log_dist] [Rank 0] step=1350, skipped=18, lr=[9.117339361425675e-06, 9.117339361425675e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:37:19,301] [INFO] [timer.py:199:stop] epoch=0/micro_step=1350/global_step=1350, RunningAvgSamplesPerSec=109.93319858905161, CurrSamplesPerSec=114.07213302011552, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:37:19,394] [INFO] [logging.py:96:log_dist] [Rank 0] step=1350, skipped=20, lr=[4.72488858070303e-06, 4.72488858070303e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1349|ppo_ep: 1|act_loss: -0.035736083984375|cri_loss: -0.01666259765625|unsuper_loss: 0.0
+average reward score: 4.421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.11%) |Training time=0.44s (20.37%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1350|ppo_ep: 1|act_loss: -0.00806427001953125|cri_loss: -0.0030517578125|unsuper_loss: 0.0
+average reward score: 4.8515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.02%) |Training time=0.45s (20.48%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1351|ppo_ep: 1|act_loss: -0.015472412109375|cri_loss: -0.00713348388671875|unsuper_loss: 0.0
+average reward score: 5.0859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.71%) |Training time=0.46s (20.82%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1352|ppo_ep: 1|act_loss: 0.00513458251953125|cri_loss: 0.0028324127197265625|unsuper_loss: 0.0
+average reward score: 4.546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.37%) |Training time=0.46s (21.11%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1353|ppo_ep: 1|act_loss: -0.00102996826171875|cri_loss: 0.0009441375732421875|unsuper_loss: 0.0
+average reward score: 3.984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.71%) |Training time=0.45s (20.75%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1354|ppo_ep: 1|act_loss: -0.0282135009765625|cri_loss: -0.01287841796875|unsuper_loss: 0.0
+average reward score: 4.58984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.74%) |Training time=0.43s (19.70%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1355|ppo_ep: 1|act_loss: 0.0009326934814453125|cri_loss: 0.0012836456298828125|unsuper_loss: 0.0
+average reward score: 4.5546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.45%) |Training time=0.43s (20.01%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1356|ppo_ep: 1|act_loss: -0.025543212890625|cri_loss: -0.0117034912109375|unsuper_loss: 0.0
+average reward score: 4.8828125
+-------------------------------------------------------------------------------------
+|E2E latency=3.70s |Gather latency=0.00s (0.00%) |Generate time=1.81s (49.06%) |Training time=0.44s (11.88%) |Others=1.44 (39.06%)|CurSamplesPerSec=8.65 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1357|ppo_ep: 1|act_loss: 0.0008974075317382812|cri_loss: 0.001956939697265625|unsuper_loss: 0.0
+average reward score: 5.24609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.73s (76.27%) |Training time=0.44s (19.26%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.09 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1358|ppo_ep: 1|act_loss: 0.018463134765625|cri_loss: 0.010040283203125|unsuper_loss: 0.0
+average reward score: 4.72265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.81%) |Training time=0.43s (19.63%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.42
+[2023-04-14 09:37:42,662] [INFO] [logging.py:96:log_dist] [Rank 0] step=1360, skipped=18, lr=[9.10882321118924e-06, 9.10882321118924e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:37:42,680] [INFO] [timer.py:199:stop] epoch=0/micro_step=1360/global_step=1360, RunningAvgSamplesPerSec=109.96246013017377, CurrSamplesPerSec=110.81549098318833, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:37:42,773] [INFO] [logging.py:96:log_dist] [Rank 0] step=1360, skipped=20, lr=[4.720482655449212e-06, 4.720482655449212e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1359|ppo_ep: 1|act_loss: 0.0360107421875|cri_loss: 0.0187225341796875|unsuper_loss: 0.0
+average reward score: 4.9921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.74%) |Training time=0.45s (20.74%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1360|ppo_ep: 1|act_loss: 0.0202484130859375|cri_loss: 0.0105438232421875|unsuper_loss: 0.0
+average reward score: 5.25
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.40%) |Training time=0.46s (21.07%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1361|ppo_ep: 1|act_loss: 0.0132904052734375|cri_loss: 0.007068634033203125|unsuper_loss: 0.0
+average reward score: 4.32421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.58%) |Training time=0.45s (20.86%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1362|ppo_ep: 1|act_loss: -0.06787109375|cri_loss: -0.032440185546875|unsuper_loss: 0.0
+average reward score: 5.04296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.30%) |Training time=0.46s (21.17%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1363|ppo_ep: 1|act_loss: -0.052978515625|cri_loss: -0.02508544921875|unsuper_loss: 0.0
+average reward score: 5.30859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.46%) |Training time=0.46s (20.91%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1364|ppo_ep: 1|act_loss: -0.09228515625|cri_loss: -0.043792724609375|unsuper_loss: 0.0
+average reward score: 5.0703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.14%) |Training time=0.47s (21.34%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1365|ppo_ep: 1|act_loss: -0.03424072265625|cri_loss: -0.0164642333984375|unsuper_loss: 0.0
+average reward score: 4.921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.29%) |Training time=0.46s (21.19%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1366|ppo_ep: 1|act_loss: -0.0079803466796875|cri_loss: -0.0024280548095703125|unsuper_loss: 0.0
+average reward score: 4.9375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.89%) |Training time=0.45s (20.59%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1367|ppo_ep: 1|act_loss: 0.0533447265625|cri_loss: 0.0282745361328125|unsuper_loss: 0.0
+average reward score: 5.24609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.47s |Gather latency=0.00s (0.00%) |Generate time=1.62s (65.55%) |Training time=0.46s (18.74%) |Others=0.39 (15.72%)|CurSamplesPerSec=12.95 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1368|ppo_ep: 1|act_loss: -0.009552001953125|cri_loss: -0.0042877197265625|unsuper_loss: 0.0
+average reward score: 5.08984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.40%) |Training time=0.46s (21.07%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.42
+[2023-04-14 09:38:04,730] [INFO] [logging.py:96:log_dist] [Rank 0] step=1370, skipped=18, lr=[9.100243564358655e-06, 9.100243564358655e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:38:04,748] [INFO] [timer.py:199:stop] epoch=0/micro_step=1370/global_step=1370, RunningAvgSamplesPerSec=109.94952050263434, CurrSamplesPerSec=109.40402084107701, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:38:04,841] [INFO] [logging.py:96:log_dist] [Rank 0] step=1370, skipped=20, lr=[4.716043817285819e-06, 4.716043817285819e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1369|ppo_ep: 1|act_loss: 0.04583740234375|cri_loss: 0.0237579345703125|unsuper_loss: 0.0
+average reward score: 5.40234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.54%) |Training time=0.46s (20.94%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1370|ppo_ep: 1|act_loss: 0.039947509765625|cri_loss: 0.0226593017578125|unsuper_loss: 0.0
+average reward score: 4.58203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.57%) |Training time=0.46s (20.94%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1371|ppo_ep: 1|act_loss: 0.01641845703125|cri_loss: 0.0088348388671875|unsuper_loss: 0.0
+average reward score: 5.04296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.79s (76.47%) |Training time=0.45s (19.29%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1372|ppo_ep: 1|act_loss: 0.012786865234375|cri_loss: 0.007312774658203125|unsuper_loss: 0.0
+average reward score: 4.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.91%) |Training time=0.47s (21.22%) |Others=0.11 (4.86%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1373|ppo_ep: 1|act_loss: 0.037200927734375|cri_loss: 0.023590087890625|unsuper_loss: 0.0
+average reward score: 4.7421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.93%) |Training time=0.45s (20.55%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1374|ppo_ep: 1|act_loss: -0.037384033203125|cri_loss: -0.01812744140625|unsuper_loss: 0.0
+average reward score: 5.09375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.33%) |Training time=0.46s (21.15%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1375|ppo_ep: 1|act_loss: -0.021392822265625|cri_loss: -0.00673675537109375|unsuper_loss: 0.0
+average reward score: 4.43359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.21%) |Training time=0.46s (21.27%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1376|ppo_ep: 1|act_loss: 0.03839111328125|cri_loss: 0.020843505859375|unsuper_loss: 0.0
+average reward score: 4.7578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.26%) |Training time=0.46s (21.21%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1377|ppo_ep: 1|act_loss: 0.05328369140625|cri_loss: 0.0286865234375|unsuper_loss: 0.0
+average reward score: 4.87890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.99%) |Training time=0.47s (21.46%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1378|ppo_ep: 1|act_loss: 0.003376007080078125|cri_loss: 0.0021820068359375|unsuper_loss: 0.0
+average reward score: 4.6171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.39%) |Training time=0.46s (21.09%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.42
+[2023-04-14 09:38:26,722] [INFO] [logging.py:96:log_dist] [Rank 0] step=1380, skipped=18, lr=[9.091600548104982e-06, 9.091600548104982e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:38:26,740] [INFO] [timer.py:199:stop] epoch=0/micro_step=1380/global_step=1380, RunningAvgSamplesPerSec=109.93347342240803, CurrSamplesPerSec=109.94820174058928, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:38:26,833] [INFO] [logging.py:96:log_dist] [Rank 0] step=1380, skipped=20, lr=[4.711572132007139e-06, 4.711572132007139e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1379|ppo_ep: 1|act_loss: 0.05718994140625|cri_loss: 0.0311737060546875|unsuper_loss: 0.0
+average reward score: 4.49609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.42%) |Training time=0.46s (20.95%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1380|ppo_ep: 1|act_loss: 0.00634765625|cri_loss: 0.0057830810546875|unsuper_loss: 0.0
+average reward score: 4.625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.11%) |Training time=0.47s (21.36%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1381|ppo_ep: 1|act_loss: -0.0004782676696777344|cri_loss: 3.814697265625e-06|unsuper_loss: 0.0
+average reward score: 4.6953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.61%) |Training time=0.45s (20.88%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1382|ppo_ep: 1|act_loss: -0.0220947265625|cri_loss: -0.0086822509765625|unsuper_loss: 0.0
+average reward score: 4.65625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.37%) |Training time=0.46s (21.11%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1383|ppo_ep: 1|act_loss: 0.0055999755859375|cri_loss: 0.00392913818359375|unsuper_loss: 0.0
+average reward score: 4.79296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.45%) |Training time=0.46s (21.01%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1384|ppo_ep: 1|act_loss: -0.0077667236328125|cri_loss: -0.003047943115234375|unsuper_loss: 0.0
+average reward score: 4.5703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.73s (75.31%) |Training time=0.47s (20.39%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.94 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1385|ppo_ep: 1|act_loss: -0.040283203125|cri_loss: -0.0183868408203125|unsuper_loss: 0.0
+average reward score: 5.046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.82s (76.63%) |Training time=0.46s (19.22%) |Others=0.10 (4.15%)|CurSamplesPerSec=13.49 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1386|ppo_ep: 1|act_loss: -0.00937652587890625|cri_loss: -0.00380706787109375|unsuper_loss: 0.0
+average reward score: 4.546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.73s (75.67%) |Training time=0.46s (20.02%) |Others=0.10 (4.31%)|CurSamplesPerSec=14.02 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1387|ppo_ep: 1|act_loss: -0.01216888427734375|cri_loss: -0.0052642822265625|unsuper_loss: 0.0
+average reward score: 4.80859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.95%) |Training time=0.47s (21.50%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1388|ppo_ep: 1|act_loss: 0.021453857421875|cri_loss: 0.0120391845703125|unsuper_loss: 0.0
+average reward score: 4.8046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.38s |Gather latency=0.00s (0.00%) |Generate time=1.65s (69.38%) |Training time=0.44s (18.50%) |Others=0.29 (12.11%)|CurSamplesPerSec=13.42 |AvgSamplesPerSec=14.42
+[2023-04-14 09:38:49,163] [INFO] [logging.py:96:log_dist] [Rank 0] step=1390, skipped=18, lr=[9.082894290538575e-06, 9.082894290538575e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:38:49,180] [INFO] [timer.py:199:stop] epoch=0/micro_step=1390/global_step=1390, RunningAvgSamplesPerSec=109.91676030089931, CurrSamplesPerSec=106.04061561798818, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:38:49,273] [INFO] [logging.py:96:log_dist] [Rank 0] step=1390, skipped=20, lr=[4.707067665894335e-06, 4.707067665894335e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1389|ppo_ep: 1|act_loss: 0.0421142578125|cri_loss: 0.0222015380859375|unsuper_loss: 0.0
+average reward score: 4.61328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.25%) |Training time=0.46s (21.25%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1390|ppo_ep: 1|act_loss: 0.01336669921875|cri_loss: 0.006893157958984375|unsuper_loss: 0.0
+average reward score: 4.62109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.60%) |Training time=0.46s (20.91%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1391|ppo_ep: 1|act_loss: 0.06072998046875|cri_loss: 0.03216552734375|unsuper_loss: 0.0
+average reward score: 4.89453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.21%) |Training time=0.47s (21.29%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1392|ppo_ep: 1|act_loss: -0.034271240234375|cri_loss: -0.0163726806640625|unsuper_loss: 0.0
+average reward score: 4.734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.46%) |Training time=0.46s (21.02%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1393|ppo_ep: 1|act_loss: 0.08160400390625|cri_loss: 0.043060302734375|unsuper_loss: 0.0
+average reward score: 4.61328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.03%) |Training time=0.45s (20.49%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1394|ppo_ep: 1|act_loss: -0.052520751953125|cri_loss: -0.0250244140625|unsuper_loss: 0.0
+average reward score: 4.671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.62s (69.56%) |Training time=0.46s (19.95%) |Others=0.24 (10.49%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1395|ppo_ep: 1|act_loss: -0.0552978515625|cri_loss: -0.024566650390625|unsuper_loss: 0.0
+average reward score: 4.421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.01%) |Training time=0.47s (21.47%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1396|ppo_ep: 1|act_loss: -0.06494140625|cri_loss: -0.030853271484375|unsuper_loss: 0.0
+average reward score: 4.53125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.22%) |Training time=0.47s (21.28%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1397|ppo_ep: 1|act_loss: -0.051849365234375|cri_loss: -0.0247039794921875|unsuper_loss: 0.0
+average reward score: 5.23828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.59%) |Training time=0.45s (20.87%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1398|ppo_ep: 1|act_loss: -0.0325927734375|cri_loss: -0.0157318115234375|unsuper_loss: 0.0
+average reward score: 4.72265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.21%) |Training time=0.42s (19.24%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.42
+[2023-04-14 09:39:11,139] [INFO] [logging.py:96:log_dist] [Rank 0] step=1400, skipped=18, lr=[9.074124920707169e-06, 9.074124920707169e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:39:11,157] [INFO] [timer.py:199:stop] epoch=0/micro_step=1400/global_step=1400, RunningAvgSamplesPerSec=109.92654260577895, CurrSamplesPerSec=134.14327848044334, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:39:11,252] [INFO] [logging.py:96:log_dist] [Rank 0] step=1400, skipped=20, lr=[4.702530485714462e-06, 4.702530485714462e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1399|ppo_ep: 1|act_loss: -0.02215576171875|cri_loss: -0.009674072265625|unsuper_loss: 0.0
+average reward score: 5.1875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.68s (76.91%) |Training time=0.40s (18.43%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1400|ppo_ep: 1|act_loss: 0.0113372802734375|cri_loss: 0.006031036376953125|unsuper_loss: 0.0
+average reward score: 5.1953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.66s (74.61%) |Training time=0.46s (20.91%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.39 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1401|ppo_ep: 1|act_loss: 0.0224761962890625|cri_loss: 0.01213836669921875|unsuper_loss: 0.0
+average reward score: 5.0234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.66s (75.85%) |Training time=0.43s (19.63%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1402|ppo_ep: 1|act_loss: -0.0025653839111328125|cri_loss: -0.000286102294921875|unsuper_loss: 0.0
+average reward score: 5.10546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.66s (75.16%) |Training time=0.45s (20.36%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1403|ppo_ep: 1|act_loss: 0.00897216796875|cri_loss: 0.005275726318359375|unsuper_loss: 0.0
+average reward score: 4.92578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.66s (75.43%) |Training time=0.44s (20.11%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1404|ppo_ep: 1|act_loss: -0.051849365234375|cri_loss: -0.02508544921875|unsuper_loss: 0.0
+average reward score: 4.6640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.88%) |Training time=0.43s (19.60%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1405|ppo_ep: 1|act_loss: 0.0389404296875|cri_loss: 0.02093505859375|unsuper_loss: 0.0
+average reward score: 4.6796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.12%) |Training time=0.44s (20.32%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1406|ppo_ep: 1|act_loss: -0.00616455078125|cri_loss: -0.00276947021484375|unsuper_loss: 0.0
+average reward score: 4.578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.10%) |Training time=0.42s (19.32%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1407|ppo_ep: 1|act_loss: -0.0345458984375|cri_loss: -0.016998291015625|unsuper_loss: 0.0
+average reward score: 4.8125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.68s (77.05%) |Training time=0.40s (18.42%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1408|ppo_ep: 1|act_loss: -0.016632080078125|cri_loss: -0.00641632080078125|unsuper_loss: 0.0
+average reward score: 4.75390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.81%) |Training time=0.43s (19.60%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.42
+[2023-04-14 09:39:33,017] [INFO] [logging.py:96:log_dist] [Rank 0] step=1410, skipped=18, lr=[9.065292568593984e-06, 9.065292568593984e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:39:33,035] [INFO] [timer.py:199:stop] epoch=0/micro_step=1410/global_step=1410, RunningAvgSamplesPerSec=109.99575030295641, CurrSamplesPerSec=120.44529078042189, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:39:33,127] [INFO] [logging.py:96:log_dist] [Rank 0] step=1410, skipped=20, lr=[4.697960658719475e-06, 4.697960658719475e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1409|ppo_ep: 1|act_loss: 0.052001953125|cri_loss: 0.027801513671875|unsuper_loss: 0.0
+average reward score: 5.2421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.68%) |Training time=0.43s (19.78%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1410|ppo_ep: 1|act_loss: 0.01788330078125|cri_loss: 0.0096588134765625|unsuper_loss: 0.0
+average reward score: 5.16015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.56%) |Training time=0.43s (19.92%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1411|ppo_ep: 1|act_loss: 0.0845947265625|cri_loss: 0.04779052734375|unsuper_loss: 0.0
+average reward score: 4.6328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.32%) |Training time=0.44s (20.15%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1412|ppo_ep: 1|act_loss: -0.00656890869140625|cri_loss: -0.0027256011962890625|unsuper_loss: 0.0
+average reward score: 4.71484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.55%) |Training time=0.41s (18.89%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1413|ppo_ep: 1|act_loss: 0.018768310546875|cri_loss: 0.00978851318359375|unsuper_loss: 0.0
+average reward score: 4.515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.15%) |Training time=0.41s (18.95%) |Others=0.11 (4.90%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1414|ppo_ep: 1|act_loss: -0.06195068359375|cri_loss: -0.029815673828125|unsuper_loss: 0.0
+average reward score: 4.76171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.08%) |Training time=0.49s (22.42%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1415|ppo_ep: 1|act_loss: -0.056732177734375|cri_loss: -0.0274505615234375|unsuper_loss: 0.0
+average reward score: 4.86328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.45s |Gather latency=0.00s (0.00%) |Generate time=1.86s (76.14%) |Training time=0.49s (19.85%) |Others=0.10 (4.01%)|CurSamplesPerSec=13.07 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1416|ppo_ep: 1|act_loss: -0.016510009765625|cri_loss: -0.007720947265625|unsuper_loss: 0.0
+average reward score: 5.296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.30%) |Training time=0.48s (22.19%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1417|ppo_ep: 1|act_loss: 0.018585205078125|cri_loss: 0.01007080078125|unsuper_loss: 0.0
+average reward score: 5.10546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.31%) |Training time=0.48s (22.19%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1418|ppo_ep: 1|act_loss: 0.04681396484375|cri_loss: 0.0242156982421875|unsuper_loss: 0.0
+average reward score: 5.08984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.18%) |Training time=0.49s (22.35%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.42
+[2023-04-14 09:39:55,090] [INFO] [logging.py:96:log_dist] [Rank 0] step=1420, skipped=18, lr=[9.056397365115782e-06, 9.056397365115782e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:39:55,108] [INFO] [timer.py:199:stop] epoch=0/micro_step=1420/global_step=1420, RunningAvgSamplesPerSec=109.97072218501401, CurrSamplesPerSec=93.94947837627466, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:39:55,200] [INFO] [logging.py:96:log_dist] [Rank 0] step=1420, skipped=20, lr=[4.693358252645234e-06, 4.693358252645234e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1419|ppo_ep: 1|act_loss: 0.0171661376953125|cri_loss: 0.00890350341796875|unsuper_loss: 0.0
+average reward score: 5.17578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.59%) |Training time=0.50s (22.96%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1420|ppo_ep: 1|act_loss: -0.01690673828125|cri_loss: -0.00787353515625|unsuper_loss: 0.0
+average reward score: 4.515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.05%) |Training time=0.47s (21.43%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1421|ppo_ep: 1|act_loss: -0.0090484619140625|cri_loss: -0.003841400146484375|unsuper_loss: 0.0
+average reward score: 4.59375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.37%) |Training time=0.48s (22.14%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1422|ppo_ep: 1|act_loss: -0.03363037109375|cri_loss: -0.01525115966796875|unsuper_loss: 0.0
+average reward score: 4.875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.55%) |Training time=0.48s (21.94%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1423|ppo_ep: 1|act_loss: -0.00421142578125|cri_loss: -0.001033782958984375|unsuper_loss: 0.0
+average reward score: 5.0703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.37%) |Training time=0.48s (22.13%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1424|ppo_ep: 1|act_loss: 0.040191650390625|cri_loss: 0.020843505859375|unsuper_loss: 0.0
+average reward score: 4.77734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.41%) |Training time=0.48s (22.07%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1425|ppo_ep: 1|act_loss: 0.0694580078125|cri_loss: 0.03741455078125|unsuper_loss: 0.0
+average reward score: 4.4765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.88%) |Training time=0.49s (22.61%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1426|ppo_ep: 1|act_loss: -0.0015506744384765625|cri_loss: 0.000782012939453125|unsuper_loss: 0.0
+average reward score: 4.77734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.80%) |Training time=0.50s (22.71%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1427|ppo_ep: 1|act_loss: -0.0264739990234375|cri_loss: -0.0119781494140625|unsuper_loss: 0.0
+average reward score: 4.70703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.44s |Gather latency=0.00s (0.00%) |Generate time=1.58s (64.64%) |Training time=0.50s (20.50%) |Others=0.36 (14.86%)|CurSamplesPerSec=13.11 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1428|ppo_ep: 1|act_loss: -0.052154541015625|cri_loss: -0.024078369140625|unsuper_loss: 0.0
+average reward score: 4.7578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.42%) |Training time=0.50s (23.06%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.42
+[2023-04-14 09:40:17,160] [INFO] [logging.py:96:log_dist] [Rank 0] step=1430, skipped=18, lr=[9.04743944212094e-06, 9.04743944212094e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:40:17,178] [INFO] [timer.py:199:stop] epoch=0/micro_step=1430/global_step=1430, RunningAvgSamplesPerSec=109.87632875471583, CurrSamplesPerSec=92.99834883909462, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:40:17,305] [INFO] [logging.py:96:log_dist] [Rank 0] step=1430, skipped=20, lr=[4.688723335710501e-06, 4.688723335710501e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1429|ppo_ep: 1|act_loss: 0.0249176025390625|cri_loss: 0.01410675048828125|unsuper_loss: 0.0
+average reward score: 4.578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.20%) |Training time=0.51s (22.89%) |Others=0.13 (5.91%)|CurSamplesPerSec=14.45 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1430|ppo_ep: 1|act_loss: 0.01303863525390625|cri_loss: 0.00823974609375|unsuper_loss: 0.0
+average reward score: 5.10546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.73s (74.82%) |Training time=0.48s (20.91%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.82 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1431|ppo_ep: 1|act_loss: -0.0116119384765625|cri_loss: -0.0054779052734375|unsuper_loss: 0.0
+average reward score: 4.90625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.72%) |Training time=0.50s (22.78%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1432|ppo_ep: 1|act_loss: 0.0116424560546875|cri_loss: 0.00733184814453125|unsuper_loss: 0.0
+average reward score: 5.52734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.62%) |Training time=0.48s (21.90%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1433|ppo_ep: 1|act_loss: 0.019805908203125|cri_loss: 0.01030731201171875|unsuper_loss: 0.0
+average reward score: 4.67578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.72%) |Training time=0.50s (22.78%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1434|ppo_ep: 1|act_loss: 0.0243682861328125|cri_loss: 0.0135650634765625|unsuper_loss: 0.0
+average reward score: 5.1171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.72%) |Training time=0.52s (23.55%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.49 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1435|ppo_ep: 1|act_loss: -0.00748443603515625|cri_loss: -0.0032596588134765625|unsuper_loss: 0.0
+average reward score: 5.109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.50%) |Training time=0.50s (22.77%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1436|ppo_ep: 1|act_loss: 0.019012451171875|cri_loss: 0.01018524169921875|unsuper_loss: 0.0
+average reward score: 5.3984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.41%) |Training time=0.50s (23.08%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1437|ppo_ep: 1|act_loss: 0.01898193359375|cri_loss: 0.010498046875|unsuper_loss: 0.0
+average reward score: 4.64453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.64%) |Training time=0.50s (22.85%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1438|ppo_ep: 1|act_loss: 0.0021076202392578125|cri_loss: 0.0012311935424804688|unsuper_loss: 0.0
+average reward score: 5.296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.67%) |Training time=0.50s (22.84%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.42
+[2023-04-14 09:40:39,193] [INFO] [logging.py:96:log_dist] [Rank 0] step=1440, skipped=18, lr=[9.038418932387486e-06, 9.038418932387486e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:40:39,211] [INFO] [timer.py:199:stop] epoch=0/micro_step=1440/global_step=1440, RunningAvgSamplesPerSec=109.76133992307255, CurrSamplesPerSec=94.28474446729834, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:40:39,304] [INFO] [logging.py:96:log_dist] [Rank 0] step=1440, skipped=20, lr=[4.684055976615924e-06, 4.684055976615924e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1439|ppo_ep: 1|act_loss: -0.04058837890625|cri_loss: -0.0198516845703125|unsuper_loss: 0.0
+average reward score: 5.4609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.59%) |Training time=0.50s (22.94%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1440|ppo_ep: 1|act_loss: -0.08544921875|cri_loss: -0.04156494140625|unsuper_loss: 0.0
+average reward score: 4.83984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.56%) |Training time=0.50s (22.86%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1441|ppo_ep: 1|act_loss: 0.008087158203125|cri_loss: 0.005504608154296875|unsuper_loss: 0.0
+average reward score: 5.125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.60%) |Training time=0.50s (22.90%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1442|ppo_ep: 1|act_loss: -0.0049896240234375|cri_loss: -0.001865386962890625|unsuper_loss: 0.0
+average reward score: 5.9921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.33%) |Training time=0.51s (23.17%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1443|ppo_ep: 1|act_loss: 0.02874755859375|cri_loss: 0.01544189453125|unsuper_loss: 0.0
+average reward score: 5.15625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.61%) |Training time=0.50s (22.89%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1444|ppo_ep: 1|act_loss: 0.0271453857421875|cri_loss: 0.0143585205078125|unsuper_loss: 0.0
+average reward score: 4.92578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.45s |Gather latency=0.00s (0.00%) |Generate time=1.78s (72.41%) |Training time=0.58s (23.59%) |Others=0.10 (4.01%)|CurSamplesPerSec=13.04 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1445|ppo_ep: 1|act_loss: -0.0135955810546875|cri_loss: -0.00629425048828125|unsuper_loss: 0.0
+average reward score: 5.1796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.75%) |Training time=0.50s (22.73%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1446|ppo_ep: 1|act_loss: -0.0104522705078125|cri_loss: -0.00444793701171875|unsuper_loss: 0.0
+average reward score: 5.40625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.35%) |Training time=0.51s (23.17%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1447|ppo_ep: 1|act_loss: -0.009429931640625|cri_loss: -0.00450897216796875|unsuper_loss: 0.0
+average reward score: 5.0625
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.84%) |Training time=0.52s (23.64%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1448|ppo_ep: 1|act_loss: -0.06170654296875|cri_loss: -0.029632568359375|unsuper_loss: 0.0
+average reward score: 5.22265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.32%) |Training time=0.48s (22.16%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.42
+[2023-04-14 09:41:01,351] [INFO] [logging.py:96:log_dist] [Rank 0] step=1450, skipped=18, lr=[9.029335969621133e-06, 9.029335969621133e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:41:01,370] [INFO] [timer.py:199:stop] epoch=0/micro_step=1450/global_step=1450, RunningAvgSamplesPerSec=109.61442490775492, CurrSamplesPerSec=87.41166563657832, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:41:01,462] [INFO] [logging.py:96:log_dist] [Rank 0] step=1450, skipped=20, lr=[4.679356244543027e-06, 4.679356244543027e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1449|ppo_ep: 1|act_loss: -0.037841796875|cri_loss: -0.0179443359375|unsuper_loss: 0.0
+average reward score: 5.16796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.78%) |Training time=0.53s (23.81%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.41 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1450|ppo_ep: 1|act_loss: 0.0478515625|cri_loss: 0.024993896484375|unsuper_loss: 0.0
+average reward score: 5.1953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.61s (72.66%) |Training time=0.51s (22.84%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.42 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1451|ppo_ep: 1|act_loss: 0.04669189453125|cri_loss: 0.0257415771484375|unsuper_loss: 0.0
+average reward score: 5.859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (72.93%) |Training time=0.50s (22.59%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1452|ppo_ep: 1|act_loss: 0.055023193359375|cri_loss: 0.0299072265625|unsuper_loss: 0.0
+average reward score: 4.83984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.64s (73.94%) |Training time=0.48s (21.58%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1453|ppo_ep: 1|act_loss: 0.026885986328125|cri_loss: 0.0149993896484375|unsuper_loss: 0.0
+average reward score: 6.34375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.59%) |Training time=0.50s (22.93%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1454|ppo_ep: 1|act_loss: 0.0374755859375|cri_loss: 0.0211029052734375|unsuper_loss: 0.0
+average reward score: 5.625
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.67%) |Training time=0.53s (23.88%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.45 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1455|ppo_ep: 1|act_loss: 0.021514892578125|cri_loss: 0.0114288330078125|unsuper_loss: 0.0
+average reward score: 4.8984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.60%) |Training time=0.50s (22.93%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1456|ppo_ep: 1|act_loss: -0.03704833984375|cri_loss: -0.017303466796875|unsuper_loss: 0.0
+average reward score: 6.1953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.59%) |Training time=0.50s (22.87%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1457|ppo_ep: 1|act_loss: -0.0538330078125|cri_loss: -0.0260467529296875|unsuper_loss: 0.0
+average reward score: 5.6328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.48%) |Training time=0.48s (22.04%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1458|ppo_ep: 1|act_loss: -0.018829345703125|cri_loss: -0.00910186767578125|unsuper_loss: 0.0
+average reward score: 5.4375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.19%) |Training time=0.49s (22.31%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.42
+[2023-04-14 09:41:23,569] [INFO] [logging.py:96:log_dist] [Rank 0] step=1460, skipped=18, lr=[9.020190688453302e-06, 9.020190688453302e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:41:23,671] [INFO] [timer.py:199:stop] epoch=0/micro_step=1460/global_step=1460, RunningAvgSamplesPerSec=109.47243506677798, CurrSamplesPerSec=72.63209936403898, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:41:23,763] [INFO] [logging.py:96:log_dist] [Rank 0] step=1460, skipped=20, lr=[4.674624209153173e-06, 4.674624209153173e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1459|ppo_ep: 1|act_loss: 0.00940704345703125|cri_loss: 0.00525665283203125|unsuper_loss: 0.0
+average reward score: 5.625
+-------------------------------------------------------------------------------------
+|E2E latency=2.48s |Gather latency=0.00s (0.00%) |Generate time=1.77s (71.68%) |Training time=0.60s (24.38%) |Others=0.10 (3.95%)|CurSamplesPerSec=12.93 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1460|ppo_ep: 1|act_loss: -0.01226043701171875|cri_loss: -0.00563812255859375|unsuper_loss: 0.0
+average reward score: 5.1171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.26%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1461|ppo_ep: 1|act_loss: 0.034271240234375|cri_loss: 0.01806640625|unsuper_loss: 0.0
+average reward score: 5.6328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.88%) |Training time=0.49s (22.61%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1462|ppo_ep: 1|act_loss: 0.079345703125|cri_loss: 0.04486083984375|unsuper_loss: 0.0
+average reward score: 5.62109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.17%) |Training time=0.49s (22.31%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1463|ppo_ep: 1|act_loss: 0.00299835205078125|cri_loss: 0.0023288726806640625|unsuper_loss: 0.0
+average reward score: 5.2265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.43%) |Training time=0.48s (22.04%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1464|ppo_ep: 1|act_loss: -0.00670623779296875|cri_loss: -0.00295257568359375|unsuper_loss: 0.0
+average reward score: 5.84375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.74%) |Training time=0.47s (21.75%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1465|ppo_ep: 1|act_loss: -0.027069091796875|cri_loss: -0.0130462646484375|unsuper_loss: 0.0
+average reward score: 4.88671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.61s |Gather latency=0.00s (0.00%) |Generate time=1.61s (61.52%) |Training time=0.47s (17.96%) |Others=0.54 (20.52%)|CurSamplesPerSec=12.24 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1466|ppo_ep: 1|act_loss: 0.002590179443359375|cri_loss: 0.001628875732421875|unsuper_loss: 0.0
+average reward score: 5.1328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.15%) |Training time=0.46s (21.33%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1467|ppo_ep: 1|act_loss: 0.00725555419921875|cri_loss: 0.00450897216796875|unsuper_loss: 0.0
+average reward score: 5.0234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.04%) |Training time=0.47s (21.45%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1468|ppo_ep: 1|act_loss: 0.01209259033203125|cri_loss: 0.006256103515625|unsuper_loss: 0.0
+average reward score: 5.0625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.06%) |Training time=0.46s (21.34%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.42
+[2023-04-14 09:41:45,881] [INFO] [logging.py:96:log_dist] [Rank 0] step=1470, skipped=18, lr=[9.010983224439122e-06, 9.010983224439122e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:41:45,900] [INFO] [timer.py:199:stop] epoch=0/micro_step=1470/global_step=1470, RunningAvgSamplesPerSec=109.42062658822219, CurrSamplesPerSec=106.38218975695435, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:41:45,992] [INFO] [logging.py:96:log_dist] [Rank 0] step=1470, skipped=20, lr=[4.6698599405865465e-06, 4.6698599405865465e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1469|ppo_ep: 1|act_loss: -0.00179290771484375|cri_loss: 0.0003948211669921875|unsuper_loss: 0.0
+average reward score: 5.37109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.10%) |Training time=0.46s (21.37%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1470|ppo_ep: 1|act_loss: 0.0103302001953125|cri_loss: 0.006702423095703125|unsuper_loss: 0.0
+average reward score: 5.1875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.59%) |Training time=0.48s (21.90%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1471|ppo_ep: 1|act_loss: 0.02276611328125|cri_loss: 0.0134124755859375|unsuper_loss: 0.0
+average reward score: 5.85546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.60s |Gather latency=0.00s (0.00%) |Generate time=1.60s (61.64%) |Training time=0.47s (18.19%) |Others=0.52 (20.17%)|CurSamplesPerSec=12.33 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1472|ppo_ep: 1|act_loss: 0.0002186298370361328|cri_loss: 0.0006475448608398438|unsuper_loss: 0.0
+average reward score: 5.8515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.57%) |Training time=0.48s (21.95%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1473|ppo_ep: 1|act_loss: 0.01290130615234375|cri_loss: 0.00920867919921875|unsuper_loss: 0.0
+average reward score: 5.203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.43s |Gather latency=0.00s (0.00%) |Generate time=1.85s (76.14%) |Training time=0.48s (19.81%) |Others=0.10 (4.04%)|CurSamplesPerSec=13.16 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1474|ppo_ep: 1|act_loss: -0.0024871826171875|cri_loss: -0.0009250640869140625|unsuper_loss: 0.0
+average reward score: 5.390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.58%) |Training time=0.48s (21.91%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1475|ppo_ep: 1|act_loss: -0.00386810302734375|cri_loss: -0.0015048980712890625|unsuper_loss: 0.0
+average reward score: 5.04296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.74%) |Training time=0.50s (22.76%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1476|ppo_ep: 1|act_loss: -0.00803375244140625|cri_loss: -0.0038585662841796875|unsuper_loss: 0.0
+average reward score: 5.9375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.91%) |Training time=0.49s (22.57%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1477|ppo_ep: 1|act_loss: 0.034149169921875|cri_loss: 0.0176544189453125|unsuper_loss: 0.0
+average reward score: 5.234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.10%) |Training time=0.49s (22.38%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1478|ppo_ep: 1|act_loss: 0.000904083251953125|cri_loss: 0.0015773773193359375|unsuper_loss: 0.0
+average reward score: 5.76171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.70%) |Training time=0.48s (21.80%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.42
+[2023-04-14 09:42:08,344] [INFO] [logging.py:96:log_dist] [Rank 0] step=1480, skipped=18, lr=[9.00171371405542e-06, 9.00171371405542e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:42:08,362] [INFO] [timer.py:199:stop] epoch=0/micro_step=1480/global_step=1480, RunningAvgSamplesPerSec=109.35307940699091, CurrSamplesPerSec=102.23690081816804, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:42:08,454] [INFO] [logging.py:96:log_dist] [Rank 0] step=1480, skipped=20, lr=[4.665063509461098e-06, 4.665063509461098e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1479|ppo_ep: 1|act_loss: 0.053741455078125|cri_loss: 0.030670166015625|unsuper_loss: 0.0
+average reward score: 5.12109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.60%) |Training time=0.48s (21.90%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1480|ppo_ep: 1|act_loss: 0.021942138671875|cri_loss: 0.01233673095703125|unsuper_loss: 0.0
+average reward score: 5.3203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.44%) |Training time=0.48s (22.01%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1481|ppo_ep: 1|act_loss: -0.05841064453125|cri_loss: -0.0283660888671875|unsuper_loss: 0.0
+average reward score: 5.2734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.65%) |Training time=0.47s (21.83%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1482|ppo_ep: 1|act_loss: -0.03936767578125|cri_loss: -0.0176849365234375|unsuper_loss: 0.0
+average reward score: 5.96875
+-------------------------------------------------------------------------------------
+|E2E latency=2.46s |Gather latency=0.00s (0.00%) |Generate time=1.60s (65.07%) |Training time=0.47s (19.00%) |Others=0.39 (15.93%)|CurSamplesPerSec=12.99 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1483|ppo_ep: 1|act_loss: 0.0780029296875|cri_loss: 0.04107666015625|unsuper_loss: 0.0
+average reward score: 5.66015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.58%) |Training time=0.48s (21.91%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1484|ppo_ep: 1|act_loss: 0.010711669921875|cri_loss: 0.00705718994140625|unsuper_loss: 0.0
+average reward score: 4.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.98%) |Training time=0.47s (21.50%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1485|ppo_ep: 1|act_loss: 0.022308349609375|cri_loss: 0.01165008544921875|unsuper_loss: 0.0
+average reward score: 5.21875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.90%) |Training time=0.47s (21.57%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1486|ppo_ep: 1|act_loss: -0.0200347900390625|cri_loss: -0.0081634521484375|unsuper_loss: 0.0
+average reward score: 5.078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.05%) |Training time=0.47s (21.43%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1487|ppo_ep: 1|act_loss: 0.01959228515625|cri_loss: 0.01219940185546875|unsuper_loss: 0.0
+average reward score: 5.16015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.96%) |Training time=0.47s (21.51%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1488|ppo_ep: 1|act_loss: -0.05877685546875|cri_loss: -0.0276641845703125|unsuper_loss: 0.0
+average reward score: 5.20703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.78s (75.88%) |Training time=0.47s (19.92%) |Others=0.10 (4.20%)|CurSamplesPerSec=13.66 |AvgSamplesPerSec=14.42
+[2023-04-14 09:42:30,551] [INFO] [logging.py:96:log_dist] [Rank 0] step=1490, skipped=18, lr=[8.992382294698705e-06, 8.992382294698705e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:42:30,570] [INFO] [timer.py:199:stop] epoch=0/micro_step=1490/global_step=1490, RunningAvgSamplesPerSec=109.31296286894379, CurrSamplesPerSec=103.17163047296637, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:42:30,664] [INFO] [logging.py:96:log_dist] [Rank 0] step=1490, skipped=20, lr=[4.660234986871507e-06, 4.660234986871507e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1489|ppo_ep: 1|act_loss: 0.0299530029296875|cri_loss: 0.0162506103515625|unsuper_loss: 0.0
+average reward score: 5.08984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.67%) |Training time=0.47s (21.74%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1490|ppo_ep: 1|act_loss: -0.02716064453125|cri_loss: -0.01160430908203125|unsuper_loss: 0.0
+average reward score: 4.71484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.50%) |Training time=0.48s (21.96%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1491|ppo_ep: 1|act_loss: -0.006702423095703125|cri_loss: -0.0006561279296875|unsuper_loss: 0.0
+average reward score: 4.7890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.84%) |Training time=0.47s (21.62%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1492|ppo_ep: 1|act_loss: 0.037200927734375|cri_loss: 0.020538330078125|unsuper_loss: 0.0
+average reward score: 5.14453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.49%) |Training time=0.48s (21.98%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1493|ppo_ep: 1|act_loss: -0.050750732421875|cri_loss: -0.02349853515625|unsuper_loss: 0.0
+average reward score: 4.421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.39%) |Training time=0.48s (22.10%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1494|ppo_ep: 1|act_loss: 0.03173828125|cri_loss: 0.0195159912109375|unsuper_loss: 0.0
+average reward score: 5.23828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.60s (67.96%) |Training time=0.48s (20.14%) |Others=0.28 (11.90%)|CurSamplesPerSec=13.56 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1495|ppo_ep: 1|act_loss: -0.03515625|cri_loss: -0.0159454345703125|unsuper_loss: 0.0
+average reward score: 5.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.59%) |Training time=0.48s (21.88%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1496|ppo_ep: 1|act_loss: -0.05670166015625|cri_loss: -0.025726318359375|unsuper_loss: 0.0
+average reward score: 4.83203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.50%) |Training time=0.48s (21.98%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.42
+[2023-04-14 09:42:48,235] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 1497|ppo_ep: 1|act_loss: 0.013671875|cri_loss: 0.013336181640625|unsuper_loss: 0.0
+average reward score: 4.99609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.01%) |Training time=0.47s (21.93%) |Others=0.09 (4.05%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42
+[2023-04-14 09:42:50,401] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 1498|ppo_ep: 1|act_loss: 0.046875|cri_loss: 0.0252685546875|unsuper_loss: 0.0
+average reward score: 5.39453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.68%) |Training time=0.48s (22.20%) |Others=0.09 (4.12%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42
+[2023-04-14 09:42:52,461] [INFO] [logging.py:96:log_dist] [Rank 0] step=1500, skipped=18, lr=[8.982989104683118e-06, 8.982989104683118e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:42:52,480] [INFO] [timer.py:199:stop] epoch=0/micro_step=1500/global_step=1500, RunningAvgSamplesPerSec=109.25825586216985, CurrSamplesPerSec=100.98657928694233, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:42:52,573] [INFO] [logging.py:96:log_dist] [Rank 0] step=1500, skipped=22, lr=[4.656349111024974e-06, 4.656349111024974e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1499|ppo_ep: 1|act_loss: 0.037841796875|cri_loss: 0.023956298828125|unsuper_loss: 0.0
+average reward score: 4.86328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.42%) |Training time=0.48s (22.05%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1500|ppo_ep: 1|act_loss: -0.0390625|cri_loss: -0.0156402587890625|unsuper_loss: 0.0
+average reward score: 4.95703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.95s |Gather latency=0.00s (0.00%) |Generate time=1.60s (54.11%) |Training time=0.48s (16.34%) |Others=0.87 (29.55%)|CurSamplesPerSec=10.84 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1501|ppo_ep: 1|act_loss: 0.0438232421875|cri_loss: 0.024444580078125|unsuper_loss: 0.0
+average reward score: 5.41796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.98%) |Training time=0.48s (21.58%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1502|ppo_ep: 1|act_loss: 0.054840087890625|cri_loss: 0.037109375|unsuper_loss: 0.0
+average reward score: 4.57421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.47s |Gather latency=0.00s (0.00%) |Generate time=1.86s (75.41%) |Training time=0.51s (20.59%) |Others=0.10 (4.00%)|CurSamplesPerSec=12.98 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1503|ppo_ep: 1|act_loss: 0.0931396484375|cri_loss: 0.05364990234375|unsuper_loss: 0.0
+average reward score: 4.46875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.03%) |Training time=0.46s (21.42%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1504|ppo_ep: 1|act_loss: -0.000888824462890625|cri_loss: 0.0021839141845703125|unsuper_loss: 0.0
+average reward score: 5.234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.11%) |Training time=0.46s (21.34%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1505|ppo_ep: 1|act_loss: -0.1180419921875|cri_loss: -0.049957275390625|unsuper_loss: 0.0
+average reward score: 5.0703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.76s |Gather latency=0.00s (0.00%) |Generate time=1.60s (57.98%) |Training time=0.46s (16.70%) |Others=0.70 (25.32%)|CurSamplesPerSec=11.60 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1506|ppo_ep: 1|act_loss: -0.0391845703125|cri_loss: -0.0111236572265625|unsuper_loss: 0.0
+average reward score: 4.72265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.30%) |Training time=0.46s (21.14%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1507|ppo_ep: 1|act_loss: -0.08062744140625|cri_loss: -0.0303497314453125|unsuper_loss: 0.0
+average reward score: 4.83984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.40%) |Training time=0.45s (21.03%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1508|ppo_ep: 1|act_loss: 0.01904296875|cri_loss: 0.0215911865234375|unsuper_loss: 0.0
+average reward score: 4.76171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.15%) |Training time=0.52s (23.41%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.44 |AvgSamplesPerSec=14.42
+[2023-04-14 09:43:15,905] [INFO] [logging.py:96:log_dist] [Rank 0] step=1510, skipped=18, lr=[8.973534283238398e-06, 8.973534283238398e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:43:15,923] [INFO] [timer.py:199:stop] epoch=0/micro_step=1510/global_step=1510, RunningAvgSamplesPerSec=109.20020953413868, CurrSamplesPerSec=92.72023189431013, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:43:16,016] [INFO] [logging.py:96:log_dist] [Rank 0] step=1510, skipped=22, lr=[4.651463004476193e-06, 4.651463004476193e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1509|ppo_ep: 1|act_loss: 0.111328125|cri_loss: 0.06182861328125|unsuper_loss: 0.0
+average reward score: 4.7421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.58%) |Training time=0.51s (23.00%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1510|ppo_ep: 1|act_loss: 0.0236968994140625|cri_loss: 0.0250091552734375|unsuper_loss: 0.0
+average reward score: 5.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.18%) |Training time=0.49s (22.34%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.42
+[2023-04-14 09:43:20,288] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, but hysteresis is 2. Reducing hysteresis to 1
+[2023-04-14 09:43:20,373] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
+epoch: 0|step: 1511|ppo_ep: 1|act_loss: 0.0660400390625|cri_loss: 0.04168701171875|unsuper_loss: 0.0
+average reward score: 5.32421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.42%) |Training time=0.46s (21.44%) |Others=0.09 (4.14%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.42
+[2023-04-14 09:43:22,449] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, reducing to 4096
+epoch: 0|step: 1512|ppo_ep: 1|act_loss: 0.1353759765625|cri_loss: 0.08154296875|unsuper_loss: 0.0
+average reward score: 5.49609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.78%) |Training time=0.47s (21.70%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1513|ppo_ep: 1|act_loss: 0.08203125|cri_loss: 0.04931640625|unsuper_loss: 0.0
+average reward score: 5.71875
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.66%) |Training time=0.48s (21.88%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1514|ppo_ep: 1|act_loss: -0.0709228515625|cri_loss: -0.0137939453125|unsuper_loss: 0.0
+average reward score: 5.1484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.65%) |Training time=0.48s (21.84%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1515|ppo_ep: 1|act_loss: -0.051605224609375|cri_loss: -0.006378173828125|unsuper_loss: 0.0
+average reward score: 5.2265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.70%) |Training time=0.47s (21.78%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1516|ppo_ep: 1|act_loss: 0.102783203125|cri_loss: 0.0628662109375|unsuper_loss: 0.0
+average reward score: 5.38671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.42%) |Training time=0.48s (22.09%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1517|ppo_ep: 1|act_loss: 0.07171630859375|cri_loss: 0.06170654296875|unsuper_loss: 0.0
+average reward score: 4.97265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.24%) |Training time=0.49s (22.26%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.42
+[2023-04-14 09:43:35,676] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 4096, reducing to 2048
+epoch: 0|step: 1518|ppo_ep: 1|act_loss: 0.1497802734375|cri_loss: 0.08740234375|unsuper_loss: 0.0
+average reward score: 5.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.75s (76.08%) |Training time=0.45s (19.66%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.88 |AvgSamplesPerSec=14.42
+[2023-04-14 09:43:37,835] [INFO] [logging.py:96:log_dist] [Rank 0] step=1520, skipped=21, lr=[8.966879312536848e-06, 8.966879312536848e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:43:37,854] [INFO] [timer.py:199:stop] epoch=0/micro_step=1520/global_step=1520, RunningAvgSamplesPerSec=109.15061098093494, CurrSamplesPerSec=101.99248759647922, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:43:37,946] [INFO] [logging.py:96:log_dist] [Rank 0] step=1520, skipped=23, lr=[4.647038240668007e-06, 4.647038240668007e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1519|ppo_ep: 1|act_loss: 0.0712890625|cri_loss: 0.10894775390625|unsuper_loss: 0.0
+average reward score: 4.89453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.62%) |Training time=0.48s (21.88%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1520|ppo_ep: 1|act_loss: 0.173583984375|cri_loss: 0.12744140625|unsuper_loss: 0.0
+average reward score: 4.77734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.84%) |Training time=0.47s (21.64%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1521|ppo_ep: 1|act_loss: 0.228759765625|cri_loss: 0.144775390625|unsuper_loss: 0.0
+average reward score: 4.7734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.75s |Gather latency=0.00s (0.00%) |Generate time=1.60s (58.34%) |Training time=0.48s (17.52%) |Others=0.66 (24.14%)|CurSamplesPerSec=11.65 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1522|ppo_ep: 1|act_loss: 0.2275390625|cri_loss: 0.133056640625|unsuper_loss: 0.0
+average reward score: 4.83984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.27%) |Training time=0.46s (21.15%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
+[2023-04-14 09:43:47,123] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 2048, reducing to 1024
+epoch: 0|step: 1523|ppo_ep: 1|act_loss: 0.055511474609375|cri_loss: 0.048187255859375|unsuper_loss: 0.0
+average reward score: 4.81640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.70%) |Training time=0.48s (21.81%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1524|ppo_ep: 1|act_loss: 0.0124359130859375|cri_loss: 0.0382080078125|unsuper_loss: 0.0
+average reward score: 4.65234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (72.99%) |Training time=0.50s (22.55%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.46 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1525|ppo_ep: 1|act_loss: -0.3203125|cri_loss: -0.0809326171875|unsuper_loss: 0.0
+average reward score: 4.73828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.65s (74.79%) |Training time=0.46s (20.74%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1526|ppo_ep: 1|act_loss: 0.00335693359375|cri_loss: 0.023529052734375|unsuper_loss: 0.0
+average reward score: 4.3359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.81%) |Training time=0.50s (22.70%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1527|ppo_ep: 1|act_loss: -0.03948974609375|cri_loss: 0.019134521484375|unsuper_loss: 0.0
+average reward score: 4.76953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.64%) |Training time=0.48s (21.89%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1528|ppo_ep: 1|act_loss: 0.2244873046875|cri_loss: 0.134521484375|unsuper_loss: 0.0
+average reward score: 4.3984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.35%) |Training time=0.49s (22.18%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.42
+[2023-04-14 09:44:00,334] [INFO] [logging.py:96:log_dist] [Rank 0] step=1530, skipped=22, lr=[8.958278725693138e-06, 8.958278725693138e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:44:00,352] [INFO] [timer.py:199:stop] epoch=0/micro_step=1530/global_step=1530, RunningAvgSamplesPerSec=109.09060936426617, CurrSamplesPerSec=97.64463647454845, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:44:00,445] [INFO] [logging.py:96:log_dist] [Rank 0] step=1530, skipped=23, lr=[4.642091605675834e-06, 4.642091605675834e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1529|ppo_ep: 1|act_loss: 0.209228515625|cri_loss: 0.13525390625|unsuper_loss: 0.0
+average reward score: 4.234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.23%) |Training time=0.49s (22.26%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.42
+[2023-04-14 09:44:02,664] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192
+epoch: 0|step: 1530|ppo_ep: 1|act_loss: -0.031982421875|cri_loss: 0.02606201171875|unsuper_loss: 0.0
+average reward score: 4.2578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.68s (75.53%) |Training time=0.45s (20.42%) |Others=0.09 (4.05%)|CurSamplesPerSec=14.43 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1531|ppo_ep: 1|act_loss: -0.10589599609375|cri_loss: -0.023681640625|unsuper_loss: 0.0
+average reward score: 4.4765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.71s (74.62%) |Training time=0.48s (20.78%) |Others=0.11 (4.59%)|CurSamplesPerSec=13.95 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1532|ppo_ep: 1|act_loss: -0.62890625|cri_loss: -0.153076171875|unsuper_loss: 0.0
+average reward score: 3.93359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.27%) |Training time=0.49s (22.22%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1533|ppo_ep: 1|act_loss: 0.1341552734375|cri_loss: 0.111083984375|unsuper_loss: 0.0
+average reward score: 4.3671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.77s (74.85%) |Training time=0.50s (20.97%) |Others=0.10 (4.18%)|CurSamplesPerSec=13.53 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1534|ppo_ep: 1|act_loss: 0.129638671875|cri_loss: 0.0816650390625|unsuper_loss: 0.0
+average reward score: 3.388671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.25%) |Training time=0.49s (22.25%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1535|ppo_ep: 1|act_loss: 0.484375|cri_loss: 0.39990234375|unsuper_loss: 0.0
+average reward score: 2.533203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.18%) |Training time=0.49s (22.32%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1536|ppo_ep: 1|act_loss: 0.16015625|cri_loss: 0.1719970703125|unsuper_loss: 0.0
+average reward score: 2.78515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.88s |Gather latency=0.00s (0.00%) |Generate time=1.60s (55.54%) |Training time=0.49s (17.01%) |Others=0.79 (27.45%)|CurSamplesPerSec=11.10 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1537|ppo_ep: 1|act_loss: 0.40625|cri_loss: 0.2578125|unsuper_loss: 0.0
+average reward score: 4.32421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.83%) |Training time=0.50s (22.67%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1538|ppo_ep: 1|act_loss: 0.3017578125|cri_loss: 0.2041015625|unsuper_loss: 0.0
+average reward score: 2.69140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.03%) |Training time=0.49s (22.48%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.41
+[2023-04-14 09:44:23,196] [INFO] [logging.py:96:log_dist] [Rank 0] step=1540, skipped=22, lr=[8.948664320677332e-06, 8.948664320677332e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:44:23,214] [INFO] [timer.py:199:stop] epoch=0/micro_step=1540/global_step=1540, RunningAvgSamplesPerSec=109.02335196349398, CurrSamplesPerSec=101.4977854172543, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:44:23,323] [INFO] [logging.py:96:log_dist] [Rank 0] step=1540, skipped=24, lr=[4.637612485008328e-06, 4.637612485008328e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1539|ppo_ep: 1|act_loss: 0.5537109375|cri_loss: 0.375|unsuper_loss: 0.0
+average reward score: 3.1484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.09%) |Training time=0.48s (21.79%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1540|ppo_ep: 1|act_loss: -0.2861328125|cri_loss: -0.0626220703125|unsuper_loss: 0.0
+average reward score: 3.4921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.91%) |Training time=0.50s (22.61%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1541|ppo_ep: 1|act_loss: -0.171875|cri_loss: -0.0218505859375|unsuper_loss: 0.0
+average reward score: 4.46875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.28%) |Training time=0.48s (22.15%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1542|ppo_ep: 1|act_loss: 0.02923583984375|cri_loss: 0.050079345703125|unsuper_loss: 0.0
+average reward score: 3.861328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.72%) |Training time=0.50s (22.77%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1543|ppo_ep: 1|act_loss: 0.27490234375|cri_loss: 0.1748046875|unsuper_loss: 0.0
+average reward score: 3.6015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.09%) |Training time=0.49s (22.40%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1544|ppo_ep: 1|act_loss: 0.25048828125|cri_loss: 0.1973876953125|unsuper_loss: 0.0
+average reward score: 2.509765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.87%) |Training time=0.49s (22.64%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1545|ppo_ep: 1|act_loss: 0.181396484375|cri_loss: 0.1395263671875|unsuper_loss: 0.0
+average reward score: 2.671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.70%) |Training time=0.50s (22.80%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1546|ppo_ep: 1|act_loss: -0.060211181640625|cri_loss: -0.006256103515625|unsuper_loss: 0.0
+average reward score: 3.3046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.86%) |Training time=0.49s (22.64%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1547|ppo_ep: 1|act_loss: 0.07568359375|cri_loss: 0.0791015625|unsuper_loss: 0.0
+average reward score: 2.16015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.43s |Gather latency=0.00s (0.00%) |Generate time=1.59s (65.40%) |Training time=0.55s (22.71%) |Others=0.29 (11.90%)|CurSamplesPerSec=13.19 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1548|ppo_ep: 1|act_loss: -0.0823974609375|cri_loss: 0.028076171875|unsuper_loss: 0.0
+average reward score: 2.818359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.79%) |Training time=0.50s (22.70%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.41
+[2023-04-14 09:44:45,262] [INFO] [logging.py:96:log_dist] [Rank 0] step=1550, skipped=22, lr=[8.938988793008496e-06, 8.938988793008496e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:44:45,280] [INFO] [timer.py:199:stop] epoch=0/micro_step=1550/global_step=1550, RunningAvgSamplesPerSec=108.92040127458814, CurrSamplesPerSec=96.0678427100437, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:44:45,372] [INFO] [logging.py:96:log_dist] [Rank 0] step=1550, skipped=24, lr=[4.632605586260949e-06, 4.632605586260949e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1549|ppo_ep: 1|act_loss: 0.34619140625|cri_loss: 0.21337890625|unsuper_loss: 0.0
+average reward score: 2.98046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.73%) |Training time=0.50s (22.78%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1550|ppo_ep: 1|act_loss: 0.369140625|cri_loss: 0.21630859375|unsuper_loss: 0.0
+average reward score: 3.705078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.14%) |Training time=0.51s (23.39%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1551|ppo_ep: 1|act_loss: 0.46142578125|cri_loss: 0.289306640625|unsuper_loss: 0.0
+average reward score: 3.84375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.66%) |Training time=0.47s (21.79%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1552|ppo_ep: 1|act_loss: 0.345703125|cri_loss: 0.218994140625|unsuper_loss: 0.0
+average reward score: 3.10546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.62%) |Training time=0.53s (23.93%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1553|ppo_ep: 1|act_loss: 0.163330078125|cri_loss: 0.12451171875|unsuper_loss: 0.0
+average reward score: 4.44921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.78%) |Training time=0.52s (23.76%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1554|ppo_ep: 1|act_loss: 0.135498046875|cri_loss: 0.0986328125|unsuper_loss: 0.0
+average reward score: 3.1015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.65s (74.94%) |Training time=0.45s (20.56%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1555|ppo_ep: 1|act_loss: -0.2354736328125|cri_loss: -0.0045166015625|unsuper_loss: 0.0
+average reward score: 4.90625
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.30%) |Training time=0.49s (22.20%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1556|ppo_ep: 1|act_loss: 0.00152587890625|cri_loss: 0.057708740234375|unsuper_loss: 0.0
+average reward score: 2.7890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.18%) |Training time=0.49s (22.34%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1557|ppo_ep: 1|act_loss: 0.43017578125|cri_loss: 0.260009765625|unsuper_loss: 0.0
+average reward score: 2.853515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.23%) |Training time=0.49s (22.30%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.51 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1558|ppo_ep: 1|act_loss: 0.15673828125|cri_loss: 0.125244140625|unsuper_loss: 0.0
+average reward score: 2.9765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.13%) |Training time=0.47s (21.36%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.41
+[2023-04-14 09:45:07,278] [INFO] [logging.py:96:log_dist] [Rank 0] step=1560, skipped=22, lr=[8.929252286101288e-06, 8.929252286101288e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:45:07,296] [INFO] [timer.py:199:stop] epoch=0/micro_step=1560/global_step=1560, RunningAvgSamplesPerSec=108.83748797941826, CurrSamplesPerSec=101.76667574004247, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:45:07,389] [INFO] [logging.py:96:log_dist] [Rank 0] step=1560, skipped=24, lr=[4.62756707715415e-06, 4.62756707715415e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1559|ppo_ep: 1|act_loss: 0.161865234375|cri_loss: 0.1217041015625|unsuper_loss: 0.0
+average reward score: 3.515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.66s (74.22%) |Training time=0.48s (21.37%) |Others=0.10 (4.41%)|CurSamplesPerSec=14.34 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1560|ppo_ep: 1|act_loss: 0.08599853515625|cri_loss: 0.06768798828125|unsuper_loss: 0.0
+average reward score: 3.03515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.92%) |Training time=0.45s (20.56%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1561|ppo_ep: 1|act_loss: 0.1705322265625|cri_loss: 0.1212158203125|unsuper_loss: 0.0
+average reward score: 4.92578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.54%) |Training time=0.45s (20.90%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1562|ppo_ep: 1|act_loss: 0.094970703125|cri_loss: 0.07098388671875|unsuper_loss: 0.0
+average reward score: 2.849609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.79s |Gather latency=0.00s (0.00%) |Generate time=1.77s (63.41%) |Training time=0.49s (17.69%) |Others=0.53 (18.90%)|CurSamplesPerSec=11.45 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1563|ppo_ep: 1|act_loss: 0.21337890625|cri_loss: 0.126708984375|unsuper_loss: 0.0
+average reward score: 3.6875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.25%) |Training time=0.46s (21.13%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1564|ppo_ep: 1|act_loss: 0.0687255859375|cri_loss: 0.06353759765625|unsuper_loss: 0.0
+average reward score: 4.07421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.20%) |Training time=0.47s (21.31%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1565|ppo_ep: 1|act_loss: 0.040740966796875|cri_loss: 0.034027099609375|unsuper_loss: 0.0
+average reward score: 4.078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.59%) |Training time=0.47s (21.59%) |Others=0.11 (4.81%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1566|ppo_ep: 1|act_loss: 0.20751953125|cri_loss: 0.12158203125|unsuper_loss: 0.0
+average reward score: 3.55859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.58%) |Training time=0.48s (21.90%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1567|ppo_ep: 1|act_loss: 0.0308380126953125|cri_loss: 0.02667236328125|unsuper_loss: 0.0
+average reward score: 2.984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.80%) |Training time=0.48s (21.73%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1568|ppo_ep: 1|act_loss: 0.1075439453125|cri_loss: 0.0609130859375|unsuper_loss: 0.0
+average reward score: 4.5625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.08%) |Training time=0.47s (21.40%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.41
+[2023-04-14 09:45:29,668] [INFO] [logging.py:96:log_dist] [Rank 0] step=1570, skipped=22, lr=[8.919454944274233e-06, 8.919454944274233e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:45:29,686] [INFO] [timer.py:199:stop] epoch=0/micro_step=1570/global_step=1570, RunningAvgSamplesPerSec=108.8116418141218, CurrSamplesPerSec=105.11076914161218, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:45:29,778] [INFO] [logging.py:96:log_dist] [Rank 0] step=1570, skipped=24, lr=[4.622497032370792e-06, 4.622497032370792e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1569|ppo_ep: 1|act_loss: -0.0347900390625|cri_loss: -0.00799560546875|unsuper_loss: 0.0
+average reward score: 3.5234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.04%) |Training time=0.47s (21.49%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1570|ppo_ep: 1|act_loss: 0.10052490234375|cri_loss: 0.06103515625|unsuper_loss: 0.0
+average reward score: 3.310546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.01%) |Training time=0.47s (21.48%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1571|ppo_ep: 1|act_loss: -0.04644775390625|cri_loss: -0.01904296875|unsuper_loss: 0.0
+average reward score: 2.890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.39%) |Training time=0.46s (21.06%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1572|ppo_ep: 1|act_loss: -0.6005859375|cri_loss: -0.1934814453125|unsuper_loss: 0.0
+average reward score: 2.80078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.43%) |Training time=0.48s (22.06%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1573|ppo_ep: 1|act_loss: -0.2471923828125|cri_loss: -0.1044921875|unsuper_loss: 0.0
+average reward score: 2.806640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.58%) |Training time=0.48s (21.92%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1574|ppo_ep: 1|act_loss: 0.23974609375|cri_loss: 0.1318359375|unsuper_loss: 0.0
+average reward score: 3.423828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.47%) |Training time=0.48s (22.02%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1575|ppo_ep: 1|act_loss: 0.115478515625|cri_loss: 0.07293701171875|unsuper_loss: 0.0
+average reward score: 3.80859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.76%) |Training time=0.47s (21.73%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1576|ppo_ep: 1|act_loss: 0.1158447265625|cri_loss: 0.0618896484375|unsuper_loss: 0.0
+average reward score: 3.185546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.83s |Gather latency=0.00s (0.00%) |Generate time=1.77s (62.47%) |Training time=0.48s (16.92%) |Others=0.58 (20.61%)|CurSamplesPerSec=11.32 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1577|ppo_ep: 1|act_loss: -0.0975341796875|cri_loss: -0.0289306640625|unsuper_loss: 0.0
+average reward score: 3.537109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.50%) |Training time=0.48s (22.00%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1578|ppo_ep: 1|act_loss: 0.1142578125|cri_loss: 0.0662841796875|unsuper_loss: 0.0
+average reward score: 3.919921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.58%) |Training time=0.48s (21.90%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.41
+[2023-04-14 09:45:52,093] [INFO] [logging.py:96:log_dist] [Rank 0] step=1580, skipped=22, lr=[8.909596912747568e-06, 8.909596912747568e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:45:52,111] [INFO] [timer.py:199:stop] epoch=0/micro_step=1580/global_step=1580, RunningAvgSamplesPerSec=108.76884252619337, CurrSamplesPerSec=101.50047189216659, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:45:52,204] [INFO] [logging.py:96:log_dist] [Rank 0] step=1580, skipped=24, lr=[4.617395527061168e-06, 4.617395527061168e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1579|ppo_ep: 1|act_loss: -0.0958251953125|cri_loss: -0.0352783203125|unsuper_loss: 0.0
+average reward score: 4.23046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.55%) |Training time=0.48s (21.96%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1580|ppo_ep: 1|act_loss: -0.014373779296875|cri_loss: 0.0070953369140625|unsuper_loss: 0.0
+average reward score: 4.1953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.62%) |Training time=0.47s (21.85%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1581|ppo_ep: 1|act_loss: 0.1207275390625|cri_loss: 0.0650634765625|unsuper_loss: 0.0
+average reward score: 3.24609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.62%) |Training time=0.47s (21.85%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1582|ppo_ep: 1|act_loss: 0.0110321044921875|cri_loss: 0.020233154296875|unsuper_loss: 0.0
+average reward score: 4.88671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.51%) |Training time=0.48s (21.97%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1583|ppo_ep: 1|act_loss: -0.049652099609375|cri_loss: -0.022430419921875|unsuper_loss: 0.0
+average reward score: 3.423828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.67%) |Training time=0.47s (21.82%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1584|ppo_ep: 1|act_loss: 0.0467529296875|cri_loss: 0.033203125|unsuper_loss: 0.0
+average reward score: 3.763671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.70%) |Training time=0.47s (21.80%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1585|ppo_ep: 1|act_loss: -0.01050567626953125|cri_loss: 0.001220703125|unsuper_loss: 0.0
+average reward score: 3.591796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.59%) |Training time=0.48s (21.80%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1586|ppo_ep: 1|act_loss: 0.107666015625|cri_loss: 0.0589599609375|unsuper_loss: 0.0
+average reward score: 3.427734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.29%) |Training time=0.51s (23.19%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.44 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1587|ppo_ep: 1|act_loss: 0.03472900390625|cri_loss: 0.0185546875|unsuper_loss: 0.0
+average reward score: 4.4609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.00%) |Training time=0.49s (22.16%) |Others=0.11 (4.84%)|CurSamplesPerSec=14.51 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1588|ppo_ep: 1|act_loss: 0.060333251953125|cri_loss: 0.0380859375|unsuper_loss: 0.0
+average reward score: 3.052734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.63s (70.59%) |Training time=0.58s (25.15%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.82 |AvgSamplesPerSec=14.41
+[2023-04-14 09:46:14,048] [INFO] [logging.py:96:log_dist] [Rank 0] step=1590, skipped=22, lr=[8.899678337641102e-06, 8.899678337641102e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:46:14,066] [INFO] [timer.py:199:stop] epoch=0/micro_step=1590/global_step=1590, RunningAvgSamplesPerSec=108.69881169225158, CurrSamplesPerSec=101.52227597704174, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:46:14,159] [INFO] [logging.py:96:log_dist] [Rank 0] step=1590, skipped=24, lr=[4.612262636841895e-06, 4.612262636841895e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1589|ppo_ep: 1|act_loss: -0.0256195068359375|cri_loss: -0.009033203125|unsuper_loss: 0.0
+average reward score: 3.4765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.52%) |Training time=0.48s (21.97%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1590|ppo_ep: 1|act_loss: 0.00982666015625|cri_loss: 0.0081329345703125|unsuper_loss: 0.0
+average reward score: 3.65234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.38%) |Training time=0.48s (22.09%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1591|ppo_ep: 1|act_loss: 0.06524658203125|cri_loss: 0.035430908203125|unsuper_loss: 0.0
+average reward score: 3.935546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.44s |Gather latency=0.00s (0.00%) |Generate time=1.77s (72.45%) |Training time=0.49s (20.24%) |Others=0.18 (7.31%)|CurSamplesPerSec=13.10 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1592|ppo_ep: 1|act_loss: 0.037811279296875|cri_loss: 0.021453857421875|unsuper_loss: 0.0
+average reward score: 3.34765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.26%) |Training time=0.48s (22.22%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1593|ppo_ep: 1|act_loss: 0.0085601806640625|cri_loss: 0.006603240966796875|unsuper_loss: 0.0
+average reward score: 4.12890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.57%) |Training time=0.48s (21.90%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1594|ppo_ep: 1|act_loss: -0.097412109375|cri_loss: -0.033599853515625|unsuper_loss: 0.0
+average reward score: 4.7734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.39%) |Training time=0.49s (22.13%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1595|ppo_ep: 1|act_loss: -0.043792724609375|cri_loss: -0.01497650146484375|unsuper_loss: 0.0
+average reward score: 3.98046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.04%) |Training time=0.49s (22.45%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1596|ppo_ep: 1|act_loss: 0.02978515625|cri_loss: 0.022247314453125|unsuper_loss: 0.0
+average reward score: 4.1796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.12%) |Training time=0.46s (21.35%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1597|ppo_ep: 1|act_loss: 0.091796875|cri_loss: 0.0494384765625|unsuper_loss: 0.0
+average reward score: 3.82421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.29%) |Training time=0.49s (22.21%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1598|ppo_ep: 1|act_loss: 0.023773193359375|cri_loss: 0.0134124755859375|unsuper_loss: 0.0
+average reward score: 5.109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.49%) |Training time=0.48s (22.00%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.41
+[2023-04-14 09:46:36,122] [INFO] [logging.py:96:log_dist] [Rank 0] step=1600, skipped=22, lr=[8.889699365972046e-06, 8.889699365972046e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:46:36,140] [INFO] [timer.py:199:stop] epoch=0/micro_step=1600/global_step=1600, RunningAvgSamplesPerSec=108.6402557626682, CurrSamplesPerSec=100.86045297044397, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:46:36,232] [INFO] [logging.py:96:log_dist] [Rank 0] step=1600, skipped=24, lr=[4.6070984377947884e-06, 4.6070984377947884e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1599|ppo_ep: 1|act_loss: 0.02203369140625|cri_loss: 0.0122222900390625|unsuper_loss: 0.0
+average reward score: 4.140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.50%) |Training time=0.48s (22.01%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1600|ppo_ep: 1|act_loss: -0.00275421142578125|cri_loss: 0.000911712646484375|unsuper_loss: 0.0
+average reward score: 4.296875
+-------------------------------------------------------------------------------------
+|E2E latency=3.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (50.51%) |Training time=0.48s (15.23%) |Others=1.09 (34.26%)|CurSamplesPerSec=10.08 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1601|ppo_ep: 1|act_loss: -0.01715087890625|cri_loss: -0.007396697998046875|unsuper_loss: 0.0
+average reward score: 3.7578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.62%) |Training time=0.48s (21.85%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1602|ppo_ep: 1|act_loss: -0.04461669921875|cri_loss: -0.0196533203125|unsuper_loss: 0.0
+average reward score: 3.693359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.58%) |Training time=0.48s (21.91%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1603|ppo_ep: 1|act_loss: 0.00501251220703125|cri_loss: 0.005153656005859375|unsuper_loss: 0.0
+average reward score: 3.765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.59%) |Training time=0.48s (21.91%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1604|ppo_ep: 1|act_loss: -0.03546142578125|cri_loss: -0.015838623046875|unsuper_loss: 0.0
+average reward score: 4.3125
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.78%) |Training time=0.47s (21.57%) |Others=0.12 (5.65%)|CurSamplesPerSec=14.54 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1605|ppo_ep: 1|act_loss: 0.10308837890625|cri_loss: 0.058502197265625|unsuper_loss: 0.0
+average reward score: 4.0078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.73s (74.31%) |Training time=0.49s (21.15%) |Others=0.11 (4.54%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1606|ppo_ep: 1|act_loss: -0.0546875|cri_loss: -0.02545166015625|unsuper_loss: 0.0
+average reward score: 3.8046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.69%) |Training time=0.47s (21.79%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1607|ppo_ep: 1|act_loss: 0.0222930908203125|cri_loss: 0.01284027099609375|unsuper_loss: 0.0
+average reward score: 4.12890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.96%) |Training time=0.47s (21.51%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1608|ppo_ep: 1|act_loss: -0.04022216796875|cri_loss: -0.0186004638671875|unsuper_loss: 0.0
+average reward score: 4.2265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.68%) |Training time=0.47s (21.79%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41
+[2023-04-14 09:46:59,030] [INFO] [logging.py:96:log_dist] [Rank 0] step=1610, skipped=22, lr=[8.879660145652832e-06, 8.879660145652832e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:46:59,048] [INFO] [timer.py:199:stop] epoch=0/micro_step=1610/global_step=1610, RunningAvgSamplesPerSec=108.59716815707017, CurrSamplesPerSec=105.74269943889715, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:46:59,141] [INFO] [logging.py:96:log_dist] [Rank 0] step=1610, skipped=24, lr=[4.601903006465734e-06, 4.601903006465734e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1609|ppo_ep: 1|act_loss: -0.0167388916015625|cri_loss: -0.006267547607421875|unsuper_loss: 0.0
+average reward score: 4.0703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.00%) |Training time=0.47s (21.47%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1610|ppo_ep: 1|act_loss: -0.02960205078125|cri_loss: -0.01259613037109375|unsuper_loss: 0.0
+average reward score: 4.08984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.59%) |Training time=0.48s (21.92%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1611|ppo_ep: 1|act_loss: -0.00969696044921875|cri_loss: -0.00272369384765625|unsuper_loss: 0.0
+average reward score: 5.1171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.61s (72.18%) |Training time=0.51s (22.87%) |Others=0.11 (4.94%)|CurSamplesPerSec=14.38 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1612|ppo_ep: 1|act_loss: 0.0380859375|cri_loss: 0.02044677734375|unsuper_loss: 0.0
+average reward score: 3.4609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.07%) |Training time=0.49s (22.44%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.54 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1613|ppo_ep: 1|act_loss: 0.06634521484375|cri_loss: 0.03521728515625|unsuper_loss: 0.0
+average reward score: 4.12890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.66s (75.14%) |Training time=0.45s (20.37%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1614|ppo_ep: 1|act_loss: 0.0743408203125|cri_loss: 0.03887939453125|unsuper_loss: 0.0
+average reward score: 4.20703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.39%) |Training time=0.49s (22.12%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1615|ppo_ep: 1|act_loss: 0.0175018310546875|cri_loss: 0.01300048828125|unsuper_loss: 0.0
+average reward score: 4.3515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.26%) |Training time=0.49s (22.05%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1616|ppo_ep: 1|act_loss: 0.02777099609375|cri_loss: 0.0153656005859375|unsuper_loss: 0.0
+average reward score: 3.30859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.79%) |Training time=0.48s (21.71%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1617|ppo_ep: 1|act_loss: -0.039794921875|cri_loss: -0.0181884765625|unsuper_loss: 0.0
+average reward score: 4.49609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.42s |Gather latency=0.00s (0.00%) |Generate time=1.73s (71.29%) |Training time=0.48s (19.72%) |Others=0.22 (8.99%)|CurSamplesPerSec=13.21 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1618|ppo_ep: 1|act_loss: 0.07086181640625|cri_loss: 0.03765869140625|unsuper_loss: 0.0
+average reward score: 3.755859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.71%) |Training time=0.45s (20.78%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.41
+[2023-04-14 09:47:21,223] [INFO] [logging.py:96:log_dist] [Rank 0] step=1620, skipped=22, lr=[8.869560825488926e-06, 8.869560825488926e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:47:21,241] [INFO] [timer.py:199:stop] epoch=0/micro_step=1620/global_step=1620, RunningAvgSamplesPerSec=108.55081239645398, CurrSamplesPerSec=106.41888138557795, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:47:21,334] [INFO] [logging.py:96:log_dist] [Rank 0] step=1620, skipped=24, lr=[4.596676419863561e-06, 4.596676419863561e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1619|ppo_ep: 1|act_loss: 0.0615234375|cri_loss: 0.03411865234375|unsuper_loss: 0.0
+average reward score: 4.44140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.12%) |Training time=0.46s (21.36%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1620|ppo_ep: 1|act_loss: 0.024871826171875|cri_loss: 0.01503753662109375|unsuper_loss: 0.0
+average reward score: 3.728515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.77s (74.71%) |Training time=0.50s (21.13%) |Others=0.10 (4.16%)|CurSamplesPerSec=13.50 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1621|ppo_ep: 1|act_loss: -0.0162811279296875|cri_loss: -0.007350921630859375|unsuper_loss: 0.0
+average reward score: 5.1328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.84%) |Training time=0.48s (21.69%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1622|ppo_ep: 1|act_loss: -0.02630615234375|cri_loss: -0.01035308837890625|unsuper_loss: 0.0
+average reward score: 4.40625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.77%) |Training time=0.47s (21.71%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1623|ppo_ep: 1|act_loss: 0.03363037109375|cri_loss: 0.0177154541015625|unsuper_loss: 0.0
+average reward score: 3.87109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.55%) |Training time=0.48s (21.93%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1624|ppo_ep: 1|act_loss: 0.005886077880859375|cri_loss: 0.003627777099609375|unsuper_loss: 0.0
+average reward score: 3.359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.09%) |Training time=0.49s (22.37%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1625|ppo_ep: 1|act_loss: 0.0280609130859375|cri_loss: 0.01690673828125|unsuper_loss: 0.0
+average reward score: 2.912109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.24%) |Training time=0.46s (21.22%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1626|ppo_ep: 1|act_loss: -0.050811767578125|cri_loss: -0.021881103515625|unsuper_loss: 0.0
+average reward score: 4.94921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.40%) |Training time=0.48s (22.06%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1627|ppo_ep: 1|act_loss: -0.05517578125|cri_loss: -0.0225067138671875|unsuper_loss: 0.0
+average reward score: 4.9921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.55%) |Training time=0.48s (21.94%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1628|ppo_ep: 1|act_loss: 0.07183837890625|cri_loss: 0.039794921875|unsuper_loss: 0.0
+average reward score: 4.18359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.55%) |Training time=0.48s (21.92%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41
+[2023-04-14 09:47:43,177] [INFO] [logging.py:96:log_dist] [Rank 0] step=1630, skipped=22, lr=[8.859401555176615e-06, 8.859401555176615e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:47:43,482] [INFO] [timer.py:199:stop] epoch=0/micro_step=1630/global_step=1630, RunningAvgSamplesPerSec=108.43823317583377, CurrSamplesPerSec=53.26256723411719, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:47:43,575] [INFO] [logging.py:96:log_dist] [Rank 0] step=1630, skipped=24, lr=[4.591418755458887e-06, 4.591418755458887e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1629|ppo_ep: 1|act_loss: 0.09765625|cri_loss: 0.051177978515625|unsuper_loss: 0.0
+average reward score: 3.75390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.45s |Gather latency=0.00s (0.00%) |Generate time=1.59s (64.92%) |Training time=0.76s (31.10%) |Others=0.10 (3.99%)|CurSamplesPerSec=13.04 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1630|ppo_ep: 1|act_loss: 0.0201416015625|cri_loss: 0.0125274658203125|unsuper_loss: 0.0
+average reward score: 4.66796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.48s (22.00%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1631|ppo_ep: 1|act_loss: 0.018310546875|cri_loss: 0.0108489990234375|unsuper_loss: 0.0
+average reward score: 4.23046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.19%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1632|ppo_ep: 1|act_loss: 0.0361328125|cri_loss: 0.019378662109375|unsuper_loss: 0.0
+average reward score: 4.4921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.13%) |Training time=0.48s (22.33%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1633|ppo_ep: 1|act_loss: -0.019989013671875|cri_loss: -0.00919342041015625|unsuper_loss: 0.0
+average reward score: 4.1875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.25%) |Training time=0.48s (22.23%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1634|ppo_ep: 1|act_loss: 0.031402587890625|cri_loss: 0.01763916015625|unsuper_loss: 0.0
+average reward score: 4.09375
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.64s (71.03%) |Training time=0.57s (24.67%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.83 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1635|ppo_ep: 1|act_loss: 0.04095458984375|cri_loss: 0.0223541259765625|unsuper_loss: 0.0
+average reward score: 3.7890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.86%) |Training time=0.49s (22.63%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1636|ppo_ep: 1|act_loss: -0.003795623779296875|cri_loss: -0.0005779266357421875|unsuper_loss: 0.0
+average reward score: 3.61328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.04%) |Training time=0.49s (22.44%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1637|ppo_ep: 1|act_loss: -0.004100799560546875|cri_loss: -0.0005321502685546875|unsuper_loss: 0.0
+average reward score: 4.2109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.02%) |Training time=0.49s (22.47%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1638|ppo_ep: 1|act_loss: 0.043060302734375|cri_loss: 0.02545166015625|unsuper_loss: 0.0
+average reward score: 3.30859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.82%) |Training time=0.49s (22.68%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.41
+[2023-04-14 09:48:05,298] [INFO] [logging.py:96:log_dist] [Rank 0] step=1640, skipped=22, lr=[8.849182485300792e-06, 8.849182485300792e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:48:05,316] [INFO] [timer.py:199:stop] epoch=0/micro_step=1640/global_step=1640, RunningAvgSamplesPerSec=108.37462570038971, CurrSamplesPerSec=107.1831391727385, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:48:05,409] [INFO] [logging.py:96:log_dist] [Rank 0] step=1640, skipped=24, lr=[4.586130091182985e-06, 4.586130091182985e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1639|ppo_ep: 1|act_loss: 0.005695343017578125|cri_loss: 0.00555419921875|unsuper_loss: 0.0
+average reward score: 4.1953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.90%) |Training time=0.46s (21.53%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1640|ppo_ep: 1|act_loss: -0.004543304443359375|cri_loss: -0.0017652511596679688|unsuper_loss: 0.0
+average reward score: 4.12890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.59s (70.44%) |Training time=0.47s (20.68%) |Others=0.20 (8.88%)|CurSamplesPerSec=14.19 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1641|ppo_ep: 1|act_loss: 0.014434814453125|cri_loss: 0.00841522216796875|unsuper_loss: 0.0
+average reward score: 3.341796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.97%) |Training time=0.49s (22.52%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1642|ppo_ep: 1|act_loss: -0.00848388671875|cri_loss: -0.003963470458984375|unsuper_loss: 0.0
+average reward score: 5.80078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.21%) |Training time=0.48s (22.28%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1643|ppo_ep: 1|act_loss: 0.031829833984375|cri_loss: 0.016815185546875|unsuper_loss: 0.0
+average reward score: 5.17578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.15%) |Training time=0.48s (22.31%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1644|ppo_ep: 1|act_loss: 0.0106048583984375|cri_loss: 0.00621795654296875|unsuper_loss: 0.0
+average reward score: 4.61328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.23%) |Training time=0.48s (22.24%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1645|ppo_ep: 1|act_loss: 0.038177490234375|cri_loss: 0.0225982666015625|unsuper_loss: 0.0
+average reward score: 3.908203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.64%) |Training time=0.47s (21.81%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1646|ppo_ep: 1|act_loss: -0.0369873046875|cri_loss: -0.017547607421875|unsuper_loss: 0.0
+average reward score: 4.453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.71s (74.11%) |Training time=0.49s (21.49%) |Others=0.10 (4.41%)|CurSamplesPerSec=13.89 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1647|ppo_ep: 1|act_loss: -0.051971435546875|cri_loss: -0.0243682861328125|unsuper_loss: 0.0
+average reward score: 3.453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.06%) |Training time=0.49s (22.41%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1648|ppo_ep: 1|act_loss: 0.01430511474609375|cri_loss: 0.00861358642578125|unsuper_loss: 0.0
+average reward score: 4.2734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.17%) |Training time=0.49s (22.36%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.41
+[2023-04-14 09:48:27,226] [INFO] [logging.py:96:log_dist] [Rank 0] step=1650, skipped=22, lr=[8.838903767332725e-06, 8.838903767332725e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:48:27,244] [INFO] [timer.py:199:stop] epoch=0/micro_step=1650/global_step=1650, RunningAvgSamplesPerSec=108.32006295141781, CurrSamplesPerSec=100.62015510849723, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:48:27,337] [INFO] [logging.py:96:log_dist] [Rank 0] step=1650, skipped=24, lr=[4.580810505426617e-06, 4.580810505426617e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1649|ppo_ep: 1|act_loss: 0.0341796875|cri_loss: 0.018310546875|unsuper_loss: 0.0
+average reward score: 3.806640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.26%) |Training time=0.48s (22.22%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1650|ppo_ep: 1|act_loss: 0.0545654296875|cri_loss: 0.029083251953125|unsuper_loss: 0.0
+average reward score: 4.59375
+-------------------------------------------------------------------------------------
+|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.12%) |Training time=0.51s (21.64%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.55 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1651|ppo_ep: 1|act_loss: 0.0146026611328125|cri_loss: 0.0078277587890625|unsuper_loss: 0.0
+average reward score: 4.9375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.82%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1652|ppo_ep: 1|act_loss: -0.00402069091796875|cri_loss: -0.0014247894287109375|unsuper_loss: 0.0
+average reward score: 4.61328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.87%) |Training time=0.49s (22.63%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1653|ppo_ep: 1|act_loss: 0.00786590576171875|cri_loss: 0.00921630859375|unsuper_loss: 0.0
+average reward score: 3.876953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.14%) |Training time=0.48s (22.31%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1654|ppo_ep: 1|act_loss: -0.023162841796875|cri_loss: -0.0103912353515625|unsuper_loss: 0.0
+average reward score: 4.1875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.44%) |Training time=0.47s (22.00%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1655|ppo_ep: 1|act_loss: -0.0096435546875|cri_loss: -0.004184722900390625|unsuper_loss: 0.0
+average reward score: 4.55859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.16%) |Training time=0.49s (22.25%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1656|ppo_ep: 1|act_loss: 0.018157958984375|cri_loss: 0.011199951171875|unsuper_loss: 0.0
+average reward score: 5.671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.46s |Gather latency=0.00s (0.00%) |Generate time=1.59s (64.54%) |Training time=0.45s (18.46%) |Others=0.42 (17.01%)|CurSamplesPerSec=13.00 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1657|ppo_ep: 1|act_loss: 0.024444580078125|cri_loss: 0.01318359375|unsuper_loss: 0.0
+average reward score: 4.3515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.73%) |Training time=0.46s (21.69%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1658|ppo_ep: 1|act_loss: 0.024383544921875|cri_loss: 0.0128021240234375|unsuper_loss: 0.0
+average reward score: 3.88671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.81%) |Training time=0.46s (21.62%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.41
+[2023-04-14 09:48:49,312] [INFO] [logging.py:96:log_dist] [Rank 0] step=1660, skipped=22, lr=[8.82856555362781e-06, 8.82856555362781e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:48:49,331] [INFO] [timer.py:199:stop] epoch=0/micro_step=1660/global_step=1660, RunningAvgSamplesPerSec=108.27997592914936, CurrSamplesPerSec=107.1406158574308, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:48:49,423] [INFO] [logging.py:96:log_dist] [Rank 0] step=1660, skipped=24, lr=[4.575460077038877e-06, 4.575460077038877e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1659|ppo_ep: 1|act_loss: 0.0134429931640625|cri_loss: 0.0074615478515625|unsuper_loss: 0.0
+average reward score: 3.8671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.86%) |Training time=0.46s (21.57%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1660|ppo_ep: 1|act_loss: -0.0192413330078125|cri_loss: -0.009063720703125|unsuper_loss: 0.0
+average reward score: 4.8671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.71%) |Training time=0.47s (21.71%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1661|ppo_ep: 1|act_loss: -0.00200653076171875|cri_loss: -0.00023174285888671875|unsuper_loss: 0.0
+average reward score: 4.94140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.91%) |Training time=0.46s (21.51%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1662|ppo_ep: 1|act_loss: -0.0095977783203125|cri_loss: -0.003932952880859375|unsuper_loss: 0.0
+average reward score: 4.2421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.58s (69.08%) |Training time=0.46s (20.33%) |Others=0.24 (10.59%)|CurSamplesPerSec=13.99 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1663|ppo_ep: 1|act_loss: 0.002063751220703125|cri_loss: 0.0014085769653320312|unsuper_loss: 0.0
+average reward score: 5.265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.59%) |Training time=0.47s (21.82%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1664|ppo_ep: 1|act_loss: 0.04315185546875|cri_loss: 0.0236358642578125|unsuper_loss: 0.0
+average reward score: 3.734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.57s (71.26%) |Training time=0.48s (21.65%) |Others=0.16 (7.09%)|CurSamplesPerSec=14.49 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1665|ppo_ep: 1|act_loss: -0.00046062469482421875|cri_loss: 0.0005645751953125|unsuper_loss: 0.0
+average reward score: 4.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.12%) |Training time=0.47s (21.71%) |Others=0.11 (5.17%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1666|ppo_ep: 1|act_loss: 0.0020160675048828125|cri_loss: 0.0041656494140625|unsuper_loss: 0.0
+average reward score: 3.298828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.68%) |Training time=0.47s (21.76%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1667|ppo_ep: 1|act_loss: -0.0254974365234375|cri_loss: -0.01202392578125|unsuper_loss: 0.0
+average reward score: 5.234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.46s |Gather latency=0.00s (0.00%) |Generate time=1.58s (64.14%) |Training time=0.47s (19.15%) |Others=0.41 (16.71%)|CurSamplesPerSec=13.01 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1668|ppo_ep: 1|act_loss: 0.020050048828125|cri_loss: 0.010650634765625|unsuper_loss: 0.0
+average reward score: 3.779296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.48%) |Training time=0.47s (21.95%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.41
+[2023-04-14 09:49:11,326] [INFO] [logging.py:96:log_dist] [Rank 0] step=1670, skipped=22, lr=[8.818167997423314e-06, 8.818167997423314e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:49:11,344] [INFO] [timer.py:199:stop] epoch=0/micro_step=1670/global_step=1670, RunningAvgSamplesPerSec=108.25653389955217, CurrSamplesPerSec=106.11741265840399, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:49:11,437] [INFO] [logging.py:96:log_dist] [Rank 0] step=1670, skipped=24, lr=[4.5700788853260205e-06, 4.5700788853260205e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1669|ppo_ep: 1|act_loss: -0.01548004150390625|cri_loss: -0.00714111328125|unsuper_loss: 0.0
+average reward score: 4.375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.46s (21.61%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1670|ppo_ep: 1|act_loss: 0.02532958984375|cri_loss: 0.014007568359375|unsuper_loss: 0.0
+average reward score: 4.796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.14%) |Training time=0.49s (22.23%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1671|ppo_ep: 1|act_loss: -0.00852203369140625|cri_loss: -0.00392913818359375|unsuper_loss: 0.0
+average reward score: 5.30078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.21%) |Training time=0.48s (22.27%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1672|ppo_ep: 1|act_loss: 0.0172882080078125|cri_loss: 0.0097808837890625|unsuper_loss: 0.0
+average reward score: 4.484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.97%) |Training time=0.49s (22.54%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1673|ppo_ep: 1|act_loss: 0.00482177734375|cri_loss: 0.003299713134765625|unsuper_loss: 0.0
+average reward score: 4.30078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.85%) |Training time=0.49s (22.64%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1674|ppo_ep: 1|act_loss: 0.015869140625|cri_loss: 0.00927734375|unsuper_loss: 0.0
+average reward score: 4.89453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.95%) |Training time=0.49s (22.55%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1675|ppo_ep: 1|act_loss: 0.0238037109375|cri_loss: 0.01232147216796875|unsuper_loss: 0.0
+average reward score: 4.6171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.64s (73.03%) |Training time=0.50s (22.54%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.28 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1676|ppo_ep: 1|act_loss: 0.049346923828125|cri_loss: 0.0260009765625|unsuper_loss: 0.0
+average reward score: 3.904296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.80%) |Training time=0.49s (22.66%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1677|ppo_ep: 1|act_loss: 0.0116119384765625|cri_loss: 0.006591796875|unsuper_loss: 0.0
+average reward score: 4.5390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.73%) |Training time=0.49s (22.76%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1678|ppo_ep: 1|act_loss: 0.05316162109375|cri_loss: 0.027618408203125|unsuper_loss: 0.0
+average reward score: 3.74609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.91%) |Training time=0.49s (22.54%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.42
+[2023-04-14 09:49:33,142] [INFO] [logging.py:96:log_dist] [Rank 0] step=1680, skipped=22, lr=[8.807711252836105e-06, 8.807711252836105e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:49:33,906] [INFO] [timer.py:199:stop] epoch=0/micro_step=1680/global_step=1680, RunningAvgSamplesPerSec=108.02099512384166, CurrSamplesPerSec=29.835284744846422, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:49:34,000] [INFO] [logging.py:96:log_dist] [Rank 0] step=1680, skipped=24, lr=[4.564667010050288e-06, 4.564667010050288e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1679|ppo_ep: 1|act_loss: -0.0125732421875|cri_loss: -0.0058746337890625|unsuper_loss: 0.0
+average reward score: 5.4296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.91s |Gather latency=0.00s (0.00%) |Generate time=1.58s (54.17%) |Training time=1.24s (42.44%) |Others=0.10 (3.39%)|CurSamplesPerSec=10.98 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1680|ppo_ep: 1|act_loss: -0.021759033203125|cri_loss: -0.01035308837890625|unsuper_loss: 0.0
+average reward score: 4.453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.60%) |Training time=0.45s (20.84%) |Others=0.14 (6.57%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1681|ppo_ep: 1|act_loss: -0.0322265625|cri_loss: -0.0152130126953125|unsuper_loss: 0.0
+average reward score: 3.94921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.95%) |Training time=0.47s (20.70%) |Others=0.10 (4.35%)|CurSamplesPerSec=14.12 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1682|ppo_ep: 1|act_loss: -0.0018930435180664062|cri_loss: -3.147125244140625e-05|unsuper_loss: 0.0
+average reward score: 4.015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.49%) |Training time=0.47s (21.94%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1683|ppo_ep: 1|act_loss: 0.02203369140625|cri_loss: 0.0120391845703125|unsuper_loss: 0.0
+average reward score: 3.998046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.30%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1684|ppo_ep: 1|act_loss: 0.03509521484375|cri_loss: 0.01904296875|unsuper_loss: 0.0
+average reward score: 3.82421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.59s (70.07%) |Training time=0.47s (20.92%) |Others=0.20 (9.01%)|CurSamplesPerSec=14.14 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1685|ppo_ep: 1|act_loss: -0.00547027587890625|cri_loss: -0.0011768341064453125|unsuper_loss: 0.0
+average reward score: 4.7265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.48s (22.10%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1686|ppo_ep: 1|act_loss: -0.033355712890625|cri_loss: -0.01560211181640625|unsuper_loss: 0.0
+average reward score: 4.32421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.76%) |Training time=0.49s (22.70%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1687|ppo_ep: 1|act_loss: -0.01319122314453125|cri_loss: -0.005558013916015625|unsuper_loss: 0.0
+average reward score: 4.6328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.12%) |Training time=0.48s (22.34%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1688|ppo_ep: 1|act_loss: -0.003948211669921875|cri_loss: 0.0012969970703125|unsuper_loss: 0.0
+average reward score: 4.3984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.86%) |Training time=0.49s (22.61%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.41
+[2023-04-14 09:49:55,740] [INFO] [logging.py:96:log_dist] [Rank 0] step=1690, skipped=22, lr=[8.797195474860359e-06, 8.797195474860359e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:49:55,758] [INFO] [timer.py:199:stop] epoch=0/micro_step=1690/global_step=1690, RunningAvgSamplesPerSec=107.97908023515386, CurrSamplesPerSec=98.599461374245, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:49:55,851] [INFO] [logging.py:96:log_dist] [Rank 0] step=1690, skipped=24, lr=[4.559224531428731e-06, 4.559224531428731e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1689|ppo_ep: 1|act_loss: -0.04498291015625|cri_loss: -0.0214385986328125|unsuper_loss: 0.0
+average reward score: 5.28125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.98%) |Training time=0.49s (22.50%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1690|ppo_ep: 1|act_loss: 0.044769287109375|cri_loss: 0.02410888671875|unsuper_loss: 0.0
+average reward score: 4.01171875
+-------------------------------------------------------------------------------------
+|E2E latency=3.03s |Gather latency=0.00s (0.00%) |Generate time=1.58s (52.18%) |Training time=0.49s (16.08%) |Others=0.96 (31.73%)|CurSamplesPerSec=10.56 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1691|ppo_ep: 1|act_loss: 0.019256591796875|cri_loss: 0.0107574462890625|unsuper_loss: 0.0
+average reward score: 4.0625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.84%) |Training time=0.49s (22.61%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1692|ppo_ep: 1|act_loss: -0.0185089111328125|cri_loss: -0.007755279541015625|unsuper_loss: 0.0
+average reward score: 4.2109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.04%) |Training time=0.49s (22.42%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1693|ppo_ep: 1|act_loss: -0.00452423095703125|cri_loss: -0.0012645721435546875|unsuper_loss: 0.0
+average reward score: 3.41015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.48s (22.06%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1694|ppo_ep: 1|act_loss: -0.01233673095703125|cri_loss: -0.00475311279296875|unsuper_loss: 0.0
+average reward score: 3.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.07%) |Training time=0.48s (22.40%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1695|ppo_ep: 1|act_loss: 0.01306915283203125|cri_loss: 0.00788116455078125|unsuper_loss: 0.0
+average reward score: 4.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.07%) |Training time=0.48s (20.66%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1696|ppo_ep: 1|act_loss: 0.01459503173828125|cri_loss: 0.00774383544921875|unsuper_loss: 0.0
+average reward score: 6.890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.86%) |Training time=0.48s (22.24%) |Others=0.11 (4.90%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1697|ppo_ep: 1|act_loss: -0.010650634765625|cri_loss: -0.00344085693359375|unsuper_loss: 0.0
+average reward score: 4.671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.50%) |Training time=0.48s (22.00%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1698|ppo_ep: 1|act_loss: 0.015228271484375|cri_loss: 0.00823211669921875|unsuper_loss: 0.0
+average reward score: 4.71484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.07%) |Training time=0.48s (22.39%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41
+[2023-04-14 09:50:18,441] [INFO] [logging.py:96:log_dist] [Rank 0] step=1700, skipped=22, lr=[8.786620819365276e-06, 8.786620819365276e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:50:18,460] [INFO] [timer.py:199:stop] epoch=0/micro_step=1700/global_step=1700, RunningAvgSamplesPerSec=107.92857209171187, CurrSamplesPerSec=103.74092618671828, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:50:18,554] [INFO] [logging.py:96:log_dist] [Rank 0] step=1700, skipped=24, lr=[4.553751530132009e-06, 4.553751530132009e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1699|ppo_ep: 1|act_loss: -0.0390625|cri_loss: -0.0186309814453125|unsuper_loss: 0.0
+average reward score: 5.40625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.67%) |Training time=0.47s (21.65%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1700|ppo_ep: 1|act_loss: 0.01053619384765625|cri_loss: 0.005527496337890625|unsuper_loss: 0.0
+average reward score: 4.95703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.33%) |Training time=0.48s (22.14%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1701|ppo_ep: 1|act_loss: 0.0002722740173339844|cri_loss: 0.0005764961242675781|unsuper_loss: 0.0
+average reward score: 4.328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.59s (69.91%) |Training time=0.49s (21.56%) |Others=0.19 (8.53%)|CurSamplesPerSec=14.10 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1702|ppo_ep: 1|act_loss: -0.002727508544921875|cri_loss: -0.0006961822509765625|unsuper_loss: 0.0
+average reward score: 4.84375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.21%) |Training time=0.48s (22.23%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1703|ppo_ep: 1|act_loss: -0.0056304931640625|cri_loss: -0.0018415451049804688|unsuper_loss: 0.0
+average reward score: 3.80078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.03%) |Training time=0.48s (21.91%) |Others=0.11 (5.06%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1704|ppo_ep: 1|act_loss: 0.0157623291015625|cri_loss: 0.00850677490234375|unsuper_loss: 0.0
+average reward score: 5.21484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.39%) |Training time=0.48s (21.28%) |Others=0.10 (4.32%)|CurSamplesPerSec=14.11 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1705|ppo_ep: 1|act_loss: 0.0118255615234375|cri_loss: 0.006561279296875|unsuper_loss: 0.0
+average reward score: 4.46484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.19%) |Training time=0.48s (22.27%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1706|ppo_ep: 1|act_loss: -0.0498046875|cri_loss: -0.0225372314453125|unsuper_loss: 0.0
+average reward score: 4.34375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.24%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1707|ppo_ep: 1|act_loss: -0.0137176513671875|cri_loss: -0.00522613525390625|unsuper_loss: 0.0
+average reward score: 4.85546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.48s (22.04%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1708|ppo_ep: 1|act_loss: -0.01203155517578125|cri_loss: -0.00525665283203125|unsuper_loss: 0.0
+average reward score: 3.890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.85%) |Training time=0.49s (22.62%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41
+[2023-04-14 09:50:40,357] [INFO] [logging.py:96:log_dist] [Rank 0] step=1710, skipped=22, lr=[8.77598744309276e-06, 8.77598744309276e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:50:40,375] [INFO] [timer.py:199:stop] epoch=0/micro_step=1710/global_step=1710, RunningAvgSamplesPerSec=107.88094685691051, CurrSamplesPerSec=101.33945819997479, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:50:40,517] [INFO] [logging.py:96:log_dist] [Rank 0] step=1710, skipped=24, lr=[4.548248087283204e-06, 4.548248087283204e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1709|ppo_ep: 1|act_loss: -0.0095977783203125|cri_loss: -0.0038585662841796875|unsuper_loss: 0.0
+average reward score: 4.04296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.82%) |Training time=0.48s (21.60%) |Others=0.15 (6.58%)|CurSamplesPerSec=14.44 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1710|ppo_ep: 1|act_loss: -0.0081634521484375|cri_loss: -0.003696441650390625|unsuper_loss: 0.0
+average reward score: 3.7890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.41%) |Training time=0.49s (22.04%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1711|ppo_ep: 1|act_loss: 0.00742340087890625|cri_loss: 0.0043792724609375|unsuper_loss: 0.0
+average reward score: 4.5625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.93%) |Training time=0.49s (22.55%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1712|ppo_ep: 1|act_loss: 0.0270843505859375|cri_loss: 0.01422882080078125|unsuper_loss: 0.0
+average reward score: 3.84375
+-------------------------------------------------------------------------------------
+|E2E latency=2.07s |Gather latency=0.00s (0.00%) |Generate time=1.59s (76.61%) |Training time=0.39s (18.64%) |Others=0.10 (4.75%)|CurSamplesPerSec=15.43 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1713|ppo_ep: 1|act_loss: -0.00589752197265625|cri_loss: -0.002044677734375|unsuper_loss: 0.0
+average reward score: 4.82421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.71%) |Training time=0.47s (21.71%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1714|ppo_ep: 1|act_loss: 0.00591278076171875|cri_loss: 0.003887176513671875|unsuper_loss: 0.0
+average reward score: 4.26171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.66%) |Training time=0.47s (21.77%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1715|ppo_ep: 1|act_loss: -0.02520751953125|cri_loss: -0.0118408203125|unsuper_loss: 0.0
+average reward score: 5.046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.77%) |Training time=0.47s (21.67%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1716|ppo_ep: 1|act_loss: -0.03692626953125|cri_loss: -0.01666259765625|unsuper_loss: 0.0
+average reward score: 3.564453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.46s (21.37%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1717|ppo_ep: 1|act_loss: -0.049041748046875|cri_loss: -0.02178955078125|unsuper_loss: 0.0
+average reward score: 3.392578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.15%) |Training time=0.45s (21.24%) |Others=0.10 (4.61%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1718|ppo_ep: 1|act_loss: -0.0164794921875|cri_loss: -0.00760650634765625|unsuper_loss: 0.0
+average reward score: 4.078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.03%) |Training time=0.46s (21.38%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.42
+[2023-04-14 09:51:02,716] [INFO] [logging.py:96:log_dist] [Rank 0] step=1720, skipped=22, lr=[8.765295503655101e-06, 8.765295503655101e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:51:02,734] [INFO] [timer.py:199:stop] epoch=0/micro_step=1720/global_step=1720, RunningAvgSamplesPerSec=107.88049260354408, CurrSamplesPerSec=107.629738450978, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:51:02,827] [INFO] [logging.py:96:log_dist] [Rank 0] step=1720, skipped=24, lr=[4.542714284456616e-06, 4.542714284456616e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1719|ppo_ep: 1|act_loss: 0.03533935546875|cri_loss: 0.0183563232421875|unsuper_loss: 0.0
+average reward score: 5.19140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.95%) |Training time=0.46s (21.48%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1720|ppo_ep: 1|act_loss: 0.064453125|cri_loss: 0.03436279296875|unsuper_loss: 0.0
+average reward score: 3.904296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.06%) |Training time=0.45s (21.31%) |Others=0.10 (4.63%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1721|ppo_ep: 1|act_loss: 0.016632080078125|cri_loss: 0.011627197265625|unsuper_loss: 0.0
+average reward score: 4.26171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.13%) |Training time=0.45s (21.26%) |Others=0.10 (4.60%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1722|ppo_ep: 1|act_loss: 0.05401611328125|cri_loss: 0.0280609130859375|unsuper_loss: 0.0
+average reward score: 5.17578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.85%) |Training time=0.46s (21.56%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1723|ppo_ep: 1|act_loss: 0.00507354736328125|cri_loss: 0.0029201507568359375|unsuper_loss: 0.0
+average reward score: 4.26953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.72%) |Training time=0.46s (21.68%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1724|ppo_ep: 1|act_loss: 0.01146697998046875|cri_loss: 0.006473541259765625|unsuper_loss: 0.0
+average reward score: 4.16015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.74s (75.64%) |Training time=0.46s (20.09%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.91 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1725|ppo_ep: 1|act_loss: -0.07354736328125|cri_loss: -0.032379150390625|unsuper_loss: 0.0
+average reward score: 3.578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.22%) |Training time=0.48s (22.22%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1726|ppo_ep: 1|act_loss: -0.0413818359375|cri_loss: -0.0194549560546875|unsuper_loss: 0.0
+average reward score: 3.080078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.69%) |Training time=0.46s (21.71%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1727|ppo_ep: 1|act_loss: 0.0097808837890625|cri_loss: 0.007587432861328125|unsuper_loss: 0.0
+average reward score: 5.30859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.91s |Gather latency=0.00s (0.00%) |Generate time=1.58s (54.26%) |Training time=0.46s (15.74%) |Others=0.87 (30.00%)|CurSamplesPerSec=11.02 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1728|ppo_ep: 1|act_loss: -0.0242767333984375|cri_loss: -0.010833740234375|unsuper_loss: 0.0
+average reward score: 4.1875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.70%) |Training time=0.46s (21.69%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.42
+[2023-04-14 09:51:25,354] [INFO] [logging.py:96:log_dist] [Rank 0] step=1730, skipped=22, lr=[8.754545159532632e-06, 8.754545159532632e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:51:25,370] [INFO] [timer.py:199:stop] epoch=0/micro_step=1730/global_step=1730, RunningAvgSamplesPerSec=107.87975778832639, CurrSamplesPerSec=117.8119436052013, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:51:25,466] [INFO] [logging.py:96:log_dist] [Rank 0] step=1730, skipped=24, lr=[4.537150203676553e-06, 4.537150203676553e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1729|ppo_ep: 1|act_loss: 0.035400390625|cri_loss: 0.01837158203125|unsuper_loss: 0.0
+average reward score: 4.88671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.46s |Gather latency=0.00s (0.00%) |Generate time=1.87s (75.81%) |Training time=0.49s (20.01%) |Others=0.10 (4.18%)|CurSamplesPerSec=12.98 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1730|ppo_ep: 1|act_loss: 0.0250091552734375|cri_loss: 0.0130767822265625|unsuper_loss: 0.0
+average reward score: 4.09765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.73%) |Training time=0.47s (21.68%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1731|ppo_ep: 1|act_loss: 0.03399658203125|cri_loss: 0.0174713134765625|unsuper_loss: 0.0
+average reward score: 4.6171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.89%) |Training time=0.49s (22.41%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1732|ppo_ep: 1|act_loss: 0.00628662109375|cri_loss: 0.004150390625|unsuper_loss: 0.0
+average reward score: 4.796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.75s |Gather latency=0.00s (0.00%) |Generate time=1.59s (58.04%) |Training time=0.51s (18.54%) |Others=0.64 (23.42%)|CurSamplesPerSec=11.65 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1733|ppo_ep: 1|act_loss: 0.03253173828125|cri_loss: 0.016815185546875|unsuper_loss: 0.0
+average reward score: 5.34765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.68s (73.06%) |Training time=0.52s (22.67%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.90 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1734|ppo_ep: 1|act_loss: 0.0027866363525390625|cri_loss: 0.0016613006591796875|unsuper_loss: 0.0
+average reward score: 4.13671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.30%) |Training time=0.48s (22.17%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1735|ppo_ep: 1|act_loss: -0.01568603515625|cri_loss: -0.007293701171875|unsuper_loss: 0.0
+average reward score: 3.78125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.80%) |Training time=0.49s (22.67%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1736|ppo_ep: 1|act_loss: -0.0134735107421875|cri_loss: -0.00504302978515625|unsuper_loss: 0.0
+average reward score: 3.9375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.96%) |Training time=0.49s (22.50%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1737|ppo_ep: 1|act_loss: -0.029541015625|cri_loss: -0.01422882080078125|unsuper_loss: 0.0
+average reward score: 4.79296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.16%) |Training time=0.48s (22.30%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1738|ppo_ep: 1|act_loss: -0.004878997802734375|cri_loss: -0.0018644332885742188|unsuper_loss: 0.0
+average reward score: 3.884765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.08%) |Training time=0.48s (22.37%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41
+[2023-04-14 09:51:47,899] [INFO] [logging.py:96:log_dist] [Rank 0] step=1740, skipped=22, lr=[8.743736570071387e-06, 8.743736570071387e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:51:47,917] [INFO] [timer.py:199:stop] epoch=0/micro_step=1740/global_step=1740, RunningAvgSamplesPerSec=107.81197459690361, CurrSamplesPerSec=93.59621982583081, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:51:48,010] [INFO] [logging.py:96:log_dist] [Rank 0] step=1740, skipped=24, lr=[4.531555927416115e-06, 4.531555927416115e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1739|ppo_ep: 1|act_loss: 0.01534271240234375|cri_loss: 0.00815582275390625|unsuper_loss: 0.0
+average reward score: 4.015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.32%) |Training time=0.50s (21.48%) |Others=0.10 (4.19%)|CurSamplesPerSec=13.63 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1740|ppo_ep: 1|act_loss: 0.041717529296875|cri_loss: 0.021728515625|unsuper_loss: 0.0
+average reward score: 4.62890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.59%) |Training time=0.50s (22.90%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1741|ppo_ep: 1|act_loss: 0.017333984375|cri_loss: 0.0090484619140625|unsuper_loss: 0.0
+average reward score: 3.990234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.78%) |Training time=0.49s (22.70%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1742|ppo_ep: 1|act_loss: -0.0078582763671875|cri_loss: -0.003448486328125|unsuper_loss: 0.0
+average reward score: 4.50390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.46s |Gather latency=0.00s (0.00%) |Generate time=1.58s (64.12%) |Training time=0.49s (19.89%) |Others=0.39 (15.99%)|CurSamplesPerSec=13.02 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1743|ppo_ep: 1|act_loss: 0.00637054443359375|cri_loss: 0.0039215087890625|unsuper_loss: 0.0
+average reward score: 4.7421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.00%) |Training time=0.49s (22.46%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1744|ppo_ep: 1|act_loss: -0.00989532470703125|cri_loss: -0.00441741943359375|unsuper_loss: 0.0
+average reward score: 3.30859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.98%) |Training time=0.49s (22.48%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1745|ppo_ep: 1|act_loss: -0.0804443359375|cri_loss: -0.03790283203125|unsuper_loss: 0.0
+average reward score: 3.40234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.18%) |Training time=0.46s (21.26%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1746|ppo_ep: 1|act_loss: -0.03265380859375|cri_loss: -0.0139923095703125|unsuper_loss: 0.0
+average reward score: 4.05078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.41%) |Training time=0.45s (21.02%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1747|ppo_ep: 1|act_loss: -0.0177001953125|cri_loss: -0.0078887939453125|unsuper_loss: 0.0
+average reward score: 4.625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.25%) |Training time=0.46s (21.18%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1748|ppo_ep: 1|act_loss: -0.0044708251953125|cri_loss: -0.00197601318359375|unsuper_loss: 0.0
+average reward score: 5.6015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.45s |Gather latency=0.00s (0.00%) |Generate time=1.59s (65.05%) |Training time=0.46s (18.74%) |Others=0.40 (16.21%)|CurSamplesPerSec=13.05 |AvgSamplesPerSec=14.41
+[2023-04-14 09:52:10,107] [INFO] [logging.py:96:log_dist] [Rank 0] step=1750, skipped=22, lr=[8.732869895480736e-06, 8.732869895480736e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:52:10,126] [INFO] [timer.py:199:stop] epoch=0/micro_step=1750/global_step=1750, RunningAvgSamplesPerSec=107.78236145408847, CurrSamplesPerSec=109.62930097272944, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:52:10,218] [INFO] [logging.py:96:log_dist] [Rank 0] step=1750, skipped=24, lr=[4.525931538595969e-06, 4.525931538595969e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1749|ppo_ep: 1|act_loss: 0.007076263427734375|cri_loss: 0.00386810302734375|unsuper_loss: 0.0
+average reward score: 3.205078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.32%) |Training time=0.45s (21.13%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1750|ppo_ep: 1|act_loss: 0.0384521484375|cri_loss: 0.02001953125|unsuper_loss: 0.0
+average reward score: 4.59765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.31%) |Training time=0.45s (21.12%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1751|ppo_ep: 1|act_loss: 0.07427978515625|cri_loss: 0.040679931640625|unsuper_loss: 0.0
+average reward score: 4.08984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.02%) |Training time=0.46s (21.42%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1752|ppo_ep: 1|act_loss: 0.0146026611328125|cri_loss: 0.0081329345703125|unsuper_loss: 0.0
+average reward score: 4.93359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.16%) |Training time=0.46s (21.27%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1753|ppo_ep: 1|act_loss: -0.022735595703125|cri_loss: -0.01081085205078125|unsuper_loss: 0.0
+average reward score: 4.875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.02%) |Training time=0.46s (21.41%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1754|ppo_ep: 1|act_loss: -0.03564453125|cri_loss: -0.01702880859375|unsuper_loss: 0.0
+average reward score: 5.19921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.88%) |Training time=0.46s (19.87%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1755|ppo_ep: 1|act_loss: -0.0101776123046875|cri_loss: -0.004772186279296875|unsuper_loss: 0.0
+average reward score: 4.0
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.97%) |Training time=0.46s (21.46%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1756|ppo_ep: 1|act_loss: -0.01068878173828125|cri_loss: -0.00504302978515625|unsuper_loss: 0.0
+average reward score: 4.4296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.05%) |Training time=0.46s (21.40%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1757|ppo_ep: 1|act_loss: 0.00408935546875|cri_loss: 0.003238677978515625|unsuper_loss: 0.0
+average reward score: 4.30859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.21%) |Training time=0.46s (21.22%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1758|ppo_ep: 1|act_loss: 0.0113983154296875|cri_loss: 0.006969451904296875|unsuper_loss: 0.0
+average reward score: 3.5625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.47s (21.65%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41
+[2023-04-14 09:52:31,837] [INFO] [logging.py:96:log_dist] [Rank 0] step=1760, skipped=22, lr=[8.72194529683101e-06, 8.72194529683101e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:52:31,856] [INFO] [timer.py:199:stop] epoch=0/micro_step=1760/global_step=1760, RunningAvgSamplesPerSec=107.78163376115772, CurrSamplesPerSec=105.12303566505426, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:52:31,949] [INFO] [logging.py:96:log_dist] [Rank 0] step=1760, skipped=24, lr=[4.5202771205831286e-06, 4.5202771205831286e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1759|ppo_ep: 1|act_loss: 0.01348876953125|cri_loss: 0.007396697998046875|unsuper_loss: 0.0
+average reward score: 3.6328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.94%) |Training time=0.47s (21.51%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1760|ppo_ep: 1|act_loss: 0.11712646484375|cri_loss: 0.06634521484375|unsuper_loss: 0.0
+average reward score: 4.421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.05%) |Training time=0.46s (21.38%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1761|ppo_ep: 1|act_loss: -0.01331329345703125|cri_loss: -0.00519561767578125|unsuper_loss: 0.0
+average reward score: 5.4609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.42%) |Training time=0.46s (21.06%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1762|ppo_ep: 1|act_loss: 0.0191802978515625|cri_loss: 0.00983428955078125|unsuper_loss: 0.0
+average reward score: 4.02734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.41%) |Training time=0.46s (20.24%) |Others=0.10 (4.36%)|CurSamplesPerSec=14.19 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1763|ppo_ep: 1|act_loss: -0.0606689453125|cri_loss: -0.0264739990234375|unsuper_loss: 0.0
+average reward score: 3.58203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.74s |Gather latency=0.00s (0.00%) |Generate time=1.60s (58.39%) |Training time=0.46s (16.65%) |Others=0.68 (24.95%)|CurSamplesPerSec=11.68 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1764|ppo_ep: 1|act_loss: 0.034393310546875|cri_loss: 0.01776123046875|unsuper_loss: 0.0
+average reward score: 4.23046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.15%) |Training time=0.46s (21.27%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1765|ppo_ep: 1|act_loss: -0.0011138916015625|cri_loss: -0.00027751922607421875|unsuper_loss: 0.0
+average reward score: 4.03125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.11%) |Training time=0.46s (21.33%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1766|ppo_ep: 1|act_loss: -0.0205230712890625|cri_loss: -0.0092010498046875|unsuper_loss: 0.0
+average reward score: 4.3046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.46s (21.15%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1767|ppo_ep: 1|act_loss: -0.01461029052734375|cri_loss: -0.00675201416015625|unsuper_loss: 0.0
+average reward score: 3.173828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.22%) |Training time=0.46s (21.18%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1768|ppo_ep: 1|act_loss: -0.00934600830078125|cri_loss: -0.004116058349609375|unsuper_loss: 0.0
+average reward score: 3.951171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.92%) |Training time=0.46s (19.85%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.75 |AvgSamplesPerSec=14.41
+[2023-04-14 09:52:54,244] [INFO] [logging.py:96:log_dist] [Rank 0] step=1770, skipped=22, lr=[8.710962936051119e-06, 8.710962936051119e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:52:54,263] [INFO] [timer.py:199:stop] epoch=0/micro_step=1770/global_step=1770, RunningAvgSamplesPerSec=107.78735625931982, CurrSamplesPerSec=109.66692295813189, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:52:54,355] [INFO] [logging.py:96:log_dist] [Rank 0] step=1770, skipped=24, lr=[4.514592757189707e-06, 4.514592757189707e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1769|ppo_ep: 1|act_loss: -0.000946044921875|cri_loss: 0.00171661376953125|unsuper_loss: 0.0
+average reward score: 4.08984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.31%) |Training time=0.45s (21.14%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1770|ppo_ep: 1|act_loss: -0.017913818359375|cri_loss: -0.0079498291015625|unsuper_loss: 0.0
+average reward score: 4.16796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.19%) |Training time=0.46s (21.25%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1771|ppo_ep: 1|act_loss: 0.0276641845703125|cri_loss: 0.0149993896484375|unsuper_loss: 0.0
+average reward score: 4.296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.97%) |Training time=0.46s (21.46%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1772|ppo_ep: 1|act_loss: 0.02630615234375|cri_loss: 0.014068603515625|unsuper_loss: 0.0
+average reward score: 4.80078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.10%) |Training time=0.46s (21.34%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1773|ppo_ep: 1|act_loss: 0.0014629364013671875|cri_loss: 0.0013713836669921875|unsuper_loss: 0.0
+average reward score: 5.171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.88%) |Training time=0.47s (21.56%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1774|ppo_ep: 1|act_loss: -0.009490966796875|cri_loss: -0.004062652587890625|unsuper_loss: 0.0
+average reward score: 4.27734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.46s (21.17%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1775|ppo_ep: 1|act_loss: -0.06005859375|cri_loss: -0.0272369384765625|unsuper_loss: 0.0
+average reward score: 3.927734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.71%) |Training time=0.44s (20.68%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1776|ppo_ep: 1|act_loss: -0.0263824462890625|cri_loss: -0.0122528076171875|unsuper_loss: 0.0
+average reward score: 4.84375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.64%) |Training time=0.44s (20.76%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1777|ppo_ep: 1|act_loss: 0.0029964447021484375|cri_loss: 0.0020046234130859375|unsuper_loss: 0.0
+average reward score: 4.97265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.51%) |Training time=0.45s (20.83%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1778|ppo_ep: 1|act_loss: 0.005260467529296875|cri_loss: 0.003143310546875|unsuper_loss: 0.0
+average reward score: 4.4765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.32%) |Training time=0.45s (21.07%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.42
+[2023-04-14 09:53:15,699] [INFO] [logging.py:96:log_dist] [Rank 0] step=1780, skipped=22, lr=[8.699922975926139e-06, 8.699922975926139e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:53:15,718] [INFO] [timer.py:199:stop] epoch=0/micro_step=1780/global_step=1780, RunningAvgSamplesPerSec=107.80113421242028, CurrSamplesPerSec=113.88078375605919, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:53:15,810] [INFO] [logging.py:96:log_dist] [Rank 0] step=1780, skipped=24, lr=[4.508878532671684e-06, 4.508878532671684e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1779|ppo_ep: 1|act_loss: 0.04669189453125|cri_loss: 0.02532958984375|unsuper_loss: 0.0
+average reward score: 4.890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.64%) |Training time=0.44s (20.78%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1780|ppo_ep: 1|act_loss: 0.0433349609375|cri_loss: 0.0230712890625|unsuper_loss: 0.0
+average reward score: 4.015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.47s |Gather latency=0.00s (0.00%) |Generate time=1.59s (64.40%) |Training time=0.46s (18.43%) |Others=0.42 (17.17%)|CurSamplesPerSec=12.95 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1781|ppo_ep: 1|act_loss: 0.03173828125|cri_loss: 0.018829345703125|unsuper_loss: 0.0
+average reward score: 4.25
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.41%) |Training time=0.45s (21.02%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1782|ppo_ep: 1|act_loss: 0.00551605224609375|cri_loss: 0.002964019775390625|unsuper_loss: 0.0
+average reward score: 4.20703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.67%) |Training time=0.44s (20.74%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1783|ppo_ep: 1|act_loss: 0.0203857421875|cri_loss: 0.01043701171875|unsuper_loss: 0.0
+average reward score: 4.46484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.85%) |Training time=0.46s (20.52%) |Others=0.17 (7.63%)|CurSamplesPerSec=14.42 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1784|ppo_ep: 1|act_loss: -0.0440673828125|cri_loss: -0.020599365234375|unsuper_loss: 0.0
+average reward score: 3.3828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.97%) |Training time=0.44s (20.32%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1785|ppo_ep: 1|act_loss: -0.0390625|cri_loss: -0.0182037353515625|unsuper_loss: 0.0
+average reward score: 4.4453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.70%) |Training time=0.44s (20.71%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1786|ppo_ep: 1|act_loss: -0.032318115234375|cri_loss: -0.01308441162109375|unsuper_loss: 0.0
+average reward score: 4.69921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.06%) |Training time=0.43s (20.33%) |Others=0.10 (4.61%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1787|ppo_ep: 1|act_loss: 0.0012836456298828125|cri_loss: 0.0012454986572265625|unsuper_loss: 0.0
+average reward score: 4.52734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.93%) |Training time=0.44s (20.46%) |Others=0.10 (4.61%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1788|ppo_ep: 1|act_loss: -0.00051116943359375|cri_loss: 0.0003037452697753906|unsuper_loss: 0.0
+average reward score: 4.265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.02%) |Training time=0.44s (20.38%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.42
+[2023-04-14 09:53:37,537] [INFO] [logging.py:96:log_dist] [Rank 0] step=1790, skipped=22, lr=[8.688825580094914e-06, 8.688825580094914e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:53:37,556] [INFO] [timer.py:199:stop] epoch=0/micro_step=1790/global_step=1790, RunningAvgSamplesPerSec=107.830801602799, CurrSamplesPerSec=110.82939976714037, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:53:37,649] [INFO] [logging.py:96:log_dist] [Rank 0] step=1790, skipped=24, lr=[4.503134531727652e-06, 4.503134531727652e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1789|ppo_ep: 1|act_loss: 0.0289306640625|cri_loss: 0.01531219482421875|unsuper_loss: 0.0
+average reward score: 4.75
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.40%) |Training time=0.45s (21.00%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1790|ppo_ep: 1|act_loss: 0.0858154296875|cri_loss: 0.04718017578125|unsuper_loss: 0.0
+average reward score: 4.03125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.19%) |Training time=0.46s (21.26%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1791|ppo_ep: 1|act_loss: 0.1064453125|cri_loss: 0.05609130859375|unsuper_loss: 0.0
+average reward score: 4.26953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.51%) |Training time=0.49s (22.04%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1792|ppo_ep: 1|act_loss: 0.0220184326171875|cri_loss: 0.01183319091796875|unsuper_loss: 0.0
+average reward score: 4.40625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.46s (21.16%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1793|ppo_ep: 1|act_loss: -0.00402069091796875|cri_loss: -0.001773834228515625|unsuper_loss: 0.0
+average reward score: 4.29296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.01%) |Training time=0.46s (21.42%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1794|ppo_ep: 1|act_loss: -0.00018310546875|cri_loss: 0.0029506683349609375|unsuper_loss: 0.0
+average reward score: 3.982421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.47s (21.61%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1795|ppo_ep: 1|act_loss: -0.01824951171875|cri_loss: -0.00859832763671875|unsuper_loss: 0.0
+average reward score: 4.01953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.94%) |Training time=0.46s (21.49%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1796|ppo_ep: 1|act_loss: 0.038330078125|cri_loss: 0.0207977294921875|unsuper_loss: 0.0
+average reward score: 4.4296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.47s (21.62%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1797|ppo_ep: 1|act_loss: -0.05828857421875|cri_loss: -0.0277099609375|unsuper_loss: 0.0
+average reward score: 5.10546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.77%) |Training time=0.47s (21.68%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1798|ppo_ep: 1|act_loss: -0.0136566162109375|cri_loss: -0.006031036376953125|unsuper_loss: 0.0
+average reward score: 3.41015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.47s (21.66%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.42
+[2023-04-14 09:53:59,319] [INFO] [logging.py:96:log_dist] [Rank 0] step=1800, skipped=22, lr=[8.677670913047617e-06, 8.677670913047617e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:53:59,587] [INFO] [timer.py:199:stop] epoch=0/micro_step=1800/global_step=1800, RunningAvgSamplesPerSec=107.77048253976814, CurrSamplesPerSec=57.6945053886944, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:53:59,680] [INFO] [logging.py:96:log_dist] [Rank 0] step=1800, skipped=24, lr=[4.49736083949756e-06, 4.49736083949756e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1799|ppo_ep: 1|act_loss: -0.002838134765625|cri_loss: -0.000732421875|unsuper_loss: 0.0
+average reward score: 4.3203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.57s |Gather latency=0.00s (0.00%) |Generate time=1.76s (68.28%) |Training time=0.72s (27.90%) |Others=0.10 (3.82%)|CurSamplesPerSec=12.45 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1800|ppo_ep: 1|act_loss: -0.004962921142578125|cri_loss: -0.002277374267578125|unsuper_loss: 0.0
+average reward score: 5.18359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.48s (22.07%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1801|ppo_ep: 1|act_loss: 0.00911712646484375|cri_loss: 0.00502777099609375|unsuper_loss: 0.0
+average reward score: 4.32421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.90%) |Training time=0.46s (21.54%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1802|ppo_ep: 1|act_loss: 0.0194091796875|cri_loss: 0.0101318359375|unsuper_loss: 0.0
+average reward score: 5.296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.77%) |Training time=0.47s (21.67%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1803|ppo_ep: 1|act_loss: -0.004100799560546875|cri_loss: -0.0012693405151367188|unsuper_loss: 0.0
+average reward score: 5.61328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.69%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1804|ppo_ep: 1|act_loss: 0.005481719970703125|cri_loss: 0.00354766845703125|unsuper_loss: 0.0
+average reward score: 4.5625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.30%) |Training time=0.46s (21.12%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1805|ppo_ep: 1|act_loss: 0.009002685546875|cri_loss: 0.0046844482421875|unsuper_loss: 0.0
+average reward score: 4.77734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.60s (67.59%) |Training time=0.46s (19.33%) |Others=0.31 (13.08%)|CurSamplesPerSec=13.55 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1806|ppo_ep: 1|act_loss: -0.016754150390625|cri_loss: -0.00707244873046875|unsuper_loss: 0.0
+average reward score: 4.13671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.89%) |Training time=0.46s (21.54%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1807|ppo_ep: 1|act_loss: -0.021514892578125|cri_loss: -0.01013946533203125|unsuper_loss: 0.0
+average reward score: 3.890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.74%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1808|ppo_ep: 1|act_loss: 0.022613525390625|cri_loss: 0.01165008544921875|unsuper_loss: 0.0
+average reward score: 4.69140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.88%) |Training time=0.46s (21.56%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.42
+[2023-04-14 09:54:21,316] [INFO] [logging.py:96:log_dist] [Rank 0] step=1810, skipped=22, lr=[8.66645914012333e-06, 8.66645914012333e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:54:21,334] [INFO] [timer.py:199:stop] epoch=0/micro_step=1810/global_step=1810, RunningAvgSamplesPerSec=107.75986855875877, CurrSamplesPerSec=105.9602488394859, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:54:21,427] [INFO] [logging.py:96:log_dist] [Rank 0] step=1810, skipped=24, lr=[4.491557541561456e-06, 4.491557541561456e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1809|ppo_ep: 1|act_loss: 0.03271484375|cri_loss: 0.0170440673828125|unsuper_loss: 0.0
+average reward score: 4.0234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.80%) |Training time=0.46s (21.65%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1810|ppo_ep: 1|act_loss: -0.0049285888671875|cri_loss: -0.002193450927734375|unsuper_loss: 0.0
+average reward score: 4.4375
+-------------------------------------------------------------------------------------
+|E2E latency=2.88s |Gather latency=0.00s (0.00%) |Generate time=1.59s (55.11%) |Training time=0.47s (16.32%) |Others=0.82 (28.58%)|CurSamplesPerSec=11.10 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1811|ppo_ep: 1|act_loss: -0.0049896240234375|cri_loss: -0.002170562744140625|unsuper_loss: 0.0
+average reward score: 5.1484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.05%) |Training time=0.46s (21.39%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1812|ppo_ep: 1|act_loss: -0.0087127685546875|cri_loss: -0.0037517547607421875|unsuper_loss: 0.0
+average reward score: 4.2421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.81%) |Training time=0.47s (21.34%) |Others=0.15 (6.85%)|CurSamplesPerSec=14.51 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1813|ppo_ep: 1|act_loss: 0.01837158203125|cri_loss: 0.00933837890625|unsuper_loss: 0.0
+average reward score: 5.1875
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.82%) |Training time=0.47s (20.84%) |Others=0.10 (4.34%)|CurSamplesPerSec=14.14 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1814|ppo_ep: 1|act_loss: -0.0235595703125|cri_loss: -0.01139068603515625|unsuper_loss: 0.0
+average reward score: 4.48828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.05%) |Training time=0.46s (21.39%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1815|ppo_ep: 1|act_loss: -0.002483367919921875|cri_loss: -0.0010623931884765625|unsuper_loss: 0.0
+average reward score: 3.763671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.95%) |Training time=0.46s (21.48%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1816|ppo_ep: 1|act_loss: 0.027923583984375|cri_loss: 0.01465606689453125|unsuper_loss: 0.0
+average reward score: 4.19140625
+-------------------------------------------------------------------------------------
+|E2E latency=3.37s |Gather latency=0.00s (0.00%) |Generate time=1.59s (47.29%) |Training time=0.45s (13.42%) |Others=1.32 (39.28%)|CurSamplesPerSec=9.49 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1817|ppo_ep: 1|act_loss: 0.03106689453125|cri_loss: 0.0168914794921875|unsuper_loss: 0.0
+average reward score: 3.7109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.46s (21.38%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1818|ppo_ep: 1|act_loss: -0.027679443359375|cri_loss: -0.01276397705078125|unsuper_loss: 0.0
+average reward score: 4.33984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.17%) |Training time=0.45s (21.22%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.42
+[2023-04-14 09:54:44,979] [INFO] [logging.py:96:log_dist] [Rank 0] step=1820, skipped=22, lr=[8.65519042750757e-06, 8.65519042750757e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:54:44,998] [INFO] [timer.py:199:stop] epoch=0/micro_step=1820/global_step=1820, RunningAvgSamplesPerSec=107.75628443014152, CurrSamplesPerSec=109.50247245462815, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:54:45,090] [INFO] [logging.py:96:log_dist] [Rank 0] step=1820, skipped=24, lr=[4.485724723938215e-06, 4.485724723938215e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1819|ppo_ep: 1|act_loss: 0.0186767578125|cri_loss: 0.0098114013671875|unsuper_loss: 0.0
+average reward score: 5.24609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.65%) |Training time=0.45s (20.86%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1820|ppo_ep: 1|act_loss: 0.01434326171875|cri_loss: 0.007396697998046875|unsuper_loss: 0.0
+average reward score: 4.30078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.68s (74.57%) |Training time=0.47s (21.07%) |Others=0.10 (4.36%)|CurSamplesPerSec=14.22 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1821|ppo_ep: 1|act_loss: 0.00457763671875|cri_loss: 0.0029048919677734375|unsuper_loss: 0.0
+average reward score: 4.6875
+-------------------------------------------------------------------------------------
+|E2E latency=2.78s |Gather latency=0.00s (0.00%) |Generate time=1.58s (56.71%) |Training time=0.47s (17.01%) |Others=0.73 (26.28%)|CurSamplesPerSec=11.49 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1822|ppo_ep: 1|act_loss: -0.027496337890625|cri_loss: -0.012420654296875|unsuper_loss: 0.0
+average reward score: 4.42578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.26%) |Training time=0.48s (22.18%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1823|ppo_ep: 1|act_loss: -0.0218505859375|cri_loss: -0.0094757080078125|unsuper_loss: 0.0
+average reward score: 4.5390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.48%) |Training time=0.47s (21.95%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1824|ppo_ep: 1|act_loss: -0.0355224609375|cri_loss: -0.015899658203125|unsuper_loss: 0.0
+average reward score: 4.5234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.19%) |Training time=0.48s (22.25%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1825|ppo_ep: 1|act_loss: -0.0250396728515625|cri_loss: -0.01177978515625|unsuper_loss: 0.0
+average reward score: 4.6953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.57%) |Training time=0.47s (21.84%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1826|ppo_ep: 1|act_loss: -0.0089111328125|cri_loss: -0.0037994384765625|unsuper_loss: 0.0
+average reward score: 4.3671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.48s (22.07%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1827|ppo_ep: 1|act_loss: -0.00405120849609375|cri_loss: -0.000476837158203125|unsuper_loss: 0.0
+average reward score: 5.2734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.33%) |Training time=0.47s (22.10%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1828|ppo_ep: 1|act_loss: 0.00782012939453125|cri_loss: 0.0044708251953125|unsuper_loss: 0.0
+average reward score: 3.943359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.57s (71.99%) |Training time=0.51s (23.51%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.41
+[2023-04-14 09:55:07,257] [INFO] [logging.py:96:log_dist] [Rank 0] step=1830, skipped=22, lr=[8.643864942229842e-06, 8.643864942229842e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:55:07,276] [INFO] [timer.py:199:stop] epoch=0/micro_step=1830/global_step=1830, RunningAvgSamplesPerSec=107.72570916256083, CurrSamplesPerSec=101.29953304215451, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:55:07,369] [INFO] [logging.py:96:log_dist] [Rank 0] step=1830, skipped=24, lr=[4.479862473084266e-06, 4.479862473084266e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1829|ppo_ep: 1|act_loss: 0.04193115234375|cri_loss: 0.0218658447265625|unsuper_loss: 0.0
+average reward score: 5.3828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.27%) |Training time=0.48s (22.18%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.41
+epoch: 0|step: 1830|ppo_ep: 1|act_loss: -0.0260772705078125|cri_loss: -0.01226806640625|unsuper_loss: 0.0
+average reward score: 4.5859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.31%) |Training time=0.48s (22.12%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.42
+[2023-04-14 09:55:11,655] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 1831|ppo_ep: 1|act_loss: -0.002475738525390625|cri_loss: 0.0001316070556640625|unsuper_loss: 0.0
+average reward score: 4.53125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.90%) |Training time=0.47s (22.01%) |Others=0.09 (4.10%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.42
+[2023-04-14 09:55:13,796] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 1832|ppo_ep: 1|act_loss: -0.0205841064453125|cri_loss: -0.007701873779296875|unsuper_loss: 0.0
+average reward score: 4.4921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.71%) |Training time=0.48s (22.19%) |Others=0.09 (4.09%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1833|ppo_ep: 1|act_loss: -0.0280303955078125|cri_loss: -0.0123748779296875|unsuper_loss: 0.0
+average reward score: 3.974609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.79%) |Training time=0.47s (21.64%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1834|ppo_ep: 1|act_loss: 0.022491455078125|cri_loss: 0.01218414306640625|unsuper_loss: 0.0
+average reward score: 4.30859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.07%) |Training time=0.49s (22.39%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1835|ppo_ep: 1|act_loss: 0.027008056640625|cri_loss: 0.014068603515625|unsuper_loss: 0.0
+average reward score: 3.689453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.00%) |Training time=0.46s (21.40%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1836|ppo_ep: 1|act_loss: 0.0245513916015625|cri_loss: 0.0125579833984375|unsuper_loss: 0.0
+average reward score: 4.1015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.48%) |Training time=0.47s (21.93%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1837|ppo_ep: 1|act_loss: 0.0110015869140625|cri_loss: 0.00743865966796875|unsuper_loss: 0.0
+average reward score: 4.21875
+-------------------------------------------------------------------------------------
+|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.87%) |Training time=0.39s (18.40%) |Others=0.10 (4.73%)|CurSamplesPerSec=15.12 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1838|ppo_ep: 1|act_loss: 0.00502777099609375|cri_loss: 0.0027179718017578125|unsuper_loss: 0.0
+average reward score: 4.6328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.67s (76.86%) |Training time=0.40s (18.48%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.42
+[2023-04-14 09:55:28,958] [INFO] [logging.py:96:log_dist] [Rank 0] step=1840, skipped=22, lr=[8.632482852161159e-06, 8.632482852161159e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:55:28,976] [INFO] [timer.py:199:stop] epoch=0/micro_step=1840/global_step=1840, RunningAvgSamplesPerSec=107.73700504107984, CurrSamplesPerSec=111.84251454093962, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:55:29,068] [INFO] [logging.py:96:log_dist] [Rank 0] step=1840, skipped=26, lr=[4.475151538852264e-06, 4.475151538852264e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1839|ppo_ep: 1|act_loss: -0.0207672119140625|cri_loss: -0.00972747802734375|unsuper_loss: 0.0
+average reward score: 4.3359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.57%) |Training time=0.45s (20.86%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1840|ppo_ep: 1|act_loss: -0.021087646484375|cri_loss: -0.009552001953125|unsuper_loss: 0.0
+average reward score: 3.8671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.38%) |Training time=0.45s (21.04%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1841|ppo_ep: 1|act_loss: -0.023468017578125|cri_loss: -0.010345458984375|unsuper_loss: 0.0
+average reward score: 3.470703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.92%) |Training time=0.44s (20.50%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1842|ppo_ep: 1|act_loss: -0.05645751953125|cri_loss: -0.026336669921875|unsuper_loss: 0.0
+average reward score: 4.5703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.65%) |Training time=0.45s (20.72%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1843|ppo_ep: 1|act_loss: 0.034271240234375|cri_loss: 0.0200042724609375|unsuper_loss: 0.0
+average reward score: 3.44140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.52%) |Training time=0.43s (19.89%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1844|ppo_ep: 1|act_loss: -0.0029296875|cri_loss: -0.0012416839599609375|unsuper_loss: 0.0
+average reward score: 4.8671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.79s (76.96%) |Training time=0.44s (18.77%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.79 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1845|ppo_ep: 1|act_loss: 0.023406982421875|cri_loss: 0.01213836669921875|unsuper_loss: 0.0
+average reward score: 4.25390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.21%) |Training time=0.46s (21.22%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1846|ppo_ep: 1|act_loss: -0.0084228515625|cri_loss: -0.002788543701171875|unsuper_loss: 0.0
+average reward score: 4.7890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.14%) |Training time=0.46s (21.31%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1847|ppo_ep: 1|act_loss: -0.06622314453125|cri_loss: -0.0291748046875|unsuper_loss: 0.0
+average reward score: 4.046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.35%) |Training time=0.45s (21.07%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1848|ppo_ep: 1|act_loss: 0.03302001953125|cri_loss: 0.017364501953125|unsuper_loss: 0.0
+average reward score: 3.33984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.18%) |Training time=0.46s (21.30%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42
+[2023-04-14 09:55:50,807] [INFO] [logging.py:96:log_dist] [Rank 0] step=1850, skipped=22, lr=[8.621044326011558e-06, 8.621044326011558e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:55:50,825] [INFO] [timer.py:199:stop] epoch=0/micro_step=1850/global_step=1850, RunningAvgSamplesPerSec=107.7581995961723, CurrSamplesPerSec=109.30806060828172, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:55:50,918] [INFO] [logging.py:96:log_dist] [Rank 0] step=1850, skipped=26, lr=[4.469236527444994e-06, 4.469236527444994e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1849|ppo_ep: 1|act_loss: -0.00634765625|cri_loss: 0.00135040283203125|unsuper_loss: 0.0
+average reward score: 3.236328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.73s (75.76%) |Training time=0.46s (19.95%) |Others=0.10 (4.29%)|CurSamplesPerSec=14.01 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1850|ppo_ep: 1|act_loss: 0.01006317138671875|cri_loss: 0.010528564453125|unsuper_loss: 0.0
+average reward score: 3.015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.47s (21.64%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1851|ppo_ep: 1|act_loss: 0.032867431640625|cri_loss: 0.0192413330078125|unsuper_loss: 0.0
+average reward score: 3.171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.28%) |Training time=0.45s (21.15%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1852|ppo_ep: 1|act_loss: 0.02581787109375|cri_loss: 0.0165252685546875|unsuper_loss: 0.0
+average reward score: 3.73046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.50%) |Training time=0.45s (20.93%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1853|ppo_ep: 1|act_loss: -0.04046630859375|cri_loss: -0.01678466796875|unsuper_loss: 0.0
+average reward score: 3.263671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.07%) |Training time=0.46s (21.36%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1854|ppo_ep: 1|act_loss: -0.1556396484375|cri_loss: -0.061798095703125|unsuper_loss: 0.0
+average reward score: 3.73046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.19%) |Training time=0.46s (21.26%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1855|ppo_ep: 1|act_loss: 0.0204010009765625|cri_loss: 0.01122283935546875|unsuper_loss: 0.0
+average reward score: 3.962890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.13%) |Training time=0.46s (21.30%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1856|ppo_ep: 1|act_loss: 0.038818359375|cri_loss: 0.0240478515625|unsuper_loss: 0.0
+average reward score: 2.8671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.04%) |Training time=0.46s (21.40%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1857|ppo_ep: 1|act_loss: -0.041259765625|cri_loss: -0.0198516845703125|unsuper_loss: 0.0
+average reward score: 4.76953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.49%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1858|ppo_ep: 1|act_loss: 0.1640625|cri_loss: 0.1064453125|unsuper_loss: 0.0
+average reward score: 3.146484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.88%) |Training time=0.46s (21.56%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.42
+[2023-04-14 09:56:12,515] [INFO] [logging.py:96:log_dist] [Rank 0] step=1860, skipped=22, lr=[8.609549533327585e-06, 8.609549533327585e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:56:12,534] [INFO] [timer.py:199:stop] epoch=0/micro_step=1860/global_step=1860, RunningAvgSamplesPerSec=107.75624465773492, CurrSamplesPerSec=104.92407924083149, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:56:12,628] [INFO] [logging.py:96:log_dist] [Rank 0] step=1860, skipped=26, lr=[4.463292327201862e-06, 4.463292327201862e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1859|ppo_ep: 1|act_loss: -0.017120361328125|cri_loss: -0.00815582275390625|unsuper_loss: 0.0
+average reward score: 3.7578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.57%) |Training time=0.47s (20.21%) |Others=0.10 (4.22%)|CurSamplesPerSec=13.77 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1860|ppo_ep: 1|act_loss: 0.0186614990234375|cri_loss: 0.00991058349609375|unsuper_loss: 0.0
+average reward score: 3.82421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.74%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1861|ppo_ep: 1|act_loss: 0.031982421875|cri_loss: 0.0178070068359375|unsuper_loss: 0.0
+average reward score: 4.62109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.83%) |Training time=0.46s (21.60%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1862|ppo_ep: 1|act_loss: 0.03680419921875|cri_loss: 0.020660400390625|unsuper_loss: 0.0
+average reward score: 4.64453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.58s |Gather latency=0.00s (0.00%) |Generate time=1.58s (61.54%) |Training time=0.47s (18.13%) |Others=0.52 (20.32%)|CurSamplesPerSec=12.43 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1863|ppo_ep: 1|act_loss: 0.053924560546875|cri_loss: 0.027862548828125|unsuper_loss: 0.0
+average reward score: 5.0390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.88%) |Training time=0.47s (21.56%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42
+[2023-04-14 09:56:23,836] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
+epoch: 0|step: 1864|ppo_ep: 1|act_loss: 0.0142364501953125|cri_loss: 0.0084381103515625|unsuper_loss: 0.0
+average reward score: 4.68359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.09%) |Training time=0.47s (21.75%) |Others=0.09 (4.16%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1865|ppo_ep: 1|act_loss: 0.005218505859375|cri_loss: 0.003391265869140625|unsuper_loss: 0.0
+average reward score: 4.0546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.07%) |Training time=0.47s (21.80%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1866|ppo_ep: 1|act_loss: 0.020599365234375|cri_loss: 0.01151275634765625|unsuper_loss: 0.0
+average reward score: 4.07421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.67%) |Training time=0.47s (21.80%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1867|ppo_ep: 1|act_loss: -0.00788116455078125|cri_loss: -0.00058746337890625|unsuper_loss: 0.0
+average reward score: 4.55078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.98%) |Training time=0.46s (21.45%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1868|ppo_ep: 1|act_loss: 0.010833740234375|cri_loss: 0.00745391845703125|unsuper_loss: 0.0
+average reward score: 4.60546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.98%) |Training time=0.46s (21.45%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.42
+[2023-04-14 09:56:34,515] [INFO] [logging.py:96:log_dist] [Rank 0] step=1870, skipped=22, lr=[8.597998644489801e-06, 8.597998644489801e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:56:34,533] [INFO] [timer.py:199:stop] epoch=0/micro_step=1870/global_step=1870, RunningAvgSamplesPerSec=107.7423744536757, CurrSamplesPerSec=106.6015344781027, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:56:34,626] [INFO] [logging.py:96:log_dist] [Rank 0] step=1870, skipped=27, lr=[4.45791766334022e-06, 4.45791766334022e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1869|ppo_ep: 1|act_loss: -0.01727294921875|cri_loss: -0.00601959228515625|unsuper_loss: 0.0
+average reward score: 4.2421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.01%) |Training time=0.46s (21.46%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1870|ppo_ep: 1|act_loss: -0.072021484375|cri_loss: -0.029998779296875|unsuper_loss: 0.0
+average reward score: 5.0234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.26%) |Training time=0.46s (21.18%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1871|ppo_ep: 1|act_loss: 0.0263671875|cri_loss: 0.014129638671875|unsuper_loss: 0.0
+average reward score: 4.63671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.47s (21.68%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1872|ppo_ep: 1|act_loss: -0.03436279296875|cri_loss: -0.015869140625|unsuper_loss: 0.0
+average reward score: 4.3671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.72%) |Training time=0.47s (21.74%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1873|ppo_ep: 1|act_loss: 0.00041961669921875|cri_loss: 0.001628875732421875|unsuper_loss: 0.0
+average reward score: 4.16796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.49s |Gather latency=0.00s (0.00%) |Generate time=1.76s (70.73%) |Training time=0.47s (19.01%) |Others=0.26 (10.27%)|CurSamplesPerSec=12.86 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1874|ppo_ep: 1|act_loss: -0.018707275390625|cri_loss: -0.00836181640625|unsuper_loss: 0.0
+average reward score: 5.48046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.45%) |Training time=0.47s (21.99%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1875|ppo_ep: 1|act_loss: 0.03057861328125|cri_loss: 0.0168304443359375|unsuper_loss: 0.0
+average reward score: 4.69140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.97%) |Training time=0.46s (21.45%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1876|ppo_ep: 1|act_loss: 0.0498046875|cri_loss: 0.0269012451171875|unsuper_loss: 0.0
+average reward score: 4.11328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.74%) |Training time=0.47s (21.68%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1877|ppo_ep: 1|act_loss: 0.09320068359375|cri_loss: 0.048553466796875|unsuper_loss: 0.0
+average reward score: 4.85546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.46s (21.56%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1878|ppo_ep: 1|act_loss: -0.0460205078125|cri_loss: -0.02252197265625|unsuper_loss: 0.0
+average reward score: 4.9921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.47%) |Training time=0.48s (22.04%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.42
+[2023-04-14 09:56:56,563] [INFO] [logging.py:96:log_dist] [Rank 0] step=1880, skipped=22, lr=[8.58639183071024e-06, 8.58639183071024e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:56:56,581] [INFO] [timer.py:199:stop] epoch=0/micro_step=1880/global_step=1880, RunningAvgSamplesPerSec=107.72234280157105, CurrSamplesPerSec=100.53431047965461, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:56:56,674] [INFO] [logging.py:96:log_dist] [Rank 0] step=1880, skipped=27, lr=[4.451918247401336e-06, 4.451918247401336e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1879|ppo_ep: 1|act_loss: -0.04168701171875|cri_loss: -0.0201873779296875|unsuper_loss: 0.0
+average reward score: 4.00390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.57%) |Training time=0.48s (21.13%) |Others=0.10 (4.30%)|CurSamplesPerSec=14.06 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1880|ppo_ep: 1|act_loss: -0.01303863525390625|cri_loss: -0.0061492919921875|unsuper_loss: 0.0
+average reward score: 4.73828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.85%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1881|ppo_ep: 1|act_loss: 0.073974609375|cri_loss: 0.039947509765625|unsuper_loss: 0.0
+average reward score: 4.140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.46%) |Training time=0.47s (21.98%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1882|ppo_ep: 1|act_loss: 0.02935791015625|cri_loss: 0.019622802734375|unsuper_loss: 0.0
+average reward score: 4.78125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.92%) |Training time=0.46s (21.50%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1883|ppo_ep: 1|act_loss: 0.012115478515625|cri_loss: 0.00738525390625|unsuper_loss: 0.0
+average reward score: 4.953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.53%) |Training time=0.47s (21.91%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1884|ppo_ep: 1|act_loss: 0.0086669921875|cri_loss: 0.00626373291015625|unsuper_loss: 0.0
+average reward score: 4.87109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.56%) |Training time=0.47s (21.88%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1885|ppo_ep: 1|act_loss: -0.06195068359375|cri_loss: -0.027618408203125|unsuper_loss: 0.0
+average reward score: 4.78515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.53%) |Training time=0.47s (21.91%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1886|ppo_ep: 1|act_loss: -0.0176544189453125|cri_loss: -0.006420135498046875|unsuper_loss: 0.0
+average reward score: 4.3359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.70%) |Training time=0.47s (21.72%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1887|ppo_ep: 1|act_loss: -0.0256195068359375|cri_loss: -0.010986328125|unsuper_loss: 0.0
+average reward score: 4.16796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.55%) |Training time=0.47s (21.89%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1888|ppo_ep: 1|act_loss: 0.05511474609375|cri_loss: 0.02960205078125|unsuper_loss: 0.0
+average reward score: 4.63671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.82s |Gather latency=0.00s (0.00%) |Generate time=1.76s (62.23%) |Training time=0.47s (16.67%) |Others=0.60 (21.09%)|CurSamplesPerSec=11.34 |AvgSamplesPerSec=14.42
+[2023-04-14 09:57:18,761] [INFO] [logging.py:96:log_dist] [Rank 0] step=1890, skipped=22, lr=[8.574729264029886e-06, 8.574729264029886e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:57:18,779] [INFO] [timer.py:199:stop] epoch=0/micro_step=1890/global_step=1890, RunningAvgSamplesPerSec=107.70136193271539, CurrSamplesPerSec=103.31760270160629, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:57:18,872] [INFO] [logging.py:96:log_dist] [Rank 0] step=1890, skipped=27, lr=[4.44588989932528e-06, 4.44588989932528e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1889|ppo_ep: 1|act_loss: 0.056121826171875|cri_loss: 0.02935791015625|unsuper_loss: 0.0
+average reward score: 4.796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.42%) |Training time=0.47s (22.00%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1890|ppo_ep: 1|act_loss: -0.0290985107421875|cri_loss: -0.0125274658203125|unsuper_loss: 0.0
+average reward score: 4.3515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.13%) |Training time=0.48s (22.31%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1891|ppo_ep: 1|act_loss: 0.17236328125|cri_loss: 0.0926513671875|unsuper_loss: 0.0
+average reward score: 4.29296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.25%) |Training time=0.48s (22.18%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1892|ppo_ep: 1|act_loss: 0.011810302734375|cri_loss: 0.00740814208984375|unsuper_loss: 0.0
+average reward score: 4.796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.65%) |Training time=0.47s (21.77%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1893|ppo_ep: 1|act_loss: 0.0084075927734375|cri_loss: 0.00481414794921875|unsuper_loss: 0.0
+average reward score: 4.796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.18%) |Training time=0.49s (22.25%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1894|ppo_ep: 1|act_loss: 0.087890625|cri_loss: 0.047149658203125|unsuper_loss: 0.0
+average reward score: 4.5859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.37%) |Training time=0.48s (22.07%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1895|ppo_ep: 1|act_loss: -0.0259246826171875|cri_loss: -0.012298583984375|unsuper_loss: 0.0
+average reward score: 5.16015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.15%) |Training time=0.48s (22.32%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1896|ppo_ep: 1|act_loss: -0.013397216796875|cri_loss: -0.00591278076171875|unsuper_loss: 0.0
+average reward score: 5.09765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.78%) |Training time=0.49s (22.68%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1897|ppo_ep: 1|act_loss: -0.08074951171875|cri_loss: -0.038360595703125|unsuper_loss: 0.0
+average reward score: 5.0703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.75%) |Training time=0.49s (22.68%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1898|ppo_ep: 1|act_loss: -0.0638427734375|cri_loss: -0.029876708984375|unsuper_loss: 0.0
+average reward score: 5.47265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.53%) |Training time=0.50s (22.97%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.42
+[2023-04-14 09:57:40,428] [INFO] [logging.py:96:log_dist] [Rank 0] step=1900, skipped=22, lr=[8.563011117316109e-06, 8.563011117316109e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:57:40,446] [INFO] [timer.py:199:stop] epoch=0/micro_step=1900/global_step=1900, RunningAvgSamplesPerSec=107.65234667115939, CurrSamplesPerSec=96.84231002618438, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:57:40,796] [INFO] [logging.py:96:log_dist] [Rank 0] step=1900, skipped=27, lr=[4.439832708466712e-06, 4.439832708466712e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1899|ppo_ep: 1|act_loss: -0.0599365234375|cri_loss: -0.029296875|unsuper_loss: 0.0
+average reward score: 4.40234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.43s |Gather latency=0.00s (0.00%) |Generate time=1.58s (65.12%) |Training time=0.75s (30.80%) |Others=0.10 (4.09%)|CurSamplesPerSec=13.16 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1900|ppo_ep: 1|act_loss: 0.00838470458984375|cri_loss: 0.00495147705078125|unsuper_loss: 0.0
+average reward score: 5.15625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.84%) |Training time=0.50s (22.66%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1901|ppo_ep: 1|act_loss: 0.01554107666015625|cri_loss: 0.00830841064453125|unsuper_loss: 0.0
+average reward score: 4.25
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.47%) |Training time=0.46s (20.99%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1902|ppo_ep: 1|act_loss: 0.082763671875|cri_loss: 0.04510498046875|unsuper_loss: 0.0
+average reward score: 4.671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.23%) |Training time=0.46s (21.23%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1903|ppo_ep: 1|act_loss: 0.08160400390625|cri_loss: 0.04412841796875|unsuper_loss: 0.0
+average reward score: 4.390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.78s (75.03%) |Training time=0.49s (20.81%) |Others=0.10 (4.16%)|CurSamplesPerSec=13.48 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1904|ppo_ep: 1|act_loss: 0.004364013671875|cri_loss: 0.0028858184814453125|unsuper_loss: 0.0
+average reward score: 4.6640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.16%) |Training time=0.46s (21.26%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1905|ppo_ep: 1|act_loss: 0.008758544921875|cri_loss: 0.005126953125|unsuper_loss: 0.0
+average reward score: 4.859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.83%) |Training time=0.45s (20.64%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1906|ppo_ep: 1|act_loss: -0.04571533203125|cri_loss: -0.0203399658203125|unsuper_loss: 0.0
+average reward score: 4.5390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.63s (70.66%) |Training time=0.45s (19.45%) |Others=0.23 (9.89%)|CurSamplesPerSec=13.85 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1907|ppo_ep: 1|act_loss: -0.0204315185546875|cri_loss: -0.00977325439453125|unsuper_loss: 0.0
+average reward score: 4.7734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.66s (74.35%) |Training time=0.47s (21.17%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.35 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1908|ppo_ep: 1|act_loss: -0.0234527587890625|cri_loss: -0.011016845703125|unsuper_loss: 0.0
+average reward score: 4.91015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.73s (75.05%) |Training time=0.48s (20.64%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.88 |AvgSamplesPerSec=14.42
+[2023-04-14 09:58:02,983] [INFO] [logging.py:96:log_dist] [Rank 0] step=1910, skipped=22, lr=[8.551237564260112e-06, 8.551237564260112e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:58:03,001] [INFO] [timer.py:199:stop] epoch=0/micro_step=1910/global_step=1910, RunningAvgSamplesPerSec=107.64097658565429, CurrSamplesPerSec=115.11625720882522, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:58:03,094] [INFO] [logging.py:96:log_dist] [Rank 0] step=1910, skipped=27, lr=[4.433746764607812e-06, 4.433746764607812e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1909|ppo_ep: 1|act_loss: -0.018829345703125|cri_loss: -0.00876617431640625|unsuper_loss: 0.0
+average reward score: 4.64453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.14%) |Training time=0.44s (20.34%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1910|ppo_ep: 1|act_loss: -0.02069091796875|cri_loss: -0.00942230224609375|unsuper_loss: 0.0
+average reward score: 4.26953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.48%) |Training time=0.48s (22.00%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1911|ppo_ep: 1|act_loss: 0.02587890625|cri_loss: 0.0135345458984375|unsuper_loss: 0.0
+average reward score: 4.5625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.89%) |Training time=0.47s (21.57%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1912|ppo_ep: 1|act_loss: 0.01739501953125|cri_loss: 0.0093841552734375|unsuper_loss: 0.0
+average reward score: 4.296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.27%) |Training time=0.48s (22.20%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1913|ppo_ep: 1|act_loss: 0.05316162109375|cri_loss: 0.0273284912109375|unsuper_loss: 0.0
+average reward score: 3.9609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.60%) |Training time=0.47s (21.87%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1914|ppo_ep: 1|act_loss: 0.0217742919921875|cri_loss: 0.01145172119140625|unsuper_loss: 0.0
+average reward score: 4.5546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.77%) |Training time=0.47s (21.72%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1915|ppo_ep: 1|act_loss: -0.00406646728515625|cri_loss: -0.0015735626220703125|unsuper_loss: 0.0
+average reward score: 4.796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.95%) |Training time=0.47s (21.52%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1916|ppo_ep: 1|act_loss: 0.0229339599609375|cri_loss: 0.01293182373046875|unsuper_loss: 0.0
+average reward score: 4.30859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.59%) |Training time=0.47s (21.86%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1917|ppo_ep: 1|act_loss: -0.03863525390625|cri_loss: -0.01763916015625|unsuper_loss: 0.0
+average reward score: 4.296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.17%) |Training time=0.51s (21.66%) |Others=0.10 (4.17%)|CurSamplesPerSec=13.53 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1918|ppo_ep: 1|act_loss: 0.020843505859375|cri_loss: 0.0109405517578125|unsuper_loss: 0.0
+average reward score: 4.45703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.28%) |Training time=0.48s (22.18%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.42
+[2023-04-14 09:58:24,899] [INFO] [logging.py:96:log_dist] [Rank 0] step=1920, skipped=22, lr=[8.539408779374354e-06, 8.539408779374354e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:58:24,917] [INFO] [timer.py:199:stop] epoch=0/micro_step=1920/global_step=1920, RunningAvgSamplesPerSec=107.6049387338173, CurrSamplesPerSec=101.45643165445873, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:58:25,010] [INFO] [logging.py:96:log_dist] [Rank 0] step=1920, skipped=27, lr=[4.427632157956951e-06, 4.427632157956951e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1919|ppo_ep: 1|act_loss: -0.022369384765625|cri_loss: -0.01059722900390625|unsuper_loss: 0.0
+average reward score: 4.63671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.48s (22.10%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1920|ppo_ep: 1|act_loss: -0.0789794921875|cri_loss: -0.0374755859375|unsuper_loss: 0.0
+average reward score: 4.06640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.51s |Gather latency=0.00s (0.00%) |Generate time=1.61s (64.09%) |Training time=0.49s (19.47%) |Others=0.41 (16.44%)|CurSamplesPerSec=12.77 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1921|ppo_ep: 1|act_loss: -0.027984619140625|cri_loss: -0.0128326416015625|unsuper_loss: 0.0
+average reward score: 4.7109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.48s (22.00%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1922|ppo_ep: 1|act_loss: 0.013946533203125|cri_loss: 0.0087738037109375|unsuper_loss: 0.0
+average reward score: 4.6796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.48s (22.03%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1923|ppo_ep: 1|act_loss: 0.020965576171875|cri_loss: 0.011932373046875|unsuper_loss: 0.0
+average reward score: 4.68359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.60%) |Training time=0.48s (21.88%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1924|ppo_ep: 1|act_loss: 0.0313720703125|cri_loss: 0.021392822265625|unsuper_loss: 0.0
+average reward score: 4.8984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.12%) |Training time=0.46s (21.33%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1925|ppo_ep: 1|act_loss: 0.02484130859375|cri_loss: 0.01285552978515625|unsuper_loss: 0.0
+average reward score: 4.859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.51%) |Training time=0.47s (21.92%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1926|ppo_ep: 1|act_loss: 0.09423828125|cri_loss: 0.052276611328125|unsuper_loss: 0.0
+average reward score: 4.55859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.68%) |Training time=0.47s (21.76%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1927|ppo_ep: 1|act_loss: -0.0212249755859375|cri_loss: -0.0092926025390625|unsuper_loss: 0.0
+average reward score: 4.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.42%) |Training time=0.48s (22.03%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1928|ppo_ep: 1|act_loss: -0.0222015380859375|cri_loss: -0.01053619384765625|unsuper_loss: 0.0
+average reward score: 4.4765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.73%) |Training time=0.47s (21.70%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.42
+[2023-04-14 09:58:46,890] [INFO] [logging.py:96:log_dist] [Rank 0] step=1930, skipped=22, lr=[8.527524937989964e-06, 8.527524937989964e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:58:46,909] [INFO] [timer.py:199:stop] epoch=0/micro_step=1930/global_step=1930, RunningAvgSamplesPerSec=107.57664485953372, CurrSamplesPerSec=101.2196243603908, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:58:47,001] [INFO] [logging.py:96:log_dist] [Rank 0] step=1930, skipped=27, lr=[4.421488979147349e-06, 4.421488979147349e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1929|ppo_ep: 1|act_loss: -0.042266845703125|cri_loss: -0.0194244384765625|unsuper_loss: 0.0
+average reward score: 4.67578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.32%) |Training time=0.48s (22.14%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1930|ppo_ep: 1|act_loss: -0.01319122314453125|cri_loss: -0.00580596923828125|unsuper_loss: 0.0
+average reward score: 4.4921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.81%) |Training time=0.47s (21.64%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1931|ppo_ep: 1|act_loss: -0.0440673828125|cri_loss: -0.019989013671875|unsuper_loss: 0.0
+average reward score: 4.90234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.88%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1932|ppo_ep: 1|act_loss: -0.04925537109375|cri_loss: -0.0204315185546875|unsuper_loss: 0.0
+average reward score: 4.1171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.38%) |Training time=0.47s (20.38%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.78 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1933|ppo_ep: 1|act_loss: 0.0242156982421875|cri_loss: 0.01245880126953125|unsuper_loss: 0.0
+average reward score: 4.515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.34%) |Training time=0.48s (22.10%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1934|ppo_ep: 1|act_loss: 0.057586669921875|cri_loss: 0.030181884765625|unsuper_loss: 0.0
+average reward score: 4.71484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.40%) |Training time=0.48s (22.04%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1935|ppo_ep: 1|act_loss: 0.0548095703125|cri_loss: 0.0284881591796875|unsuper_loss: 0.0
+average reward score: 4.77734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.07s |Gather latency=0.00s (0.00%) |Generate time=1.58s (76.53%) |Training time=0.39s (18.69%) |Others=0.10 (4.77%)|CurSamplesPerSec=15.49 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1936|ppo_ep: 1|act_loss: 0.04010009765625|cri_loss: 0.02294921875|unsuper_loss: 0.0
+average reward score: 4.83203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.56%) |Training time=0.48s (21.92%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1937|ppo_ep: 1|act_loss: 0.0172882080078125|cri_loss: 0.009521484375|unsuper_loss: 0.0
+average reward score: 4.47265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.67%) |Training time=0.47s (20.98%) |Others=0.10 (4.35%)|CurSamplesPerSec=14.16 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1938|ppo_ep: 1|act_loss: -0.036041259765625|cri_loss: -0.0171966552734375|unsuper_loss: 0.0
+average reward score: 4.4453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.35%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.42
+[2023-04-14 09:59:08,767] [INFO] [logging.py:96:log_dist] [Rank 0] step=1940, skipped=22, lr=[8.515586216254137e-06, 8.515586216254137e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:59:08,785] [INFO] [timer.py:199:stop] epoch=0/micro_step=1940/global_step=1940, RunningAvgSamplesPerSec=107.57414139164379, CurrSamplesPerSec=109.40669624006952, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:59:08,878] [INFO] [logging.py:96:log_dist] [Rank 0] step=1940, skipped=27, lr=[4.4153173192357375e-06, 4.4153173192357375e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1939|ppo_ep: 1|act_loss: -0.020263671875|cri_loss: -0.0081634521484375|unsuper_loss: 0.0
+average reward score: 4.7109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.36%) |Training time=0.46s (21.09%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1940|ppo_ep: 1|act_loss: 0.0053558349609375|cri_loss: 0.003795623779296875|unsuper_loss: 0.0
+average reward score: 4.44140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.89%) |Training time=0.46s (21.55%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43
+epoch: 0|step: 1941|ppo_ep: 1|act_loss: -0.0266265869140625|cri_loss: -0.0120849609375|unsuper_loss: 0.0
+average reward score: 4.31640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.59%) |Training time=0.47s (21.85%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.43
+epoch: 0|step: 1942|ppo_ep: 1|act_loss: 0.00569915771484375|cri_loss: 0.003177642822265625|unsuper_loss: 0.0
+average reward score: 4.33984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.95%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.43
+epoch: 0|step: 1943|ppo_ep: 1|act_loss: 0.00244903564453125|cri_loss: 0.0018787384033203125|unsuper_loss: 0.0
+average reward score: 4.98828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.35%) |Training time=0.48s (22.08%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43
+epoch: 0|step: 1944|ppo_ep: 1|act_loss: 0.02032470703125|cri_loss: 0.01052093505859375|unsuper_loss: 0.0
+average reward score: 4.91015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.46s |Gather latency=0.00s (0.00%) |Generate time=1.59s (64.58%) |Training time=0.48s (19.36%) |Others=0.39 (16.06%)|CurSamplesPerSec=13.03 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1945|ppo_ep: 1|act_loss: 0.0038471221923828125|cri_loss: 0.0023193359375|unsuper_loss: 0.0
+average reward score: 4.13671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.22%) |Training time=0.48s (22.21%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.43
+epoch: 0|step: 1946|ppo_ep: 1|act_loss: -0.00514984130859375|cri_loss: -0.002071380615234375|unsuper_loss: 0.0
+average reward score: 4.83984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.42%) |Training time=0.47s (22.02%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.43
+epoch: 0|step: 1947|ppo_ep: 1|act_loss: -0.0303497314453125|cri_loss: -0.0145416259765625|unsuper_loss: 0.0
+average reward score: 4.73828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.12%) |Training time=0.48s (20.65%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.74 |AvgSamplesPerSec=14.43
+epoch: 0|step: 1948|ppo_ep: 1|act_loss: -0.0226287841796875|cri_loss: -0.0107574462890625|unsuper_loss: 0.0
+average reward score: 4.5859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.19%) |Training time=0.48s (22.23%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.43
+[2023-04-14 09:59:30,807] [INFO] [logging.py:96:log_dist] [Rank 0] step=1950, skipped=22, lr=[8.50359279112753e-06, 8.50359279112753e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:59:30,826] [INFO] [timer.py:199:stop] epoch=0/micro_step=1950/global_step=1950, RunningAvgSamplesPerSec=107.54580213604925, CurrSamplesPerSec=102.16569360734277, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:59:30,918] [INFO] [logging.py:96:log_dist] [Rank 0] step=1950, skipped=27, lr=[4.4091172697010025e-06, 4.4091172697010025e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1949|ppo_ep: 1|act_loss: -0.0047607421875|cri_loss: -0.001953125|unsuper_loss: 0.0
+average reward score: 4.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.40%) |Training time=0.48s (22.07%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.43
+epoch: 0|step: 1950|ppo_ep: 1|act_loss: 0.0032978057861328125|cri_loss: 0.002613067626953125|unsuper_loss: 0.0
+average reward score: 4.453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.29%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.43
+epoch: 0|step: 1951|ppo_ep: 1|act_loss: -0.009979248046875|cri_loss: -0.00467681884765625|unsuper_loss: 0.0
+average reward score: 5.2734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.30%) |Training time=0.48s (22.15%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43
+epoch: 0|step: 1952|ppo_ep: 1|act_loss: 0.033477783203125|cri_loss: 0.017303466796875|unsuper_loss: 0.0
+average reward score: 5.07421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.40%) |Training time=0.48s (22.04%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.43
+epoch: 0|step: 1953|ppo_ep: 1|act_loss: -0.0017824172973632812|cri_loss: -0.000713348388671875|unsuper_loss: 0.0
+average reward score: 4.69921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.85%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.43
+epoch: 0|step: 1954|ppo_ep: 1|act_loss: 0.042510986328125|cri_loss: 0.022857666015625|unsuper_loss: 0.0
+average reward score: 4.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.48s (21.96%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.43
+epoch: 0|step: 1955|ppo_ep: 1|act_loss: 0.01194000244140625|cri_loss: 0.0062408447265625|unsuper_loss: 0.0
+average reward score: 4.44140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.03%) |Training time=0.48s (22.42%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.43
+epoch: 0|step: 1956|ppo_ep: 1|act_loss: -0.025299072265625|cri_loss: -0.0088348388671875|unsuper_loss: 0.0
+average reward score: 3.568359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.89s |Gather latency=0.00s (0.00%) |Generate time=1.58s (54.65%) |Training time=0.48s (16.54%) |Others=0.83 (28.80%)|CurSamplesPerSec=11.08 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1957|ppo_ep: 1|act_loss: -0.0439453125|cri_loss: -0.02105712890625|unsuper_loss: 0.0
+average reward score: 4.96484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.86%) |Training time=0.49s (22.59%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1958|ppo_ep: 1|act_loss: -0.02569580078125|cri_loss: -0.0119781494140625|unsuper_loss: 0.0
+average reward score: 4.5
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.85%) |Training time=0.49s (22.60%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.42
+[2023-04-14 09:59:53,126] [INFO] [logging.py:96:log_dist] [Rank 0] step=1960, skipped=22, lr=[8.491544840381637e-06, 8.491544840381637e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 09:59:53,144] [INFO] [timer.py:199:stop] epoch=0/micro_step=1960/global_step=1960, RunningAvgSamplesPerSec=107.5075454772204, CurrSamplesPerSec=98.59728841847918, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 09:59:53,237] [INFO] [logging.py:96:log_dist] [Rank 0] step=1960, skipped=27, lr=[4.4028889224428365e-06, 4.4028889224428365e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1959|ppo_ep: 1|act_loss: -0.0106353759765625|cri_loss: -0.004428863525390625|unsuper_loss: 0.0
+average reward score: 5.08203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.79%) |Training time=0.49s (22.67%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1960|ppo_ep: 1|act_loss: -0.01139068603515625|cri_loss: -0.005153656005859375|unsuper_loss: 0.0
+average reward score: 4.3515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.68%) |Training time=0.49s (22.77%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.43
+epoch: 0|step: 1961|ppo_ep: 1|act_loss: 0.00475311279296875|cri_loss: 0.0025424957275390625|unsuper_loss: 0.0
+average reward score: 4.65625
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.74s (74.80%) |Training time=0.49s (20.97%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.79 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1962|ppo_ep: 1|act_loss: 0.01065826416015625|cri_loss: 0.007080078125|unsuper_loss: 0.0
+average reward score: 4.80078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.96%) |Training time=0.49s (22.50%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.43
+epoch: 0|step: 1963|ppo_ep: 1|act_loss: -0.0230560302734375|cri_loss: -0.0100250244140625|unsuper_loss: 0.0
+average reward score: 4.3203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.91%) |Training time=0.49s (22.54%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43
+epoch: 0|step: 1964|ppo_ep: 1|act_loss: -0.007442474365234375|cri_loss: -0.0033111572265625|unsuper_loss: 0.0
+average reward score: 4.89453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.69%) |Training time=0.49s (22.46%) |Others=0.10 (4.85%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.43
+epoch: 0|step: 1965|ppo_ep: 1|act_loss: -0.0241851806640625|cri_loss: -0.0106658935546875|unsuper_loss: 0.0
+average reward score: 4.6171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.12%) |Training time=0.49s (22.36%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.43
+epoch: 0|step: 1966|ppo_ep: 1|act_loss: 0.024444580078125|cri_loss: 0.0130615234375|unsuper_loss: 0.0
+average reward score: 4.6796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.67s (74.12%) |Training time=0.49s (21.54%) |Others=0.10 (4.35%)|CurSamplesPerSec=14.18 |AvgSamplesPerSec=14.43
+epoch: 0|step: 1967|ppo_ep: 1|act_loss: 0.037841796875|cri_loss: 0.02105712890625|unsuper_loss: 0.0
+average reward score: 4.70703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.87s |Gather latency=0.00s (0.00%) |Generate time=1.57s (54.79%) |Training time=0.49s (17.24%) |Others=0.80 (27.96%)|CurSamplesPerSec=11.16 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1968|ppo_ep: 1|act_loss: 0.021453857421875|cri_loss: 0.01126861572265625|unsuper_loss: 0.0
+average reward score: 4.43359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.02%) |Training time=0.47s (21.43%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.42
+[2023-04-14 10:00:15,737] [INFO] [logging.py:96:log_dist] [Rank 0] step=1970, skipped=22, lr=[8.479442542596152e-06, 8.479442542596152e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:00:15,755] [INFO] [timer.py:199:stop] epoch=0/micro_step=1970/global_step=1970, RunningAvgSamplesPerSec=107.4582436204539, CurrSamplesPerSec=94.59606963662851, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:00:15,847] [INFO] [logging.py:96:log_dist] [Rank 0] step=1970, skipped=27, lr=[4.39663236978037e-06, 4.39663236978037e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1969|ppo_ep: 1|act_loss: 0.04022216796875|cri_loss: 0.022247314453125|unsuper_loss: 0.0
+average reward score: 4.65234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.56%) |Training time=0.50s (22.96%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1970|ppo_ep: 1|act_loss: 0.002838134765625|cri_loss: 0.002742767333984375|unsuper_loss: 0.0
+average reward score: 4.7578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.26%) |Training time=0.51s (23.23%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1971|ppo_ep: 1|act_loss: 0.0199432373046875|cri_loss: 0.0103759765625|unsuper_loss: 0.0
+average reward score: 4.5
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.22%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1972|ppo_ep: 1|act_loss: 0.04071044921875|cri_loss: 0.0208740234375|unsuper_loss: 0.0
+average reward score: 4.66015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.94%) |Training time=0.49s (22.53%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1973|ppo_ep: 1|act_loss: -0.02496337890625|cri_loss: -0.011505126953125|unsuper_loss: 0.0
+average reward score: 4.25
+-------------------------------------------------------------------------------------
+|E2E latency=2.51s |Gather latency=0.00s (0.00%) |Generate time=1.58s (62.97%) |Training time=0.49s (19.54%) |Others=0.44 (17.49%)|CurSamplesPerSec=12.73 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1974|ppo_ep: 1|act_loss: -0.0089111328125|cri_loss: -0.00240325927734375|unsuper_loss: 0.0
+average reward score: 4.4765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.12%) |Training time=0.48s (22.33%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1975|ppo_ep: 1|act_loss: 0.035888671875|cri_loss: 0.019439697265625|unsuper_loss: 0.0
+average reward score: 4.53125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.63%) |Training time=0.50s (22.83%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1976|ppo_ep: 1|act_loss: -0.0299835205078125|cri_loss: -0.0144195556640625|unsuper_loss: 0.0
+average reward score: 4.828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.02%) |Training time=0.48s (22.41%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1977|ppo_ep: 1|act_loss: 0.12469482421875|cri_loss: 0.06622314453125|unsuper_loss: 0.0
+average reward score: 4.9140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.34%) |Training time=0.51s (21.49%) |Others=0.10 (4.17%)|CurSamplesPerSec=13.59 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1978|ppo_ep: 1|act_loss: 0.00582122802734375|cri_loss: 0.00420379638671875|unsuper_loss: 0.0
+average reward score: 4.8125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.79%) |Training time=0.49s (22.67%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42
+[2023-04-14 10:00:37,960] [INFO] [logging.py:96:log_dist] [Rank 0] step=1980, skipped=22, lr=[8.467286077156324e-06, 8.467286077156324e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:00:37,978] [INFO] [timer.py:199:stop] epoch=0/micro_step=1980/global_step=1980, RunningAvgSamplesPerSec=107.40380143627976, CurrSamplesPerSec=102.09924531086331, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:00:38,071] [INFO] [logging.py:96:log_dist] [Rank 0] step=1980, skipped=27, lr=[4.3903477044508066e-06, 4.3903477044508066e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1979|ppo_ep: 1|act_loss: -0.00296783447265625|cri_loss: -0.0011501312255859375|unsuper_loss: 0.0
+average reward score: 4.5234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.59s (69.84%) |Training time=0.48s (20.97%) |Others=0.21 (9.19%)|CurSamplesPerSec=14.10 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1980|ppo_ep: 1|act_loss: -0.04632568359375|cri_loss: -0.021453857421875|unsuper_loss: 0.0
+average reward score: 4.54296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.48s (22.06%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1981|ppo_ep: 1|act_loss: 0.01451873779296875|cri_loss: 0.0079803466796875|unsuper_loss: 0.0
+average reward score: 4.2109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.35%) |Training time=0.48s (22.09%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1982|ppo_ep: 1|act_loss: -0.0458984375|cri_loss: -0.02252197265625|unsuper_loss: 0.0
+average reward score: 4.80859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.05%) |Training time=0.48s (22.41%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1983|ppo_ep: 1|act_loss: 0.006771087646484375|cri_loss: 0.0038604736328125|unsuper_loss: 0.0
+average reward score: 5.26953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.57%) |Training time=0.48s (21.90%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1984|ppo_ep: 1|act_loss: 0.028411865234375|cri_loss: 0.0149078369140625|unsuper_loss: 0.0
+average reward score: 4.8515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.30%) |Training time=0.48s (22.15%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1985|ppo_ep: 1|act_loss: -0.0034332275390625|cri_loss: -0.0011224746704101562|unsuper_loss: 0.0
+average reward score: 4.21875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.05%) |Training time=0.48s (22.41%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1986|ppo_ep: 1|act_loss: 0.025604248046875|cri_loss: 0.01398468017578125|unsuper_loss: 0.0
+average reward score: 4.76171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.25%) |Training time=0.48s (22.18%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1987|ppo_ep: 1|act_loss: 0.01654052734375|cri_loss: 0.00942230224609375|unsuper_loss: 0.0
+average reward score: 4.671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.24%) |Training time=0.48s (22.20%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1988|ppo_ep: 1|act_loss: -0.04742431640625|cri_loss: -0.0215301513671875|unsuper_loss: 0.0
+average reward score: 5.08984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.08%) |Training time=0.48s (22.36%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.42
+[2023-04-14 10:00:59,653] [INFO] [logging.py:96:log_dist] [Rank 0] step=1990, skipped=22, lr=[8.455075624250293e-06, 8.455075624250293e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:00:59,671] [INFO] [timer.py:199:stop] epoch=0/micro_step=1990/global_step=1990, RunningAvgSamplesPerSec=107.3690434626594, CurrSamplesPerSec=99.88764347649415, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:00:59,764] [INFO] [logging.py:96:log_dist] [Rank 0] step=1990, skipped=27, lr=[4.3840350196080485e-06, 4.3840350196080485e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1989|ppo_ep: 1|act_loss: 0.0305023193359375|cri_loss: 0.017486572265625|unsuper_loss: 0.0
+average reward score: 4.38671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.05%) |Training time=0.48s (22.40%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43
+epoch: 0|step: 1990|ppo_ep: 1|act_loss: 0.03271484375|cri_loss: 0.017822265625|unsuper_loss: 0.0
+average reward score: 4.95703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.48s (22.24%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.43
+epoch: 0|step: 1991|ppo_ep: 1|act_loss: 0.031494140625|cri_loss: 0.0171051025390625|unsuper_loss: 0.0
+average reward score: 4.25390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.78%) |Training time=0.49s (22.66%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.43
+epoch: 0|step: 1992|ppo_ep: 1|act_loss: -0.0189361572265625|cri_loss: -0.00396728515625|unsuper_loss: 0.0
+average reward score: 4.7265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.89%) |Training time=0.49s (22.55%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.43
+epoch: 0|step: 1993|ppo_ep: 1|act_loss: -0.026123046875|cri_loss: -0.011444091796875|unsuper_loss: 0.0
+average reward score: 4.34765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.57s |Gather latency=0.00s (0.00%) |Generate time=1.74s (67.83%) |Training time=0.49s (19.00%) |Others=0.34 (13.16%)|CurSamplesPerSec=12.47 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1994|ppo_ep: 1|act_loss: 0.00157928466796875|cri_loss: 0.0015621185302734375|unsuper_loss: 0.0
+average reward score: 4.7890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.02%) |Training time=0.49s (22.46%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1995|ppo_ep: 1|act_loss: 0.04425048828125|cri_loss: 0.022979736328125|unsuper_loss: 0.0
+average reward score: 4.734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.68s (74.22%) |Training time=0.48s (21.39%) |Others=0.10 (4.39%)|CurSamplesPerSec=14.12 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1996|ppo_ep: 1|act_loss: 0.025238037109375|cri_loss: 0.01305389404296875|unsuper_loss: 0.0
+average reward score: 4.8046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.04%) |Training time=0.48s (22.40%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1997|ppo_ep: 1|act_loss: 0.02642822265625|cri_loss: 0.01448822021484375|unsuper_loss: 0.0
+average reward score: 4.53125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.11%) |Training time=0.48s (22.33%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.42
+epoch: 0|step: 1998|ppo_ep: 1|act_loss: 0.01488494873046875|cri_loss: 0.0079498291015625|unsuper_loss: 0.0
+average reward score: 4.7734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.91%) |Training time=0.47s (21.54%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.43
+[2023-04-14 10:01:21,774] [INFO] [logging.py:96:log_dist] [Rank 0] step=2000, skipped=22, lr=[8.442811364866433e-06, 8.442811364866433e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:01:21,793] [INFO] [timer.py:199:stop] epoch=0/micro_step=2000/global_step=2000, RunningAvgSamplesPerSec=107.33059698092995, CurrSamplesPerSec=98.97223170601313, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:01:21,885] [INFO] [logging.py:96:log_dist] [Rank 0] step=2000, skipped=27, lr=[4.3776944088213124e-06, 4.3776944088213124e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 1999|ppo_ep: 1|act_loss: -0.0130462646484375|cri_loss: -0.005878448486328125|unsuper_loss: 0.0
+average reward score: 4.984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.02%) |Training time=0.49s (22.45%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2000|ppo_ep: 1|act_loss: 0.0110015869140625|cri_loss: 0.00705718994140625|unsuper_loss: 0.0
+average reward score: 4.8046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.08%) |Training time=0.48s (22.37%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2001|ppo_ep: 1|act_loss: -0.0264434814453125|cri_loss: -0.012451171875|unsuper_loss: 0.0
+average reward score: 4.8671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.26%) |Training time=0.48s (22.19%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2002|ppo_ep: 1|act_loss: -0.00392913818359375|cri_loss: 7.2479248046875e-05|unsuper_loss: 0.0
+average reward score: 5.265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.20%) |Training time=0.49s (22.28%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2003|ppo_ep: 1|act_loss: -0.0116119384765625|cri_loss: -0.00412750244140625|unsuper_loss: 0.0
+average reward score: 4.71875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.08%) |Training time=0.48s (22.37%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2004|ppo_ep: 1|act_loss: 0.0240478515625|cri_loss: 0.01318359375|unsuper_loss: 0.0
+average reward score: 5.05078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.48s (22.24%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2005|ppo_ep: 1|act_loss: -0.0154266357421875|cri_loss: -0.005596160888671875|unsuper_loss: 0.0
+average reward score: 5.5
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.97%) |Training time=0.48s (22.49%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2006|ppo_ep: 1|act_loss: -0.040557861328125|cri_loss: -0.0189056396484375|unsuper_loss: 0.0
+average reward score: 5.0
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.83%) |Training time=0.49s (22.62%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.43
+[2023-04-14 10:01:39,335] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 2007|ppo_ep: 1|act_loss: -0.044219970703125|cri_loss: -0.0193023681640625|unsuper_loss: 0.0
+average reward score: 5.51171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.43s |Gather latency=0.00s (0.00%) |Generate time=1.63s (67.23%) |Training time=0.59s (24.41%) |Others=0.20 (8.35%)|CurSamplesPerSec=13.17 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2008|ppo_ep: 1|act_loss: -0.019622802734375|cri_loss: -0.0090789794921875|unsuper_loss: 0.0
+average reward score: 5.20703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.03%) |Training time=0.48s (22.41%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.43
+[2023-04-14 10:01:43,655] [INFO] [logging.py:96:log_dist] [Rank 0] step=2010, skipped=22, lr=[8.43049348079065e-06, 8.43049348079065e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:01:43,673] [INFO] [timer.py:199:stop] epoch=0/micro_step=2010/global_step=2010, RunningAvgSamplesPerSec=107.27663857336522, CurrSamplesPerSec=97.40335191638643, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:01:43,766] [INFO] [logging.py:96:log_dist] [Rank 0] step=2010, skipped=28, lr=[4.37196406009969e-06, 4.37196406009969e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2009|ppo_ep: 1|act_loss: -0.005809783935546875|cri_loss: -0.000865936279296875|unsuper_loss: 0.0
+average reward score: 5.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.76%) |Training time=0.49s (22.70%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2010|ppo_ep: 1|act_loss: -0.00772857666015625|cri_loss: -0.00311279296875|unsuper_loss: 0.0
+average reward score: 5.61328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.48s (22.23%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2011|ppo_ep: 1|act_loss: -0.00501251220703125|cri_loss: -0.0017919540405273438|unsuper_loss: 0.0
+average reward score: 5.13671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.82%) |Training time=0.49s (22.63%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2012|ppo_ep: 1|act_loss: 0.0738525390625|cri_loss: 0.038116455078125|unsuper_loss: 0.0
+average reward score: 5.47265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.04%) |Training time=0.48s (22.40%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2013|ppo_ep: 1|act_loss: 0.0192413330078125|cri_loss: 0.0116424560546875|unsuper_loss: 0.0
+average reward score: 4.9765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.33%) |Training time=0.51s (23.15%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2014|ppo_ep: 1|act_loss: 0.0540771484375|cri_loss: 0.029754638671875|unsuper_loss: 0.0
+average reward score: 5.171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.17%) |Training time=0.51s (23.33%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2015|ppo_ep: 1|act_loss: -0.0133514404296875|cri_loss: -0.00567626953125|unsuper_loss: 0.0
+average reward score: 5.25
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.29%) |Training time=0.45s (20.97%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2016|ppo_ep: 1|act_loss: 0.014251708984375|cri_loss: 0.00818634033203125|unsuper_loss: 0.0
+average reward score: 5.46875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.17%) |Training time=0.42s (19.22%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2017|ppo_ep: 1|act_loss: -0.0249481201171875|cri_loss: -0.0024261474609375|unsuper_loss: 0.0
+average reward score: 5.4453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.59%) |Training time=0.46s (20.89%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2018|ppo_ep: 1|act_loss: 0.0255126953125|cri_loss: 0.013580322265625|unsuper_loss: 0.0
+average reward score: 5.70703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.35%) |Training time=0.46s (21.09%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.43
+[2023-04-14 10:02:05,348] [INFO] [logging.py:96:log_dist] [Rank 0] step=2020, skipped=22, lr=[8.418122154603703e-06, 8.418122154603703e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:02:05,367] [INFO] [timer.py:199:stop] epoch=0/micro_step=2020/global_step=2020, RunningAvgSamplesPerSec=107.26067687682459, CurrSamplesPerSec=111.90816404803911, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:02:05,459] [INFO] [logging.py:96:log_dist] [Rank 0] step=2020, skipped=28, lr=[4.36557064928396e-06, 4.36557064928396e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2019|ppo_ep: 1|act_loss: 0.04864501953125|cri_loss: 0.0251312255859375|unsuper_loss: 0.0
+average reward score: 5.21875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.70%) |Training time=0.45s (20.75%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2020|ppo_ep: 1|act_loss: -0.04766845703125|cri_loss: -0.0232086181640625|unsuper_loss: 0.0
+average reward score: 5.12890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.18%) |Training time=0.46s (21.27%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2021|ppo_ep: 1|act_loss: 0.019683837890625|cri_loss: 0.011077880859375|unsuper_loss: 0.0
+average reward score: 4.96484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.69%) |Training time=0.45s (20.62%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2022|ppo_ep: 1|act_loss: 0.02667236328125|cri_loss: 0.018280029296875|unsuper_loss: 0.0
+average reward score: 5.0859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.46s |Gather latency=0.00s (0.00%) |Generate time=1.78s (72.36%) |Training time=0.46s (18.90%) |Others=0.21 (8.74%)|CurSamplesPerSec=13.01 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2023|ppo_ep: 1|act_loss: -0.0203704833984375|cri_loss: -0.00766754150390625|unsuper_loss: 0.0
+average reward score: 4.828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.67%) |Training time=0.47s (21.77%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2024|ppo_ep: 1|act_loss: -0.0394287109375|cri_loss: -0.018646240234375|unsuper_loss: 0.0
+average reward score: 5.32421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.62s (72.14%) |Training time=0.53s (23.49%) |Others=0.10 (4.37%)|CurSamplesPerSec=14.25 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2025|ppo_ep: 1|act_loss: 0.00598907470703125|cri_loss: 0.00673675537109375|unsuper_loss: 0.0
+average reward score: 5.546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.05%) |Training time=0.46s (21.39%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2026|ppo_ep: 1|act_loss: -0.01861572265625|cri_loss: -0.0078582763671875|unsuper_loss: 0.0
+average reward score: 5.20703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.08%) |Training time=0.46s (21.32%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2027|ppo_ep: 1|act_loss: 0.064453125|cri_loss: 0.033477783203125|unsuper_loss: 0.0
+average reward score: 5.52734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.28%) |Training time=0.45s (21.10%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2028|ppo_ep: 1|act_loss: 0.01007080078125|cri_loss: 0.00605010986328125|unsuper_loss: 0.0
+average reward score: 5.69921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.27%) |Training time=0.48s (22.21%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.43
+[2023-04-14 10:02:27,376] [INFO] [logging.py:96:log_dist] [Rank 0] step=2030, skipped=22, lr=[8.405697569678487e-06, 8.405697569678487e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:02:27,394] [INFO] [timer.py:199:stop] epoch=0/micro_step=2030/global_step=2030, RunningAvgSamplesPerSec=107.2414902830951, CurrSamplesPerSec=93.43053466253548, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:02:27,487] [INFO] [logging.py:96:log_dist] [Rank 0] step=2030, skipped=28, lr=[4.3591495862107625e-06, 4.3591495862107625e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2029|ppo_ep: 1|act_loss: 0.07958984375|cri_loss: 0.04119873046875|unsuper_loss: 0.0
+average reward score: 4.9921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.56%) |Training time=0.51s (22.99%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2030|ppo_ep: 1|act_loss: 0.0701904296875|cri_loss: 0.036529541015625|unsuper_loss: 0.0
+average reward score: 4.5390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.47%) |Training time=0.48s (22.00%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2031|ppo_ep: 1|act_loss: 0.03338623046875|cri_loss: 0.017608642578125|unsuper_loss: 0.0
+average reward score: 5.14453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.79%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2032|ppo_ep: 1|act_loss: -0.0008573532104492188|cri_loss: -0.00025463104248046875|unsuper_loss: 0.0
+average reward score: 4.62890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.48s (22.03%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2033|ppo_ep: 1|act_loss: -0.0310211181640625|cri_loss: -0.0144195556640625|unsuper_loss: 0.0
+average reward score: 5.296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.08s |Gather latency=0.00s (0.00%) |Generate time=1.59s (76.65%) |Training time=0.39s (18.61%) |Others=0.10 (4.75%)|CurSamplesPerSec=15.42 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2034|ppo_ep: 1|act_loss: -0.03662109375|cri_loss: -0.0159759521484375|unsuper_loss: 0.0
+average reward score: 5.23828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.28%) |Training time=0.48s (22.17%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2035|ppo_ep: 1|act_loss: -0.04473876953125|cri_loss: -0.021026611328125|unsuper_loss: 0.0
+average reward score: 5.50390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.04%) |Training time=0.49s (22.42%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2036|ppo_ep: 1|act_loss: -0.072265625|cri_loss: -0.034637451171875|unsuper_loss: 0.0
+average reward score: 4.8984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.90%) |Training time=0.49s (22.58%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2037|ppo_ep: 1|act_loss: 0.021484375|cri_loss: 0.01324462890625|unsuper_loss: 0.0
+average reward score: 5.16796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.94s |Gather latency=0.00s (0.00%) |Generate time=1.58s (53.92%) |Training time=0.59s (20.15%) |Others=0.76 (25.93%)|CurSamplesPerSec=10.90 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2038|ppo_ep: 1|act_loss: 0.164794921875|cri_loss: 0.093994140625|unsuper_loss: 0.0
+average reward score: 4.7421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.92%) |Training time=0.49s (22.54%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.43
+[2023-04-14 10:02:50,207] [INFO] [logging.py:96:log_dist] [Rank 0] step=2040, skipped=22, lr=[8.393219910177327e-06, 8.393219910177327e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:02:50,225] [INFO] [timer.py:199:stop] epoch=0/micro_step=2040/global_step=2040, RunningAvgSamplesPerSec=107.20764921501167, CurrSamplesPerSec=99.54234960114154, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:02:50,317] [INFO] [logging.py:96:log_dist] [Rank 0] step=2040, skipped=28, lr=[4.352700966055743e-06, 4.352700966055743e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2039|ppo_ep: 1|act_loss: -0.006927490234375|cri_loss: -0.003032684326171875|unsuper_loss: 0.0
+average reward score: 5.71875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.12%) |Training time=0.48s (22.36%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2040|ppo_ep: 1|act_loss: 0.061065673828125|cri_loss: 0.032318115234375|unsuper_loss: 0.0
+average reward score: 5.36328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.03%) |Training time=0.49s (22.43%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2041|ppo_ep: 1|act_loss: 0.09600830078125|cri_loss: 0.05084228515625|unsuper_loss: 0.0
+average reward score: 5.33984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.07%) |Training time=0.49s (22.41%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2042|ppo_ep: 1|act_loss: 0.067626953125|cri_loss: 0.037261962890625|unsuper_loss: 0.0
+average reward score: 5.03515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.85%) |Training time=0.49s (22.64%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2043|ppo_ep: 1|act_loss: 0.01378631591796875|cri_loss: 0.0082244873046875|unsuper_loss: 0.0
+average reward score: 5.65625
+-------------------------------------------------------------------------------------
+|E2E latency=2.73s |Gather latency=0.00s (0.00%) |Generate time=1.60s (58.73%) |Training time=0.49s (17.81%) |Others=0.64 (23.46%)|CurSamplesPerSec=11.73 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2044|ppo_ep: 1|act_loss: -0.0100250244140625|cri_loss: -0.004634857177734375|unsuper_loss: 0.0
+average reward score: 5.12109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.37%) |Training time=0.47s (22.02%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2045|ppo_ep: 1|act_loss: -0.02764892578125|cri_loss: -0.0125579833984375|unsuper_loss: 0.0
+average reward score: 5.59765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.45%) |Training time=0.50s (23.05%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2046|ppo_ep: 1|act_loss: -0.019775390625|cri_loss: -0.0078277587890625|unsuper_loss: 0.0
+average reward score: 5.625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.60%) |Training time=0.50s (22.80%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2047|ppo_ep: 1|act_loss: -0.0533447265625|cri_loss: -0.0249481201171875|unsuper_loss: 0.0
+average reward score: 4.87109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.77%) |Training time=0.50s (22.74%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2048|ppo_ep: 1|act_loss: -0.0435791015625|cri_loss: -0.020751953125|unsuper_loss: 0.0
+average reward score: 5.45703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.92%) |Training time=0.49s (22.57%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.43
+[2023-04-14 10:03:12,500] [INFO] [logging.py:96:log_dist] [Rank 0] step=2050, skipped=22, lr=[8.380689361049238e-06, 8.380689361049238e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:03:12,724] [INFO] [timer.py:199:stop] epoch=0/micro_step=2050/global_step=2050, RunningAvgSamplesPerSec=107.1224647769258, CurrSamplesPerSec=61.993959389620294, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:03:12,816] [INFO] [logging.py:96:log_dist] [Rank 0] step=2050, skipped=28, lr=[4.34622488440301e-06, 4.34622488440301e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2049|ppo_ep: 1|act_loss: -0.03973388671875|cri_loss: -0.0189056396484375|unsuper_loss: 0.0
+average reward score: 5.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.59s (67.24%) |Training time=0.68s (28.64%) |Others=0.10 (4.13%)|CurSamplesPerSec=13.50 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2050|ppo_ep: 1|act_loss: 0.01849365234375|cri_loss: 0.0101165771484375|unsuper_loss: 0.0
+average reward score: 5.21875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.36%) |Training time=0.48s (22.10%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2051|ppo_ep: 1|act_loss: 0.038970947265625|cri_loss: 0.020233154296875|unsuper_loss: 0.0
+average reward score: 5.55859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.81%) |Training time=0.49s (21.00%) |Others=0.10 (4.19%)|CurSamplesPerSec=13.65 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2052|ppo_ep: 1|act_loss: 0.0194244384765625|cri_loss: 0.010223388671875|unsuper_loss: 0.0
+average reward score: 4.875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.52%) |Training time=0.48s (21.99%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2053|ppo_ep: 1|act_loss: 0.0357666015625|cri_loss: 0.0200347900390625|unsuper_loss: 0.0
+average reward score: 5.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.60%) |Training time=0.48s (21.05%) |Others=0.10 (4.35%)|CurSamplesPerSec=14.17 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2054|ppo_ep: 1|act_loss: 0.01934814453125|cri_loss: 0.01020050048828125|unsuper_loss: 0.0
+average reward score: 5.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.58s (67.31%) |Training time=0.46s (19.47%) |Others=0.31 (13.22%)|CurSamplesPerSec=13.60 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2055|ppo_ep: 1|act_loss: -0.04913330078125|cri_loss: -0.020721435546875|unsuper_loss: 0.0
+average reward score: 5.26953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.95%) |Training time=0.46s (21.47%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2056|ppo_ep: 1|act_loss: -0.0174407958984375|cri_loss: -0.0081787109375|unsuper_loss: 0.0
+average reward score: 5.45703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.87%) |Training time=0.47s (21.58%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2057|ppo_ep: 1|act_loss: -0.0110015869140625|cri_loss: -0.004688262939453125|unsuper_loss: 0.0
+average reward score: 5.203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.62%) |Training time=0.47s (21.80%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2058|ppo_ep: 1|act_loss: -0.01165771484375|cri_loss: -0.00550079345703125|unsuper_loss: 0.0
+average reward score: 5.21875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.93%) |Training time=0.46s (21.52%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.43
+[2023-04-14 10:03:34,775] [INFO] [logging.py:96:log_dist] [Rank 0] step=2060, skipped=22, lr=[8.368106108027184e-06, 8.368106108027184e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:03:34,794] [INFO] [timer.py:199:stop] epoch=0/micro_step=2060/global_step=2060, RunningAvgSamplesPerSec=107.10558702661527, CurrSamplesPerSec=106.73938194768076, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:03:34,886] [INFO] [logging.py:96:log_dist] [Rank 0] step=2060, skipped=28, lr=[4.339721437243713e-06, 4.339721437243713e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2059|ppo_ep: 1|act_loss: 0.025543212890625|cri_loss: 0.01482391357421875|unsuper_loss: 0.0
+average reward score: 5.56640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.70s |Gather latency=0.00s (0.00%) |Generate time=1.58s (58.77%) |Training time=0.46s (17.16%) |Others=0.65 (24.07%)|CurSamplesPerSec=11.87 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2060|ppo_ep: 1|act_loss: -0.03955078125|cri_loss: -0.0190887451171875|unsuper_loss: 0.0
+average reward score: 4.8671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.06s |Gather latency=0.00s (0.00%) |Generate time=1.58s (76.50%) |Training time=0.39s (18.73%) |Others=0.10 (4.77%)|CurSamplesPerSec=15.50 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2061|ppo_ep: 1|act_loss: 0.002399444580078125|cri_loss: 0.0017910003662109375|unsuper_loss: 0.0
+average reward score: 5.2578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.02%) |Training time=0.46s (21.41%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2062|ppo_ep: 1|act_loss: -0.0010166168212890625|cri_loss: -0.0002918243408203125|unsuper_loss: 0.0
+average reward score: 5.12109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.74%) |Training time=0.47s (21.69%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2063|ppo_ep: 1|act_loss: -0.055419921875|cri_loss: -0.0272369384765625|unsuper_loss: 0.0
+average reward score: 5.3046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.76%) |Training time=0.47s (21.69%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2064|ppo_ep: 1|act_loss: 0.029693603515625|cri_loss: 0.0154266357421875|unsuper_loss: 0.0
+average reward score: 5.48828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.69%) |Training time=0.47s (21.75%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2065|ppo_ep: 1|act_loss: 0.0760498046875|cri_loss: 0.03997802734375|unsuper_loss: 0.0
+average reward score: 4.6796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.55%) |Training time=0.47s (21.89%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2066|ppo_ep: 1|act_loss: 0.00406646728515625|cri_loss: 0.00244140625|unsuper_loss: 0.0
+average reward score: 5.515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.59%) |Training time=0.47s (20.17%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.82 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2067|ppo_ep: 1|act_loss: 0.0028285980224609375|cri_loss: 0.00255584716796875|unsuper_loss: 0.0
+average reward score: 5.421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.51%) |Training time=0.47s (21.94%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2068|ppo_ep: 1|act_loss: -0.01499176025390625|cri_loss: -0.0057525634765625|unsuper_loss: 0.0
+average reward score: 5.29296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.95%) |Training time=0.46s (21.48%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.43
+[2023-04-14 10:03:56,902] [INFO] [logging.py:96:log_dist] [Rank 0] step=2070, skipped=22, lr=[8.35547033762533e-06, 8.35547033762533e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:03:57,081] [INFO] [timer.py:199:stop] epoch=0/micro_step=2070/global_step=2070, RunningAvgSamplesPerSec=107.08235179869443, CurrSamplesPerSec=68.86106831377825, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:03:57,174] [INFO] [logging.py:96:log_dist] [Rank 0] step=2070, skipped=28, lr=[4.333190720974631e-06, 4.333190720974631e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2069|ppo_ep: 1|act_loss: -0.04583740234375|cri_loss: -0.02099609375|unsuper_loss: 0.0
+average reward score: 5.14453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.58s (68.57%) |Training time=0.63s (27.18%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.87 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2070|ppo_ep: 1|act_loss: -0.01213836669921875|cri_loss: -0.00543975830078125|unsuper_loss: 0.0
+average reward score: 5.23828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.49%) |Training time=0.47s (21.95%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2071|ppo_ep: 1|act_loss: 0.02960205078125|cri_loss: 0.0157470703125|unsuper_loss: 0.0
+average reward score: 4.85546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.27%) |Training time=0.45s (21.16%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2072|ppo_ep: 1|act_loss: -0.00476837158203125|cri_loss: -0.0019512176513671875|unsuper_loss: 0.0
+average reward score: 5.2734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.39%) |Training time=0.48s (21.88%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2073|ppo_ep: 1|act_loss: 0.07574462890625|cri_loss: 0.039306640625|unsuper_loss: 0.0
+average reward score: 5.42578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.35%) |Training time=0.48s (22.15%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2074|ppo_ep: 1|act_loss: 0.0472412109375|cri_loss: 0.0240936279296875|unsuper_loss: 0.0
+average reward score: 5.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.34%) |Training time=0.48s (22.13%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2075|ppo_ep: 1|act_loss: 0.05096435546875|cri_loss: 0.026123046875|unsuper_loss: 0.0
+average reward score: 4.92578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.48s |Gather latency=0.00s (0.00%) |Generate time=1.59s (63.90%) |Training time=0.48s (19.21%) |Others=0.42 (16.89%)|CurSamplesPerSec=12.90 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2076|ppo_ep: 1|act_loss: 0.023681640625|cri_loss: 0.0122833251953125|unsuper_loss: 0.0
+average reward score: 4.8828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.34%) |Training time=0.47s (21.99%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2077|ppo_ep: 1|act_loss: 0.01788330078125|cri_loss: 0.0093536376953125|unsuper_loss: 0.0
+average reward score: 5.49609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.48s (22.11%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2078|ppo_ep: 1|act_loss: 0.04364013671875|cri_loss: 0.02362060546875|unsuper_loss: 0.0
+average reward score: 4.7578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.48s (22.13%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.43
+[2023-04-14 10:04:19,005] [INFO] [logging.py:96:log_dist] [Rank 0] step=2080, skipped=22, lr=[8.342782237136277e-06, 8.342782237136277e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:04:19,024] [INFO] [timer.py:199:stop] epoch=0/micro_step=2080/global_step=2080, RunningAvgSamplesPerSec=107.05842882438776, CurrSamplesPerSec=101.66045801757994, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:04:19,116] [INFO] [logging.py:96:log_dist] [Rank 0] step=2080, skipped=28, lr=[4.326632832396733e-06, 4.326632832396733e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2079|ppo_ep: 1|act_loss: -0.033355712890625|cri_loss: -0.0159149169921875|unsuper_loss: 0.0
+average reward score: 5.23046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.33%) |Training time=0.48s (22.14%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2080|ppo_ep: 1|act_loss: -2.86102294921875e-05|cri_loss: 0.0017223358154296875|unsuper_loss: 0.0
+average reward score: 5.50390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.51%) |Training time=0.47s (21.94%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2081|ppo_ep: 1|act_loss: -0.0098876953125|cri_loss: -0.00450897216796875|unsuper_loss: 0.0
+average reward score: 5.62109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.85s |Gather latency=0.00s (0.00%) |Generate time=1.76s (61.87%) |Training time=0.49s (17.14%) |Others=0.60 (20.98%)|CurSamplesPerSec=11.23 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2082|ppo_ep: 1|act_loss: -0.01531982421875|cri_loss: -0.007282257080078125|unsuper_loss: 0.0
+average reward score: 5.2421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.68s (75.09%) |Training time=0.46s (20.52%) |Others=0.10 (4.38%)|CurSamplesPerSec=14.28 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2083|ppo_ep: 1|act_loss: -0.012115478515625|cri_loss: -0.005657196044921875|unsuper_loss: 0.0
+average reward score: 5.5234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.53%) |Training time=0.45s (20.90%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2084|ppo_ep: 1|act_loss: 0.03741455078125|cri_loss: 0.020172119140625|unsuper_loss: 0.0
+average reward score: 5.7890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.88%) |Training time=0.46s (21.52%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2085|ppo_ep: 1|act_loss: 0.0025177001953125|cri_loss: 0.0016345977783203125|unsuper_loss: 0.0
+average reward score: 5.4609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.60%) |Training time=0.47s (21.83%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2086|ppo_ep: 1|act_loss: 0.094970703125|cri_loss: 0.0496826171875|unsuper_loss: 0.0
+average reward score: 5.27734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.93%) |Training time=0.46s (21.46%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2087|ppo_ep: 1|act_loss: -0.020263671875|cri_loss: -0.00921630859375|unsuper_loss: 0.0
+average reward score: 5.11328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.47s |Gather latency=0.00s (0.00%) |Generate time=1.60s (64.66%) |Training time=0.46s (18.46%) |Others=0.42 (16.88%)|CurSamplesPerSec=12.95 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2088|ppo_ep: 1|act_loss: 0.0782470703125|cri_loss: 0.0416259765625|unsuper_loss: 0.0
+average reward score: 5.51171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.67%) |Training time=0.47s (21.75%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43
+[2023-04-14 10:04:41,595] [INFO] [logging.py:96:log_dist] [Rank 0] step=2090, skipped=22, lr=[8.330041994628283e-06, 8.330041994628283e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:04:41,613] [INFO] [timer.py:199:stop] epoch=0/micro_step=2090/global_step=2090, RunningAvgSamplesPerSec=107.05361134522774, CurrSamplesPerSec=108.12874805543001, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:04:41,706] [INFO] [logging.py:96:log_dist] [Rank 0] step=2090, skipped=28, lr=[4.320047868713747e-06, 4.320047868713747e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2089|ppo_ep: 1|act_loss: -0.01372528076171875|cri_loss: -0.006328582763671875|unsuper_loss: 0.0
+average reward score: 5.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.99%) |Training time=0.46s (21.44%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2090|ppo_ep: 1|act_loss: 0.005096435546875|cri_loss: 0.00392913818359375|unsuper_loss: 0.0
+average reward score: 5.734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.91%) |Training time=0.46s (21.50%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2091|ppo_ep: 1|act_loss: -0.005229949951171875|cri_loss: -0.0017862319946289062|unsuper_loss: 0.0
+average reward score: 5.2421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.74%) |Training time=0.46s (21.67%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2092|ppo_ep: 1|act_loss: 0.0266876220703125|cri_loss: 0.0213470458984375|unsuper_loss: 0.0
+average reward score: 5.12890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.77%) |Training time=0.46s (21.66%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2093|ppo_ep: 1|act_loss: -0.0106201171875|cri_loss: -0.0045623779296875|unsuper_loss: 0.0
+average reward score: 5.12890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.85%) |Training time=0.46s (21.55%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2094|ppo_ep: 1|act_loss: 0.040252685546875|cri_loss: 0.025543212890625|unsuper_loss: 0.0
+average reward score: 4.7734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.12%) |Training time=0.46s (21.29%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2095|ppo_ep: 1|act_loss: -0.0213623046875|cri_loss: -0.00997161865234375|unsuper_loss: 0.0
+average reward score: 4.7734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.84%) |Training time=0.46s (21.56%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2096|ppo_ep: 1|act_loss: -0.0013408660888671875|cri_loss: -0.0003294944763183594|unsuper_loss: 0.0
+average reward score: 5.49609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.76s (76.15%) |Training time=0.45s (19.60%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.85 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2097|ppo_ep: 1|act_loss: 0.02960205078125|cri_loss: 0.0153045654296875|unsuper_loss: 0.0
+average reward score: 5.26171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.99%) |Training time=0.46s (21.41%) |Others=0.10 (4.60%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2098|ppo_ep: 1|act_loss: 0.08538818359375|cri_loss: 0.045074462890625|unsuper_loss: 0.0
+average reward score: 5.39453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.85%) |Training time=0.46s (21.55%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.43
+[2023-04-14 10:05:03,168] [INFO] [logging.py:96:log_dist] [Rank 0] step=2100, skipped=22, lr=[8.317249798942473e-06, 8.317249798942473e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:05:03,186] [INFO] [timer.py:199:stop] epoch=0/micro_step=2100/global_step=2100, RunningAvgSamplesPerSec=107.05548599476938, CurrSamplesPerSec=106.59958715986158, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:05:03,279] [INFO] [logging.py:96:log_dist] [Rank 0] step=2100, skipped=28, lr=[4.313435927530719e-06, 4.313435927530719e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2099|ppo_ep: 1|act_loss: 0.0094146728515625|cri_loss: 0.005313873291015625|unsuper_loss: 0.0
+average reward score: 5.88671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.81%) |Training time=0.46s (21.62%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2100|ppo_ep: 1|act_loss: -0.013214111328125|cri_loss: -0.004150390625|unsuper_loss: 0.0
+average reward score: 4.9921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.82%) |Training time=0.46s (21.54%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2101|ppo_ep: 1|act_loss: -0.037139892578125|cri_loss: -0.0179595947265625|unsuper_loss: 0.0
+average reward score: 5.01171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.03%) |Training time=0.46s (21.37%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2102|ppo_ep: 1|act_loss: -0.02099609375|cri_loss: -0.0099945068359375|unsuper_loss: 0.0
+average reward score: 5.3828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.46s |Gather latency=0.00s (0.00%) |Generate time=1.61s (65.65%) |Training time=0.44s (17.92%) |Others=0.40 (16.43%)|CurSamplesPerSec=13.02 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2103|ppo_ep: 1|act_loss: 0.01552581787109375|cri_loss: 0.0088958740234375|unsuper_loss: 0.0
+average reward score: 5.41796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.45s (21.10%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2104|ppo_ep: 1|act_loss: -0.002620697021484375|cri_loss: -0.0007905960083007812|unsuper_loss: 0.0
+average reward score: 5.578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.32%) |Training time=0.45s (21.08%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2105|ppo_ep: 1|act_loss: 0.0304412841796875|cri_loss: 0.0157318115234375|unsuper_loss: 0.0
+average reward score: 5.078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.51%) |Training time=0.45s (20.89%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2106|ppo_ep: 1|act_loss: 0.0146331787109375|cri_loss: 0.00815582275390625|unsuper_loss: 0.0
+average reward score: 4.8046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.53%) |Training time=0.45s (20.91%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2107|ppo_ep: 1|act_loss: -0.0046539306640625|cri_loss: -0.001953125|unsuper_loss: 0.0
+average reward score: 5.796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.32%) |Training time=0.45s (21.08%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.43
+[2023-04-14 10:05:22,846] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 2108|ppo_ep: 1|act_loss: -0.006683349609375|cri_loss: -0.002918243408203125|unsuper_loss: 0.0
+average reward score: 5.3046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.79%) |Training time=0.45s (21.08%) |Others=0.09 (4.12%)|CurSamplesPerSec=15.04 |AvgSamplesPerSec=14.43
+[2023-04-14 10:05:24,880] [INFO] [logging.py:96:log_dist] [Rank 0] step=2110, skipped=22, lr=[8.30440583969005e-06, 8.30440583969005e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:05:24,898] [INFO] [timer.py:199:stop] epoch=0/micro_step=2110/global_step=2110, RunningAvgSamplesPerSec=107.07153675733814, CurrSamplesPerSec=106.8084394578795, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:05:24,982] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+[2023-04-14 10:05:24,982] [INFO] [logging.py:96:log_dist] [Rank 0] step=2110, skipped=30, lr=[4.308127016630176e-06, 4.308127016630176e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2109|ppo_ep: 1|act_loss: -0.0302276611328125|cri_loss: -0.01470184326171875|unsuper_loss: 0.0
+average reward score: 5.65234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.46s (21.64%) |Others=0.09 (4.10%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2110|ppo_ep: 1|act_loss: 0.011016845703125|cri_loss: 0.00604248046875|unsuper_loss: 0.0
+average reward score: 5.359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.40%) |Training time=0.45s (21.02%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2111|ppo_ep: 1|act_loss: 0.0041656494140625|cri_loss: 0.0021915435791015625|unsuper_loss: 0.0
+average reward score: 4.875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.95%) |Training time=0.45s (20.54%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2112|ppo_ep: 1|act_loss: 0.0007038116455078125|cri_loss: 0.0009160041809082031|unsuper_loss: 0.0
+average reward score: 5.4921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.38s |Gather latency=0.00s (0.00%) |Generate time=1.82s (76.67%) |Training time=0.46s (19.18%) |Others=0.10 (4.14%)|CurSamplesPerSec=13.46 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2113|ppo_ep: 1|act_loss: 0.035888671875|cri_loss: 0.0190582275390625|unsuper_loss: 0.0
+average reward score: 5.078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.17%) |Training time=0.45s (21.24%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2114|ppo_ep: 1|act_loss: -0.0216522216796875|cri_loss: -0.01023101806640625|unsuper_loss: 0.0
+average reward score: 5.3984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.34%) |Training time=0.45s (21.08%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2115|ppo_ep: 1|act_loss: -0.014190673828125|cri_loss: -0.006740570068359375|unsuper_loss: 0.0
+average reward score: 4.8515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.33%) |Training time=0.45s (21.07%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2116|ppo_ep: 1|act_loss: -0.03753662109375|cri_loss: -0.016510009765625|unsuper_loss: 0.0
+average reward score: 5.20703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.00%) |Training time=0.46s (21.43%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2117|ppo_ep: 1|act_loss: -0.02288818359375|cri_loss: -0.01116943359375|unsuper_loss: 0.0
+average reward score: 5.53125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.56%) |Training time=0.45s (20.86%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2118|ppo_ep: 1|act_loss: 0.041748046875|cri_loss: 0.0218658447265625|unsuper_loss: 0.0
+average reward score: 5.29296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.45s (21.16%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.43
+[2023-04-14 10:05:46,571] [INFO] [logging.py:96:log_dist] [Rank 0] step=2120, skipped=22, lr=[8.291510307249474e-06, 8.291510307249474e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:05:46,589] [INFO] [timer.py:199:stop] epoch=0/micro_step=2120/global_step=2120, RunningAvgSamplesPerSec=107.08617854175999, CurrSamplesPerSec=108.78975468800026, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:05:46,682] [INFO] [logging.py:96:log_dist] [Rank 0] step=2120, skipped=30, lr=[4.301466763187256e-06, 4.301466763187256e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2119|ppo_ep: 1|act_loss: -0.0065765380859375|cri_loss: -0.00258636474609375|unsuper_loss: 0.0
+average reward score: 5.22265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.09%) |Training time=0.46s (21.34%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2120|ppo_ep: 1|act_loss: 0.0123443603515625|cri_loss: 0.007183074951171875|unsuper_loss: 0.0
+average reward score: 4.796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.23%) |Training time=0.45s (21.18%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2121|ppo_ep: 1|act_loss: -0.019927978515625|cri_loss: -0.0091552734375|unsuper_loss: 0.0
+average reward score: 5.26171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.95s |Gather latency=0.00s (0.00%) |Generate time=1.58s (53.69%) |Training time=0.45s (15.36%) |Others=0.91 (30.95%)|CurSamplesPerSec=10.85 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2122|ppo_ep: 1|act_loss: -0.0125274658203125|cri_loss: -0.005584716796875|unsuper_loss: 0.0
+average reward score: 5.23046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.77%) |Training time=0.46s (21.65%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2123|ppo_ep: 1|act_loss: 0.03314208984375|cri_loss: 0.0187530517578125|unsuper_loss: 0.0
+average reward score: 4.93359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.22%) |Training time=0.45s (21.19%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.43
+[2023-04-14 10:05:58,065] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 2124|ppo_ep: 1|act_loss: 0.0307159423828125|cri_loss: 0.0158843994140625|unsuper_loss: 0.0
+average reward score: 5.52734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.59s (75.23%) |Training time=0.42s (20.12%) |Others=0.10 (4.65%)|CurSamplesPerSec=15.18 |AvgSamplesPerSec=14.43
+[2023-04-14 10:06:00,172] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 2125|ppo_ep: 1|act_loss: 0.00861358642578125|cri_loss: 0.004924774169921875|unsuper_loss: 0.0
+average reward score: 5.04296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.58s (75.14%) |Training time=0.43s (20.21%) |Others=0.10 (4.65%)|CurSamplesPerSec=15.19 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2126|ppo_ep: 1|act_loss: -0.0016460418701171875|cri_loss: 2.6702880859375e-05|unsuper_loss: 0.0
+average reward score: 5.1796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.08%) |Training time=0.48s (21.46%) |Others=0.17 (7.45%)|CurSamplesPerSec=14.34 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2127|ppo_ep: 1|act_loss: -0.0195770263671875|cri_loss: -0.0091552734375|unsuper_loss: 0.0
+average reward score: 5.24609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.21%) |Training time=0.45s (21.19%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2128|ppo_ep: 1|act_loss: 0.0018329620361328125|cri_loss: 0.0025920867919921875|unsuper_loss: 0.0
+average reward score: 5.29296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.18%) |Training time=0.45s (21.23%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.43
+[2023-04-14 10:06:08,802] [INFO] [logging.py:96:log_dist] [Rank 0] step=2130, skipped=24, lr=[8.281156877023959e-06, 8.281156877023959e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:06:08,820] [INFO] [timer.py:199:stop] epoch=0/micro_step=2130/global_step=2130, RunningAvgSamplesPerSec=107.10202712994473, CurrSamplesPerSec=108.1727569609888, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:06:08,912] [INFO] [logging.py:96:log_dist] [Rank 0] step=2130, skipped=30, lr=[4.294779807661105e-06, 4.294779807661105e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2129|ppo_ep: 1|act_loss: 0.0150299072265625|cri_loss: 0.0085296630859375|unsuper_loss: 0.0
+average reward score: 5.29296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.46s (21.40%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2130|ppo_ep: 1|act_loss: -0.00832366943359375|cri_loss: -0.0035114288330078125|unsuper_loss: 0.0
+average reward score: 5.30859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.14%) |Training time=0.46s (21.28%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2131|ppo_ep: 1|act_loss: 0.0592041015625|cri_loss: 0.0307769775390625|unsuper_loss: 0.0
+average reward score: 5.6328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.17%) |Training time=0.45s (21.19%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2132|ppo_ep: 1|act_loss: 0.0191650390625|cri_loss: 0.01004791259765625|unsuper_loss: 0.0
+average reward score: 4.9296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.61%) |Training time=0.47s (21.80%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2133|ppo_ep: 1|act_loss: -0.0196533203125|cri_loss: -0.00951385498046875|unsuper_loss: 0.0
+average reward score: 5.0
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.83%) |Training time=0.47s (21.65%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2134|ppo_ep: 1|act_loss: 0.012603759765625|cri_loss: 0.007221221923828125|unsuper_loss: 0.0
+average reward score: 4.90625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.81%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2135|ppo_ep: 1|act_loss: -0.01303863525390625|cri_loss: -0.0056915283203125|unsuper_loss: 0.0
+average reward score: 5.46484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.68%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2136|ppo_ep: 1|act_loss: 0.005916595458984375|cri_loss: 0.003437042236328125|unsuper_loss: 0.0
+average reward score: 5.1328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.05%) |Training time=0.46s (21.39%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2137|ppo_ep: 1|act_loss: 0.0465087890625|cri_loss: 0.02520751953125|unsuper_loss: 0.0
+average reward score: 5.375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.71%) |Training time=0.47s (21.73%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2138|ppo_ep: 1|act_loss: 0.02044677734375|cri_loss: 0.011077880859375|unsuper_loss: 0.0
+average reward score: 5.52734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.67%) |Training time=0.47s (21.78%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.43
+[2023-04-14 10:06:30,321] [INFO] [logging.py:96:log_dist] [Rank 0] step=2140, skipped=24, lr=[8.268168995036705e-06, 8.268168995036705e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:06:30,339] [INFO] [timer.py:199:stop] epoch=0/micro_step=2140/global_step=2140, RunningAvgSamplesPerSec=107.09897142545462, CurrSamplesPerSec=106.86669384427232, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:06:30,432] [INFO] [logging.py:96:log_dist] [Rank 0] step=2140, skipped=30, lr=[4.2880662491685345e-06, 4.2880662491685345e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2139|ppo_ep: 1|act_loss: 0.0289459228515625|cri_loss: 0.0149688720703125|unsuper_loss: 0.0
+average reward score: 5.4375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.97%) |Training time=0.46s (21.48%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2140|ppo_ep: 1|act_loss: -0.02886962890625|cri_loss: -0.01361083984375|unsuper_loss: 0.0
+average reward score: 5.46484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.76%) |Training time=0.47s (21.74%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2141|ppo_ep: 1|act_loss: 0.0021343231201171875|cri_loss: 0.0018062591552734375|unsuper_loss: 0.0
+average reward score: 5.0546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.50s |Gather latency=0.00s (0.00%) |Generate time=1.86s (74.51%) |Training time=0.46s (18.56%) |Others=0.17 (6.94%)|CurSamplesPerSec=12.79 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2142|ppo_ep: 1|act_loss: 0.0106658935546875|cri_loss: 0.00615692138671875|unsuper_loss: 0.0
+average reward score: 5.7734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.89%) |Training time=0.46s (21.53%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2143|ppo_ep: 1|act_loss: 0.044830322265625|cri_loss: 0.0255889892578125|unsuper_loss: 0.0
+average reward score: 4.66796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.80%) |Training time=0.47s (21.64%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2144|ppo_ep: 1|act_loss: -0.0017185211181640625|cri_loss: -0.0005717277526855469|unsuper_loss: 0.0
+average reward score: 4.77734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.67%) |Training time=0.47s (21.76%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2145|ppo_ep: 1|act_loss: -0.03558349609375|cri_loss: -0.0161895751953125|unsuper_loss: 0.0
+average reward score: 5.484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.00%) |Training time=0.46s (21.44%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2146|ppo_ep: 1|act_loss: -0.05517578125|cri_loss: -0.0265045166015625|unsuper_loss: 0.0
+average reward score: 5.8046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.81%) |Training time=0.47s (21.63%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2147|ppo_ep: 1|act_loss: -0.046875|cri_loss: -0.022003173828125|unsuper_loss: 0.0
+average reward score: 5.23828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.08%) |Training time=0.46s (21.33%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2148|ppo_ep: 1|act_loss: 0.0095367431640625|cri_loss: 0.005390167236328125|unsuper_loss: 0.0
+average reward score: 4.63671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.96%) |Training time=0.49s (22.55%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.43
+[2023-04-14 10:06:52,265] [INFO] [logging.py:96:log_dist] [Rank 0] step=2150, skipped=24, lr=[8.255130076978754e-06, 8.255130076978754e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:06:52,283] [INFO] [timer.py:199:stop] epoch=0/micro_step=2150/global_step=2150, RunningAvgSamplesPerSec=107.08429552059788, CurrSamplesPerSec=98.68667589679414, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:06:52,375] [INFO] [logging.py:96:log_dist] [Rank 0] step=2150, skipped=30, lr=[4.281326187220675e-06, 4.281326187220675e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2149|ppo_ep: 1|act_loss: 0.029083251953125|cri_loss: 0.0157318115234375|unsuper_loss: 0.0
+average reward score: 4.90625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.02%) |Training time=0.49s (22.47%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2150|ppo_ep: 1|act_loss: 0.0665283203125|cri_loss: 0.03466796875|unsuper_loss: 0.0
+average reward score: 5.3203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.31%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2151|ppo_ep: 1|act_loss: 0.01038360595703125|cri_loss: 0.006134033203125|unsuper_loss: 0.0
+average reward score: 5.0390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.48s (22.12%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2152|ppo_ep: 1|act_loss: 0.0092620849609375|cri_loss: 0.004779815673828125|unsuper_loss: 0.0
+average reward score: 5.390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.01%) |Training time=0.49s (22.47%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2153|ppo_ep: 1|act_loss: 0.001251220703125|cri_loss: 0.0017547607421875|unsuper_loss: 0.0
+average reward score: 5.7265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.69%) |Training time=0.50s (22.81%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2154|ppo_ep: 1|act_loss: -0.04931640625|cri_loss: -0.02398681640625|unsuper_loss: 0.0
+average reward score: 5.2578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.88%) |Training time=0.49s (22.63%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2155|ppo_ep: 1|act_loss: 0.0128173828125|cri_loss: 0.00759124755859375|unsuper_loss: 0.0
+average reward score: 4.75
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.24%) |Training time=0.48s (22.23%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2156|ppo_ep: 1|act_loss: -0.005664825439453125|cri_loss: -0.002101898193359375|unsuper_loss: 0.0
+average reward score: 6.09375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.21%) |Training time=0.48s (22.25%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2157|ppo_ep: 1|act_loss: 0.0102081298828125|cri_loss: 0.005924224853515625|unsuper_loss: 0.0
+average reward score: 5.25
+-------------------------------------------------------------------------------------
+|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.51%) |Training time=0.50s (21.30%) |Others=0.10 (4.19%)|CurSamplesPerSec=13.58 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2158|ppo_ep: 1|act_loss: -0.004772186279296875|cri_loss: -0.000301361083984375|unsuper_loss: 0.0
+average reward score: 5.3203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.47%) |Training time=0.47s (21.97%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.43
+[2023-04-14 10:07:14,131] [INFO] [logging.py:96:log_dist] [Rank 0] step=2160, skipped=24, lr=[8.242040316118323e-06, 8.242040316118323e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:07:14,150] [INFO] [timer.py:199:stop] epoch=0/micro_step=2160/global_step=2160, RunningAvgSamplesPerSec=107.04436231088094, CurrSamplesPerSec=103.56585864587393, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:07:14,242] [INFO] [logging.py:96:log_dist] [Rank 0] step=2160, skipped=30, lr=[4.2745597217215065e-06, 4.2745597217215065e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2159|ppo_ep: 1|act_loss: 0.0247344970703125|cri_loss: 0.01314544677734375|unsuper_loss: 0.0
+average reward score: 5.29296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.87%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2160|ppo_ep: 1|act_loss: -0.05059814453125|cri_loss: -0.0238494873046875|unsuper_loss: 0.0
+average reward score: 5.76171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.08%) |Training time=0.47s (21.65%) |Others=0.11 (5.27%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2161|ppo_ep: 1|act_loss: -0.03314208984375|cri_loss: -0.0160369873046875|unsuper_loss: 0.0
+average reward score: 5.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.90%) |Training time=0.46s (21.53%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2162|ppo_ep: 1|act_loss: -0.028076171875|cri_loss: -0.013458251953125|unsuper_loss: 0.0
+average reward score: 5.34375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.47s (21.65%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2163|ppo_ep: 1|act_loss: 0.003276824951171875|cri_loss: 0.0018472671508789062|unsuper_loss: 0.0
+average reward score: 5.2109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.28%) |Training time=0.45s (21.13%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2164|ppo_ep: 1|act_loss: 0.0155181884765625|cri_loss: 0.008453369140625|unsuper_loss: 0.0
+average reward score: 5.53125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.14%) |Training time=0.46s (21.30%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2165|ppo_ep: 1|act_loss: 0.03338623046875|cri_loss: 0.017547607421875|unsuper_loss: 0.0
+average reward score: 5.4609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.72%) |Training time=0.47s (21.75%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2166|ppo_ep: 1|act_loss: 0.0576171875|cri_loss: 0.029571533203125|unsuper_loss: 0.0
+average reward score: 5.2109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.47s (21.68%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2167|ppo_ep: 1|act_loss: 0.0482177734375|cri_loss: 0.02642822265625|unsuper_loss: 0.0
+average reward score: 4.84765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.85%) |Training time=0.47s (21.58%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2168|ppo_ep: 1|act_loss: 0.0726318359375|cri_loss: 0.03887939453125|unsuper_loss: 0.0
+average reward score: 5.125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.50%) |Training time=0.47s (21.95%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44
+[2023-04-14 10:07:35,717] [INFO] [logging.py:96:log_dist] [Rank 0] step=2170, skipped=24, lr=[8.228899906477248e-06, 8.228899906477248e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:07:35,735] [INFO] [timer.py:199:stop] epoch=0/micro_step=2170/global_step=2170, RunningAvgSamplesPerSec=107.03586667134884, CurrSamplesPerSec=102.76559751986322, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:07:35,828] [INFO] [logging.py:96:log_dist] [Rank 0] step=2170, skipped=30, lr=[4.267766952966369e-06, 4.267766952966369e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2169|ppo_ep: 1|act_loss: -0.03533935546875|cri_loss: -0.0171356201171875|unsuper_loss: 0.0
+average reward score: 4.89453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.59%) |Training time=0.47s (21.89%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2170|ppo_ep: 1|act_loss: -0.030029296875|cri_loss: -0.0144195556640625|unsuper_loss: 0.0
+average reward score: 5.25390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.63%) |Training time=0.48s (21.90%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.54 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2171|ppo_ep: 1|act_loss: -0.020263671875|cri_loss: -0.00963592529296875|unsuper_loss: 0.0
+average reward score: 5.21484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.43%) |Training time=0.48s (21.10%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.02 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2172|ppo_ep: 1|act_loss: 0.032257080078125|cri_loss: 0.0193328857421875|unsuper_loss: 0.0
+average reward score: 4.89453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.71%) |Training time=0.49s (21.10%) |Others=0.10 (4.19%)|CurSamplesPerSec=13.66 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2173|ppo_ep: 1|act_loss: -0.018524169921875|cri_loss: -0.00890350341796875|unsuper_loss: 0.0
+average reward score: 5.734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.05%) |Training time=0.49s (22.33%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2174|ppo_ep: 1|act_loss: -0.00188446044921875|cri_loss: -0.0005998611450195312|unsuper_loss: 0.0
+average reward score: 5.74609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.48s (22.02%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2175|ppo_ep: 1|act_loss: 0.01287841796875|cri_loss: 0.006801605224609375|unsuper_loss: 0.0
+average reward score: 5.53125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.30%) |Training time=0.48s (22.16%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2176|ppo_ep: 1|act_loss: 0.0091705322265625|cri_loss: 0.004795074462890625|unsuper_loss: 0.0
+average reward score: 5.3359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.87%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2177|ppo_ep: 1|act_loss: -0.04803466796875|cri_loss: -0.02337646484375|unsuper_loss: 0.0
+average reward score: 5.125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.82%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2178|ppo_ep: 1|act_loss: 0.0144805908203125|cri_loss: 0.00882720947265625|unsuper_loss: 0.0
+average reward score: 4.93359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.40%) |Training time=0.48s (22.05%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.44
+[2023-04-14 10:07:57,675] [INFO] [logging.py:96:log_dist] [Rank 0] step=2180, skipped=24, lr=[8.215709042828096e-06, 8.215709042828096e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:07:57,693] [INFO] [timer.py:199:stop] epoch=0/micro_step=2180/global_step=2180, RunningAvgSamplesPerSec=107.0068419629447, CurrSamplesPerSec=103.48066046173109, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:07:57,786] [INFO] [logging.py:96:log_dist] [Rank 0] step=2180, skipped=30, lr=[4.2609479816404836e-06, 4.2609479816404836e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2179|ppo_ep: 1|act_loss: 0.00109100341796875|cri_loss: 0.0007219314575195312|unsuper_loss: 0.0
+average reward score: 5.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.54%) |Training time=0.47s (21.90%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2180|ppo_ep: 1|act_loss: 0.00707244873046875|cri_loss: 0.0037384033203125|unsuper_loss: 0.0
+average reward score: 5.45703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.07%) |Training time=0.49s (22.40%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2181|ppo_ep: 1|act_loss: -0.0042572021484375|cri_loss: -0.0018062591552734375|unsuper_loss: 0.0
+average reward score: 5.18359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.81s |Gather latency=0.00s (0.00%) |Generate time=1.58s (56.29%) |Training time=0.48s (17.15%) |Others=0.75 (26.57%)|CurSamplesPerSec=11.38 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2182|ppo_ep: 1|act_loss: 0.054351806640625|cri_loss: 0.0278472900390625|unsuper_loss: 0.0
+average reward score: 5.1640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.25%) |Training time=0.48s (22.21%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2183|ppo_ep: 1|act_loss: -0.01373291015625|cri_loss: -0.0059661865234375|unsuper_loss: 0.0
+average reward score: 5.2265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.92%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2184|ppo_ep: 1|act_loss: 0.0653076171875|cri_loss: 0.03363037109375|unsuper_loss: 0.0
+average reward score: 5.140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.45%) |Training time=0.47s (22.00%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2185|ppo_ep: 1|act_loss: -0.0047760009765625|cri_loss: -0.0015497207641601562|unsuper_loss: 0.0
+average reward score: 5.1953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.86%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2186|ppo_ep: 1|act_loss: -0.00888824462890625|cri_loss: -0.00391387939453125|unsuper_loss: 0.0
+average reward score: 4.875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.88%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2187|ppo_ep: 1|act_loss: -0.031890869140625|cri_loss: -0.0145111083984375|unsuper_loss: 0.0
+average reward score: 5.31640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.28%) |Training time=0.48s (20.51%) |Others=0.10 (4.21%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2188|ppo_ep: 1|act_loss: -0.027069091796875|cri_loss: -0.01320648193359375|unsuper_loss: 0.0
+average reward score: 5.25
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.49%) |Training time=0.47s (21.96%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44
+[2023-04-14 10:08:20,079] [INFO] [logging.py:96:log_dist] [Rank 0] step=2190, skipped=24, lr=[8.20246792069129e-06, 8.20246792069129e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:08:20,097] [INFO] [timer.py:199:stop] epoch=0/micro_step=2190/global_step=2190, RunningAvgSamplesPerSec=106.98401354109774, CurrSamplesPerSec=106.3205036803265, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:08:20,190] [INFO] [logging.py:96:log_dist] [Rank 0] step=2190, skipped=30, lr=[4.254102908817454e-06, 4.254102908817454e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2189|ppo_ep: 1|act_loss: 0.0097808837890625|cri_loss: 0.005466461181640625|unsuper_loss: 0.0
+average reward score: 5.47265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.92%) |Training time=0.46s (21.54%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2190|ppo_ep: 1|act_loss: -0.016876220703125|cri_loss: -0.00792694091796875|unsuper_loss: 0.0
+average reward score: 5.078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.47s (21.64%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2191|ppo_ep: 1|act_loss: 0.040283203125|cri_loss: 0.0205841064453125|unsuper_loss: 0.0
+average reward score: 5.4375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.76%) |Training time=0.47s (21.66%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2192|ppo_ep: 1|act_loss: 0.021575927734375|cri_loss: 0.0172271728515625|unsuper_loss: 0.0
+average reward score: 5.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.49%) |Training time=0.47s (21.96%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2193|ppo_ep: 1|act_loss: 0.045806884765625|cri_loss: 0.02337646484375|unsuper_loss: 0.0
+average reward score: 5.82421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.07%) |Training time=0.49s (22.38%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2194|ppo_ep: 1|act_loss: 0.0016117095947265625|cri_loss: 0.0012531280517578125|unsuper_loss: 0.0
+average reward score: 5.515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.10%) |Training time=0.51s (23.42%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2195|ppo_ep: 1|act_loss: -0.029296875|cri_loss: -0.013427734375|unsuper_loss: 0.0
+average reward score: 5.65625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.34%) |Training time=0.51s (23.19%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2196|ppo_ep: 1|act_loss: -0.0033473968505859375|cri_loss: -0.0009822845458984375|unsuper_loss: 0.0
+average reward score: 5.0234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.63%) |Training time=0.50s (22.89%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2197|ppo_ep: 1|act_loss: -0.033203125|cri_loss: -0.016326904296875|unsuper_loss: 0.0
+average reward score: 5.515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.05%) |Training time=0.46s (21.36%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2198|ppo_ep: 1|act_loss: -0.00848388671875|cri_loss: -0.003993988037109375|unsuper_loss: 0.0
+average reward score: 5.6015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.30%) |Training time=0.48s (22.17%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.44
+[2023-04-14 10:08:41,790] [INFO] [logging.py:96:log_dist] [Rank 0] step=2200, skipped=24, lr=[8.189176736332201e-06, 8.189176736332201e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:08:41,808] [INFO] [timer.py:199:stop] epoch=0/micro_step=2200/global_step=2200, RunningAvgSamplesPerSec=106.94942238546258, CurrSamplesPerSec=98.5336633016457, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:08:41,900] [INFO] [logging.py:96:log_dist] [Rank 0] step=2200, skipped=30, lr=[4.247231835957773e-06, 4.247231835957773e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2199|ppo_ep: 1|act_loss: 0.040740966796875|cri_loss: 0.0230560302734375|unsuper_loss: 0.0
+average reward score: 4.8046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.48%) |Training time=0.49s (22.10%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.51 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2200|ppo_ep: 1|act_loss: -0.04095458984375|cri_loss: -0.02008056640625|unsuper_loss: 0.0
+average reward score: 5.19140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.04%) |Training time=0.49s (21.63%) |Others=0.10 (4.33%)|CurSamplesPerSec=14.06 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2201|ppo_ep: 1|act_loss: 0.0758056640625|cri_loss: 0.03912353515625|unsuper_loss: 0.0
+average reward score: 5.3671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.61s |Gather latency=0.00s (0.00%) |Generate time=1.58s (60.48%) |Training time=0.49s (18.92%) |Others=0.54 (20.61%)|CurSamplesPerSec=12.26 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2202|ppo_ep: 1|act_loss: -0.07659912109375|cri_loss: -0.0325927734375|unsuper_loss: 0.0
+average reward score: 5.4921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.77%) |Training time=0.46s (21.66%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2203|ppo_ep: 1|act_loss: -0.0361328125|cri_loss: -0.0171661376953125|unsuper_loss: 0.0
+average reward score: 5.25
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.96%) |Training time=0.46s (21.45%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2204|ppo_ep: 1|act_loss: 0.01202392578125|cri_loss: 0.006740570068359375|unsuper_loss: 0.0
+average reward score: 5.14453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.02%) |Training time=0.46s (21.38%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2205|ppo_ep: 1|act_loss: -0.01861572265625|cri_loss: -0.0091094970703125|unsuper_loss: 0.0
+average reward score: 5.51171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.77%) |Training time=0.47s (21.67%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2206|ppo_ep: 1|act_loss: 0.0245513916015625|cri_loss: 0.013031005859375|unsuper_loss: 0.0
+average reward score: 5.04296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.72%) |Training time=0.47s (21.69%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2207|ppo_ep: 1|act_loss: -0.00606536865234375|cri_loss: -0.0018520355224609375|unsuper_loss: 0.0
+average reward score: 5.03515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.80%) |Training time=0.46s (21.59%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2208|ppo_ep: 1|act_loss: 0.030975341796875|cri_loss: 0.0165557861328125|unsuper_loss: 0.0
+average reward score: 4.87890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.25%) |Training time=0.48s (22.21%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.44
+[2023-04-14 10:09:03,847] [INFO] [logging.py:96:log_dist] [Rank 0] step=2210, skipped=24, lr=[8.175835686758245e-06, 8.175835686758245e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:09:03,865] [INFO] [timer.py:199:stop] epoch=0/micro_step=2210/global_step=2210, RunningAvgSamplesPerSec=106.93468634085123, CurrSamplesPerSec=105.99765446011911, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:09:03,958] [INFO] [logging.py:96:log_dist] [Rank 0] step=2210, skipped=30, lr=[4.240334864907317e-06, 4.240334864907317e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2209|ppo_ep: 1|act_loss: 0.018798828125|cri_loss: 0.0097198486328125|unsuper_loss: 0.0
+average reward score: 5.84375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.75%) |Training time=0.46s (21.67%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.44
+[2023-04-14 10:09:06,094] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 2210|ppo_ep: 1|act_loss: 0.09375|cri_loss: 0.048583984375|unsuper_loss: 0.0
+average reward score: 5.0859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.20%) |Training time=0.46s (21.69%) |Others=0.09 (4.10%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.44
+[2023-04-14 10:09:08,221] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 2211|ppo_ep: 1|act_loss: 0.0106658935546875|cri_loss: 0.006023406982421875|unsuper_loss: 0.0
+average reward score: 5.44921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.58%) |Training time=0.45s (21.30%) |Others=0.09 (4.12%)|CurSamplesPerSec=15.04 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2212|ppo_ep: 1|act_loss: -0.001712799072265625|cri_loss: -0.0007014274597167969|unsuper_loss: 0.0
+average reward score: 4.8125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.88%) |Training time=0.46s (21.54%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2213|ppo_ep: 1|act_loss: 0.02947998046875|cri_loss: 0.0155029296875|unsuper_loss: 0.0
+average reward score: 5.0703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.13%) |Training time=0.45s (21.28%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2214|ppo_ep: 1|act_loss: -0.0101470947265625|cri_loss: -0.004581451416015625|unsuper_loss: 0.0
+average reward score: 4.8984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.24%) |Training time=0.45s (21.16%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2215|ppo_ep: 1|act_loss: 0.004032135009765625|cri_loss: 0.0030670166015625|unsuper_loss: 0.0
+average reward score: 5.203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.92%) |Training time=0.46s (21.50%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2216|ppo_ep: 1|act_loss: -0.00817108154296875|cri_loss: -0.0038509368896484375|unsuper_loss: 0.0
+average reward score: 5.69140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.84%) |Training time=0.46s (19.93%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2217|ppo_ep: 1|act_loss: -0.03326416015625|cri_loss: -0.0159149169921875|unsuper_loss: 0.0
+average reward score: 5.328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.15%) |Training time=0.45s (21.26%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2218|ppo_ep: 1|act_loss: 0.026885986328125|cri_loss: 0.01406097412109375|unsuper_loss: 0.0
+average reward score: 4.87890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.07%) |Training time=0.46s (21.34%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.44
+[2023-04-14 10:09:25,399] [INFO] [logging.py:96:log_dist] [Rank 0] step=2220, skipped=24, lr=[8.162444969715961e-06, 8.162444969715961e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:09:25,418] [INFO] [timer.py:199:stop] epoch=0/micro_step=2220/global_step=2220, RunningAvgSamplesPerSec=106.94111013177839, CurrSamplesPerSec=108.4043708167457, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:09:25,510] [INFO] [logging.py:96:log_dist] [Rank 0] step=2220, skipped=32, lr=[4.234798710055124e-06, 4.234798710055124e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2219|ppo_ep: 1|act_loss: 0.033538818359375|cri_loss: 0.0184783935546875|unsuper_loss: 0.0
+average reward score: 4.8359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.06%) |Training time=0.46s (21.37%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2220|ppo_ep: 1|act_loss: 0.016204833984375|cri_loss: 0.00933837890625|unsuper_loss: 0.0
+average reward score: 4.73828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.24%) |Training time=0.45s (21.16%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2221|ppo_ep: 1|act_loss: 0.01727294921875|cri_loss: 0.0093231201171875|unsuper_loss: 0.0
+average reward score: 4.63671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.22%) |Training time=0.46s (21.21%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2222|ppo_ep: 1|act_loss: -0.0013675689697265625|cri_loss: -0.0002951622009277344|unsuper_loss: 0.0
+average reward score: 4.44140625
+-------------------------------------------------------------------------------------
+|E2E latency=3.02s |Gather latency=0.00s (0.00%) |Generate time=1.58s (52.40%) |Training time=0.46s (15.08%) |Others=0.98 (32.52%)|CurSamplesPerSec=10.58 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2223|ppo_ep: 1|act_loss: 0.0084075927734375|cri_loss: 0.0046844482421875|unsuper_loss: 0.0
+average reward score: 5.078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.78%) |Training time=0.49s (22.45%) |Others=0.10 (4.77%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2224|ppo_ep: 1|act_loss: -0.0291595458984375|cri_loss: -0.0138092041015625|unsuper_loss: 0.0
+average reward score: 5.19140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.39%) |Training time=0.50s (23.11%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2225|ppo_ep: 1|act_loss: 0.0131683349609375|cri_loss: 0.00794219970703125|unsuper_loss: 0.0
+average reward score: 5.0859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.16%) |Training time=0.49s (22.33%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.44
+[2023-04-14 10:09:41,411] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 2226|ppo_ep: 1|act_loss: -0.040618896484375|cri_loss: -0.018310546875|unsuper_loss: 0.0
+average reward score: 5.2265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.85%) |Training time=0.44s (20.58%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.44
+[2023-04-14 10:09:43,556] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 2227|ppo_ep: 1|act_loss: 0.018310546875|cri_loss: 0.009613037109375|unsuper_loss: 0.0
+average reward score: 5.2578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.20%) |Training time=0.46s (21.22%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2228|ppo_ep: 1|act_loss: -0.00025463104248046875|cri_loss: 0.0007419586181640625|unsuper_loss: 0.0
+average reward score: 4.734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.04%) |Training time=0.49s (22.49%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.44
+[2023-04-14 10:09:48,024] [INFO] [logging.py:96:log_dist] [Rank 0] step=2230, skipped=26, lr=[8.15169676886067e-06, 8.15169676886067e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:09:48,043] [INFO] [timer.py:199:stop] epoch=0/micro_step=2230/global_step=2230, RunningAvgSamplesPerSec=106.92186677661333, CurrSamplesPerSec=95.00962924947352, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:09:48,135] [INFO] [logging.py:96:log_dist] [Rank 0] step=2230, skipped=32, lr=[4.227855380137234e-06, 4.227855380137234e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2229|ppo_ep: 1|act_loss: 0.0445556640625|cri_loss: 0.024322509765625|unsuper_loss: 0.0
+average reward score: 5.08203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.69s (73.91%) |Training time=0.50s (21.81%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.98 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2230|ppo_ep: 1|act_loss: -0.001605987548828125|cri_loss: -0.00045108795166015625|unsuper_loss: 0.0
+average reward score: 5.04296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.25%) |Training time=0.45s (21.15%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2231|ppo_ep: 1|act_loss: -0.0088653564453125|cri_loss: -0.00415802001953125|unsuper_loss: 0.0
+average reward score: 5.8203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.02%) |Training time=0.49s (20.77%) |Others=0.10 (4.21%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2232|ppo_ep: 1|act_loss: 0.00551605224609375|cri_loss: 0.00337982177734375|unsuper_loss: 0.0
+average reward score: 5.2578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.88%) |Training time=0.46s (21.53%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2233|ppo_ep: 1|act_loss: 0.00797271728515625|cri_loss: 0.0043792724609375|unsuper_loss: 0.0
+average reward score: 5.36328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.13%) |Training time=0.45s (21.28%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2234|ppo_ep: 1|act_loss: 0.0162353515625|cri_loss: 0.0086517333984375|unsuper_loss: 0.0
+average reward score: 5.1171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.17%) |Training time=0.46s (21.16%) |Others=0.15 (6.67%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2235|ppo_ep: 1|act_loss: -0.0178070068359375|cri_loss: -0.00872039794921875|unsuper_loss: 0.0
+average reward score: 4.796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.08%) |Training time=0.46s (21.31%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2236|ppo_ep: 1|act_loss: -0.037078857421875|cri_loss: -0.0180206298828125|unsuper_loss: 0.0
+average reward score: 5.1796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.90%) |Training time=0.46s (21.53%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2237|ppo_ep: 1|act_loss: -0.02801513671875|cri_loss: -0.01322174072265625|unsuper_loss: 0.0
+average reward score: 4.84375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.73%) |Training time=0.46s (21.68%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2238|ppo_ep: 1|act_loss: 0.031951904296875|cri_loss: 0.0170135498046875|unsuper_loss: 0.0
+average reward score: 5.37890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.37%) |Training time=0.47s (21.87%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44
+[2023-04-14 10:10:09,702] [INFO] [logging.py:96:log_dist] [Rank 0] step=2240, skipped=26, lr=[8.138217151044717e-06, 8.138217151044717e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:10:09,721] [INFO] [timer.py:199:stop] epoch=0/micro_step=2240/global_step=2240, RunningAvgSamplesPerSec=106.91724169004912, CurrSamplesPerSec=102.24546793214905, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:10:09,813] [INFO] [logging.py:96:log_dist] [Rank 0] step=2240, skipped=32, lr=[4.220886439234385e-06, 4.220886439234385e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2239|ppo_ep: 1|act_loss: -0.039947509765625|cri_loss: -0.0196380615234375|unsuper_loss: 0.0
+average reward score: 5.0546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.48s (22.09%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2240|ppo_ep: 1|act_loss: -0.023529052734375|cri_loss: -0.01129913330078125|unsuper_loss: 0.0
+average reward score: 5.23828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.44s |Gather latency=0.00s (0.00%) |Generate time=1.58s (64.78%) |Training time=0.49s (20.00%) |Others=0.37 (15.22%)|CurSamplesPerSec=13.11 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2241|ppo_ep: 1|act_loss: 0.07281494140625|cri_loss: 0.0377197265625|unsuper_loss: 0.0
+average reward score: 5.18359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.52%) |Training time=0.43s (19.84%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2242|ppo_ep: 1|act_loss: 0.0274200439453125|cri_loss: 0.0153656005859375|unsuper_loss: 0.0
+average reward score: 4.90234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.38%) |Training time=0.44s (20.04%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2243|ppo_ep: 1|act_loss: 0.03289794921875|cri_loss: 0.017913818359375|unsuper_loss: 0.0
+average reward score: 5.5
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.36%) |Training time=0.46s (21.14%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2244|ppo_ep: 1|act_loss: 0.00852203369140625|cri_loss: 0.004596710205078125|unsuper_loss: 0.0
+average reward score: 5.62890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.29%) |Training time=0.46s (21.18%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2245|ppo_ep: 1|act_loss: 0.0084381103515625|cri_loss: 0.005046844482421875|unsuper_loss: 0.0
+average reward score: 4.7109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.61s (67.92%) |Training time=0.46s (19.40%) |Others=0.30 (12.68%)|CurSamplesPerSec=13.51 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2246|ppo_ep: 1|act_loss: 0.0362548828125|cri_loss: 0.020599365234375|unsuper_loss: 0.0
+average reward score: 4.9765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.91%) |Training time=0.44s (19.68%) |Others=0.10 (4.41%)|CurSamplesPerSec=14.24 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2247|ppo_ep: 1|act_loss: -0.027587890625|cri_loss: -0.01079559326171875|unsuper_loss: 0.0
+average reward score: 5.12109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.05%) |Training time=0.44s (20.38%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2248|ppo_ep: 1|act_loss: -0.033111572265625|cri_loss: -0.0156707763671875|unsuper_loss: 0.0
+average reward score: 5.359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.29%) |Training time=0.46s (21.18%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.44
+[2023-04-14 10:10:31,997] [INFO] [logging.py:96:log_dist] [Rank 0] step=2250, skipped=26, lr=[8.124688423357883e-06, 8.124688423357883e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:10:32,015] [INFO] [timer.py:199:stop] epoch=0/micro_step=2250/global_step=2250, RunningAvgSamplesPerSec=106.928745519496, CurrSamplesPerSec=107.82194537475227, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:10:32,108] [INFO] [logging.py:96:log_dist] [Rank 0] step=2250, skipped=32, lr=[4.213891990643095e-06, 4.213891990643095e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2249|ppo_ep: 1|act_loss: -0.00763702392578125|cri_loss: -0.0036678314208984375|unsuper_loss: 0.0
+average reward score: 5.29296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.27%) |Training time=0.46s (21.20%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2250|ppo_ep: 1|act_loss: -0.029144287109375|cri_loss: -0.013763427734375|unsuper_loss: 0.0
+average reward score: 5.921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.33%) |Training time=0.46s (21.11%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2251|ppo_ep: 1|act_loss: -0.017852783203125|cri_loss: -0.00787353515625|unsuper_loss: 0.0
+average reward score: 5.22265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.28%) |Training time=0.46s (21.16%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2252|ppo_ep: 1|act_loss: 0.00494384765625|cri_loss: 0.0029144287109375|unsuper_loss: 0.0
+average reward score: 5.01171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.80%) |Training time=0.47s (21.67%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2253|ppo_ep: 1|act_loss: 0.024017333984375|cri_loss: 0.0124053955078125|unsuper_loss: 0.0
+average reward score: 5.1015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.15%) |Training time=0.46s (21.22%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2254|ppo_ep: 1|act_loss: 0.05157470703125|cri_loss: 0.0269622802734375|unsuper_loss: 0.0
+average reward score: 5.2109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.19%) |Training time=0.49s (22.35%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2255|ppo_ep: 1|act_loss: 0.03814697265625|cri_loss: 0.0195159912109375|unsuper_loss: 0.0
+average reward score: 5.09375
+-------------------------------------------------------------------------------------
+|E2E latency=3.26s |Gather latency=0.00s (0.00%) |Generate time=1.61s (49.47%) |Training time=0.46s (14.15%) |Others=1.19 (36.38%)|CurSamplesPerSec=9.81 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2256|ppo_ep: 1|act_loss: 0.008209228515625|cri_loss: 0.0043182373046875|unsuper_loss: 0.0
+average reward score: 4.98828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.66%) |Training time=0.46s (20.83%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2257|ppo_ep: 1|act_loss: -0.010101318359375|cri_loss: -0.00496673583984375|unsuper_loss: 0.0
+average reward score: 5.4609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.65s (74.92%) |Training time=0.45s (20.57%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2258|ppo_ep: 1|act_loss: -0.053955078125|cri_loss: -0.0260467529296875|unsuper_loss: 0.0
+average reward score: 5.26953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.09%) |Training time=0.47s (20.59%) |Others=0.10 (4.32%)|CurSamplesPerSec=14.04 |AvgSamplesPerSec=14.44
+[2023-04-14 10:10:54,960] [INFO] [logging.py:96:log_dist] [Rank 0] step=2260, skipped=26, lr=[8.11111078632855e-06, 8.11111078632855e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:10:54,978] [INFO] [timer.py:199:stop] epoch=0/micro_step=2260/global_step=2260, RunningAvgSamplesPerSec=106.92700638560709, CurrSamplesPerSec=110.86272184070586, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:10:55,071] [INFO] [logging.py:96:log_dist] [Rank 0] step=2260, skipped=32, lr=[4.206872138037964e-06, 4.206872138037964e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2259|ppo_ep: 1|act_loss: -0.022674560546875|cri_loss: -0.01006317138671875|unsuper_loss: 0.0
+average reward score: 5.421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.50%) |Training time=0.45s (20.95%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2260|ppo_ep: 1|act_loss: -0.013671875|cri_loss: -0.0060577392578125|unsuper_loss: 0.0
+average reward score: 5.4921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.07%) |Training time=0.49s (20.76%) |Others=0.10 (4.17%)|CurSamplesPerSec=13.57 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2261|ppo_ep: 1|act_loss: 0.00531005859375|cri_loss: 0.00421905517578125|unsuper_loss: 0.0
+average reward score: 5.46875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.40%) |Training time=0.45s (21.05%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2262|ppo_ep: 1|act_loss: -0.034423828125|cri_loss: -0.0164947509765625|unsuper_loss: 0.0
+average reward score: 5.796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.14%) |Training time=0.46s (21.32%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2263|ppo_ep: 1|act_loss: -0.00730133056640625|cri_loss: -0.003337860107421875|unsuper_loss: 0.0
+average reward score: 5.4609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.46s (21.18%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2264|ppo_ep: 1|act_loss: 0.0159912109375|cri_loss: 0.0100555419921875|unsuper_loss: 0.0
+average reward score: 5.50390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.28%) |Training time=0.46s (21.16%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2265|ppo_ep: 1|act_loss: 0.012115478515625|cri_loss: 0.00759124755859375|unsuper_loss: 0.0
+average reward score: 5.1328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.26%) |Training time=0.46s (21.18%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2266|ppo_ep: 1|act_loss: 0.00399017333984375|cri_loss: 0.0026092529296875|unsuper_loss: 0.0
+average reward score: 5.171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.74s |Gather latency=0.00s (0.00%) |Generate time=1.60s (58.38%) |Training time=0.45s (16.54%) |Others=0.69 (25.08%)|CurSamplesPerSec=11.69 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2267|ppo_ep: 1|act_loss: -0.047210693359375|cri_loss: -0.0228271484375|unsuper_loss: 0.0
+average reward score: 5.58984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.02%) |Training time=0.49s (22.45%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2268|ppo_ep: 1|act_loss: -0.0060577392578125|cri_loss: -0.00244140625|unsuper_loss: 0.0
+average reward score: 5.51171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.28%) |Training time=0.48s (22.19%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.44
+[2023-04-14 10:11:17,364] [INFO] [logging.py:96:log_dist] [Rank 0] step=2270, skipped=26, lr=[8.097484441210055e-06, 8.097484441210055e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:11:17,382] [INFO] [timer.py:199:stop] epoch=0/micro_step=2270/global_step=2270, RunningAvgSamplesPerSec=106.91831941913372, CurrSamplesPerSec=103.83418432078822, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:11:17,475] [INFO] [logging.py:96:log_dist] [Rank 0] step=2270, skipped=32, lr=[4.199826985470142e-06, 4.199826985470142e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2269|ppo_ep: 1|act_loss: -0.04705810546875|cri_loss: -0.0223846435546875|unsuper_loss: 0.0
+average reward score: 5.1796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.83%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2270|ppo_ep: 1|act_loss: -0.030059814453125|cri_loss: -0.01427459716796875|unsuper_loss: 0.0
+average reward score: 5.6015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.90%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2271|ppo_ep: 1|act_loss: -0.016143798828125|cri_loss: -0.0068817138671875|unsuper_loss: 0.0
+average reward score: 5.296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.51%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2272|ppo_ep: 1|act_loss: -0.05194091796875|cri_loss: -0.020294189453125|unsuper_loss: 0.0
+average reward score: 5.171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.88%) |Training time=0.46s (21.55%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2273|ppo_ep: 1|act_loss: -0.018035888671875|cri_loss: -0.00811767578125|unsuper_loss: 0.0
+average reward score: 4.9921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.68%) |Training time=0.47s (21.77%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2274|ppo_ep: 1|act_loss: 0.01580810546875|cri_loss: 0.0082855224609375|unsuper_loss: 0.0
+average reward score: 5.44921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.89%) |Training time=0.49s (20.92%) |Others=0.10 (4.19%)|CurSamplesPerSec=13.64 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2275|ppo_ep: 1|act_loss: 0.017425537109375|cri_loss: 0.00977325439453125|unsuper_loss: 0.0
+average reward score: 4.67578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.81%) |Training time=0.47s (21.70%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2276|ppo_ep: 1|act_loss: -0.0180816650390625|cri_loss: -0.0082855224609375|unsuper_loss: 0.0
+average reward score: 5.01171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.95%) |Training time=0.46s (21.50%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2277|ppo_ep: 1|act_loss: -0.01300048828125|cri_loss: -0.0060882568359375|unsuper_loss: 0.0
+average reward score: 5.51953125
+-------------------------------------------------------------------------------------
+|E2E latency=3.46s |Gather latency=0.00s (0.00%) |Generate time=1.59s (45.97%) |Training time=0.47s (13.45%) |Others=1.41 (40.58%)|CurSamplesPerSec=9.24 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2278|ppo_ep: 1|act_loss: -0.017974853515625|cri_loss: -0.008453369140625|unsuper_loss: 0.0
+average reward score: 5.65234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.90%) |Training time=0.46s (21.53%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.43
+[2023-04-14 10:11:40,451] [INFO] [logging.py:96:log_dist] [Rank 0] step=2280, skipped=26, lr=[8.083809589977701e-06, 8.083809589977701e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:11:40,469] [INFO] [timer.py:199:stop] epoch=0/micro_step=2280/global_step=2280, RunningAvgSamplesPerSec=106.90553455426142, CurrSamplesPerSec=104.34825010417897, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:11:40,562] [INFO] [logging.py:96:log_dist] [Rank 0] step=2280, skipped=32, lr=[4.192756637365787e-06, 4.192756637365787e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2279|ppo_ep: 1|act_loss: 0.0111846923828125|cri_loss: 0.00595855712890625|unsuper_loss: 0.0
+average reward score: 5.81640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.77%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2280|ppo_ep: 1|act_loss: 0.0008172988891601562|cri_loss: 0.0007848739624023438|unsuper_loss: 0.0
+average reward score: 5.0703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.75%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2281|ppo_ep: 1|act_loss: 0.016265869140625|cri_loss: 0.00873565673828125|unsuper_loss: 0.0
+average reward score: 5.11328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.14%) |Training time=0.49s (22.33%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2282|ppo_ep: 1|act_loss: 0.050537109375|cri_loss: 0.027069091796875|unsuper_loss: 0.0
+average reward score: 5.98046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.70%) |Training time=0.50s (22.79%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2283|ppo_ep: 1|act_loss: 0.00830078125|cri_loss: 0.004779815673828125|unsuper_loss: 0.0
+average reward score: 5.0234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.11%) |Training time=0.49s (22.37%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2284|ppo_ep: 1|act_loss: -0.0672607421875|cri_loss: -0.032012939453125|unsuper_loss: 0.0
+average reward score: 5.5390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.00%) |Training time=0.46s (21.46%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2285|ppo_ep: 1|act_loss: -0.0333251953125|cri_loss: -0.0162506103515625|unsuper_loss: 0.0
+average reward score: 5.1328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.34%) |Training time=0.43s (19.93%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2286|ppo_ep: 1|act_loss: 0.019287109375|cri_loss: 0.0107269287109375|unsuper_loss: 0.0
+average reward score: 5.265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.68s (75.92%) |Training time=0.44s (19.62%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.42 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2287|ppo_ep: 1|act_loss: 0.0294036865234375|cri_loss: 0.01515960693359375|unsuper_loss: 0.0
+average reward score: 5.5
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.98%) |Training time=0.44s (20.44%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2288|ppo_ep: 1|act_loss: 0.016387939453125|cri_loss: 0.00876617431640625|unsuper_loss: 0.0
+average reward score: 5.328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.06%) |Training time=0.44s (20.36%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43
+[2023-04-14 10:12:02,342] [INFO] [logging.py:96:log_dist] [Rank 0] step=2290, skipped=26, lr=[8.070086435325772e-06, 8.070086435325772e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:12:02,361] [INFO] [timer.py:199:stop] epoch=0/micro_step=2290/global_step=2290, RunningAvgSamplesPerSec=106.89813915883823, CurrSamplesPerSec=87.2612355131024, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:12:02,454] [INFO] [logging.py:96:log_dist] [Rank 0] step=2290, skipped=32, lr=[4.185661198524513e-06, 4.185661198524513e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2289|ppo_ep: 1|act_loss: 0.00677490234375|cri_loss: 0.0037097930908203125|unsuper_loss: 0.0
+average reward score: 5.0546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.71s (72.81%) |Training time=0.54s (23.01%) |Others=0.10 (4.18%)|CurSamplesPerSec=13.63 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2290|ppo_ep: 1|act_loss: 0.04388427734375|cri_loss: 0.0227508544921875|unsuper_loss: 0.0
+average reward score: 5.14453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.49%) |Training time=0.43s (19.93%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2291|ppo_ep: 1|act_loss: 0.035247802734375|cri_loss: 0.0183563232421875|unsuper_loss: 0.0
+average reward score: 5.1484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.32%) |Training time=0.43s (20.11%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2292|ppo_ep: 1|act_loss: -0.0136871337890625|cri_loss: -0.0064544677734375|unsuper_loss: 0.0
+average reward score: 5.41796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.48%) |Training time=0.45s (20.92%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2293|ppo_ep: 1|act_loss: 0.006671905517578125|cri_loss: 0.0039825439453125|unsuper_loss: 0.0
+average reward score: 5.296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.66%) |Training time=0.45s (20.76%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2294|ppo_ep: 1|act_loss: -0.09033203125|cri_loss: -0.040313720703125|unsuper_loss: 0.0
+average reward score: 5.13671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.79%) |Training time=0.44s (20.63%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2295|ppo_ep: 1|act_loss: -0.0204925537109375|cri_loss: -0.00887298583984375|unsuper_loss: 0.0
+average reward score: 5.5625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.41%) |Training time=0.45s (20.84%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2296|ppo_ep: 1|act_loss: -0.05853271484375|cri_loss: -0.028350830078125|unsuper_loss: 0.0
+average reward score: 4.9140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.35%) |Training time=0.45s (21.07%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2297|ppo_ep: 1|act_loss: -0.0142822265625|cri_loss: -0.00630950927734375|unsuper_loss: 0.0
+average reward score: 5.24609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.05%) |Training time=0.49s (22.44%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2298|ppo_ep: 1|act_loss: 0.0034999847412109375|cri_loss: 0.002185821533203125|unsuper_loss: 0.0
+average reward score: 6.1015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.88%) |Training time=0.47s (21.58%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.44
+[2023-04-14 10:12:23,932] [INFO] [logging.py:96:log_dist] [Rank 0] step=2300, skipped=26, lr=[8.056315180664529e-06, 8.056315180664529e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:12:23,950] [INFO] [timer.py:199:stop] epoch=0/micro_step=2300/global_step=2300, RunningAvgSamplesPerSec=106.91339089598668, CurrSamplesPerSec=109.58776763603377, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:12:24,042] [INFO] [logging.py:96:log_dist] [Rank 0] step=2300, skipped=32, lr=[4.17854077411784e-06, 4.17854077411784e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2299|ppo_ep: 1|act_loss: -0.0406494140625|cri_loss: -0.0195770263671875|unsuper_loss: 0.0
+average reward score: 4.9453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.38%) |Training time=0.45s (21.08%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2300|ppo_ep: 1|act_loss: 0.0259552001953125|cri_loss: 0.0132904052734375|unsuper_loss: 0.0
+average reward score: 5.515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.25%) |Training time=0.46s (21.18%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2301|ppo_ep: 1|act_loss: 0.07537841796875|cri_loss: 0.038604736328125|unsuper_loss: 0.0
+average reward score: 5.1171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.46s |Gather latency=0.00s (0.00%) |Generate time=1.59s (64.52%) |Training time=0.46s (18.84%) |Others=0.41 (16.63%)|CurSamplesPerSec=12.98 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2302|ppo_ep: 1|act_loss: 0.051544189453125|cri_loss: 0.0265960693359375|unsuper_loss: 0.0
+average reward score: 5.265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.77%) |Training time=0.47s (21.71%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2303|ppo_ep: 1|act_loss: 0.0904541015625|cri_loss: 0.047149658203125|unsuper_loss: 0.0
+average reward score: 5.05078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.77%) |Training time=0.47s (21.67%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2304|ppo_ep: 1|act_loss: -0.0343017578125|cri_loss: -0.0167236328125|unsuper_loss: 0.0
+average reward score: 5.24609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.92%) |Training time=0.49s (20.88%) |Others=0.10 (4.20%)|CurSamplesPerSec=13.64 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2305|ppo_ep: 1|act_loss: -0.024200439453125|cri_loss: -0.0115966796875|unsuper_loss: 0.0
+average reward score: 5.12109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.47s (21.96%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2306|ppo_ep: 1|act_loss: 0.001941680908203125|cri_loss: 0.0023021697998046875|unsuper_loss: 0.0
+average reward score: 5.3671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.84%) |Training time=0.47s (21.61%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2307|ppo_ep: 1|act_loss: -0.0360107421875|cri_loss: -0.0176544189453125|unsuper_loss: 0.0
+average reward score: 5.34375
+-------------------------------------------------------------------------------------
+|E2E latency=2.61s |Gather latency=0.00s (0.00%) |Generate time=1.59s (60.94%) |Training time=0.46s (17.63%) |Others=0.56 (21.43%)|CurSamplesPerSec=12.25 |AvgSamplesPerSec=14.43
+epoch: 0|step: 2308|ppo_ep: 1|act_loss: -0.017486572265625|cri_loss: -0.00789642333984375|unsuper_loss: 0.0
+average reward score: 5.046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.27%) |Training time=0.48s (22.18%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.43
+[2023-04-14 10:12:46,476] [INFO] [logging.py:96:log_dist] [Rank 0] step=2310, skipped=26, lr=[8.042496030117185e-06, 8.042496030117185e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:12:46,494] [INFO] [timer.py:199:stop] epoch=0/micro_step=2310/global_step=2310, RunningAvgSamplesPerSec=106.90101331828065, CurrSamplesPerSec=105.29688544396153, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:12:46,587] [INFO] [logging.py:96:log_dist] [Rank 0] step=2310, skipped=32, lr=[4.171395469687636e-06, 4.171395469687636e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2309|ppo_ep: 1|act_loss: 0.034759521484375|cri_loss: 0.017852783203125|unsuper_loss: 0.0
+average reward score: 5.5703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.76%) |Training time=0.47s (21.70%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2310|ppo_ep: 1|act_loss: 0.018341064453125|cri_loss: 0.0097503662109375|unsuper_loss: 0.0
+average reward score: 5.41015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.88%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2311|ppo_ep: 1|act_loss: 0.01898193359375|cri_loss: 0.0101470947265625|unsuper_loss: 0.0
+average reward score: 5.68359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.27%) |Training time=0.48s (22.20%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.44
+[2023-04-14 10:12:53,077] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 2312|ppo_ep: 1|act_loss: 0.0770263671875|cri_loss: 0.04034423828125|unsuper_loss: 0.0
+average reward score: 5.78515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.39%) |Training time=0.49s (22.57%) |Others=0.09 (4.04%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.44
+[2023-04-14 10:12:55,219] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 2313|ppo_ep: 1|act_loss: 0.0029315948486328125|cri_loss: 0.0021209716796875|unsuper_loss: 0.0
+average reward score: 5.6171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.59s (69.81%) |Training time=0.47s (20.57%) |Others=0.22 (9.62%)|CurSamplesPerSec=14.08 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2314|ppo_ep: 1|act_loss: -0.02044677734375|cri_loss: -0.01010894775390625|unsuper_loss: 0.0
+average reward score: 4.8828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.90%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2315|ppo_ep: 1|act_loss: 0.0189056396484375|cri_loss: 0.01038360595703125|unsuper_loss: 0.0
+average reward score: 5.1796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.62s (71.70%) |Training time=0.50s (21.90%) |Others=0.14 (6.40%)|CurSamplesPerSec=14.14 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2316|ppo_ep: 1|act_loss: 0.01366424560546875|cri_loss: 0.00711822509765625|unsuper_loss: 0.0
+average reward score: 5.0546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.82%) |Training time=0.46s (21.32%) |Others=0.11 (4.86%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2317|ppo_ep: 1|act_loss: 0.034149169921875|cri_loss: 0.0182647705078125|unsuper_loss: 0.0
+average reward score: 5.39453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.43%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2318|ppo_ep: 1|act_loss: -0.0007562637329101562|cri_loss: -0.00014853477478027344|unsuper_loss: 0.0
+average reward score: 5.55859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.59%) |Training time=0.47s (21.83%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.44
+[2023-04-14 10:13:08,300] [INFO] [logging.py:96:log_dist] [Rank 0] step=2320, skipped=26, lr=[8.028629188516887e-06, 8.028629188516887e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:13:08,539] [INFO] [timer.py:199:stop] epoch=0/micro_step=2320/global_step=2320, RunningAvgSamplesPerSec=106.84855308456247, CurrSamplesPerSec=61.47600233413061, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:13:08,632] [INFO] [logging.py:96:log_dist] [Rank 0] step=2320, skipped=34, lr=[4.165661383686135e-06, 4.165661383686135e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2319|ppo_ep: 1|act_loss: -0.05047607421875|cri_loss: -0.02435302734375|unsuper_loss: 0.0
+average reward score: 5.30859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.59s (67.09%) |Training time=0.68s (28.79%) |Others=0.10 (4.11%)|CurSamplesPerSec=13.49 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2320|ppo_ep: 1|act_loss: 0.0015115737915039062|cri_loss: 0.0011758804321289062|unsuper_loss: 0.0
+average reward score: 5.05859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.88%) |Training time=0.49s (20.92%) |Others=0.10 (4.19%)|CurSamplesPerSec=13.65 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2321|ppo_ep: 1|act_loss: -0.0006151199340820312|cri_loss: -3.0040740966796875e-05|unsuper_loss: 0.0
+average reward score: 5.44140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.73%) |Training time=0.47s (21.70%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2322|ppo_ep: 1|act_loss: -0.0045928955078125|cri_loss: -0.00206756591796875|unsuper_loss: 0.0
+average reward score: 5.6953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.46s (21.38%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2323|ppo_ep: 1|act_loss: 0.0068511962890625|cri_loss: 0.00389862060546875|unsuper_loss: 0.0
+average reward score: 5.37890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.47s (21.63%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2324|ppo_ep: 1|act_loss: 0.016845703125|cri_loss: 0.0091400146484375|unsuper_loss: 0.0
+average reward score: 5.46484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.91%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2325|ppo_ep: 1|act_loss: -0.028961181640625|cri_loss: -0.01421356201171875|unsuper_loss: 0.0
+average reward score: 5.5625
+-------------------------------------------------------------------------------------
+|E2E latency=2.08s |Gather latency=0.00s (0.00%) |Generate time=1.59s (76.63%) |Training time=0.39s (18.61%) |Others=0.10 (4.76%)|CurSamplesPerSec=15.42 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2326|ppo_ep: 1|act_loss: 0.0291290283203125|cri_loss: 0.01497650146484375|unsuper_loss: 0.0
+average reward score: 5.06640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.40%) |Training time=0.48s (22.07%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2327|ppo_ep: 1|act_loss: 0.020904541015625|cri_loss: 0.01064300537109375|unsuper_loss: 0.0
+average reward score: 5.41015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.14%) |Training time=0.46s (21.28%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44
+[2023-04-14 10:13:28,936] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 2328|ppo_ep: 1|act_loss: 0.05792236328125|cri_loss: 0.03179931640625|unsuper_loss: 0.0
+average reward score: 5.08203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.59s (75.04%) |Training time=0.43s (20.32%) |Others=0.10 (4.64%)|CurSamplesPerSec=15.13 |AvgSamplesPerSec=14.44
+[2023-04-14 10:13:31,062] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+[2023-04-14 10:13:31,063] [INFO] [logging.py:96:log_dist] [Rank 0] step=2330, skipped=28, lr=[8.017501515777759e-06, 8.017501515777759e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:13:31,063] [INFO] [timer.py:199:stop] epoch=0/micro_step=2330/global_step=2330, RunningAvgSamplesPerSec=106.86175914958172, CurrSamplesPerSec=123.63699739954438, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:13:31,155] [INFO] [logging.py:96:log_dist] [Rank 0] step=2330, skipped=34, lr=[4.158471562355229e-06, 4.158471562355229e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2329|ppo_ep: 1|act_loss: -0.018341064453125|cri_loss: -0.00897216796875|unsuper_loss: 0.0
+average reward score: 5.40234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.62%) |Training time=0.42s (19.78%) |Others=0.10 (4.59%)|CurSamplesPerSec=15.05 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2330|ppo_ep: 1|act_loss: 0.0045166015625|cri_loss: 0.002838134765625|unsuper_loss: 0.0
+average reward score: 4.87109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.14%) |Training time=0.45s (21.26%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2331|ppo_ep: 1|act_loss: 0.00347137451171875|cri_loss: 0.0021820068359375|unsuper_loss: 0.0
+average reward score: 5.37109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.17%) |Training time=0.45s (21.23%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2332|ppo_ep: 1|act_loss: -0.056640625|cri_loss: -0.02764892578125|unsuper_loss: 0.0
+average reward score: 5.0625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.34%) |Training time=0.45s (21.06%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2333|ppo_ep: 1|act_loss: -0.02557373046875|cri_loss: -0.01247406005859375|unsuper_loss: 0.0
+average reward score: 5.76953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.17%) |Training time=0.45s (21.24%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2334|ppo_ep: 1|act_loss: 0.012054443359375|cri_loss: 0.006420135498046875|unsuper_loss: 0.0
+average reward score: 4.94921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.43s |Gather latency=0.00s (0.00%) |Generate time=1.75s (71.93%) |Training time=0.45s (18.59%) |Others=0.23 (9.47%)|CurSamplesPerSec=13.14 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2335|ppo_ep: 1|act_loss: 0.03173828125|cri_loss: 0.01629638671875|unsuper_loss: 0.0
+average reward score: 5.796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.22%) |Training time=0.45s (21.18%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2336|ppo_ep: 1|act_loss: 0.0335693359375|cri_loss: 0.0183258056640625|unsuper_loss: 0.0
+average reward score: 4.7265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.28%) |Training time=0.45s (21.12%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2337|ppo_ep: 1|act_loss: -0.042266845703125|cri_loss: -0.0205078125|unsuper_loss: 0.0
+average reward score: 5.2734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.27%) |Training time=0.45s (21.11%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2338|ppo_ep: 1|act_loss: 0.015228271484375|cri_loss: 0.0080108642578125|unsuper_loss: 0.0
+average reward score: 5.4921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.79%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.44
+[2023-04-14 10:13:52,736] [INFO] [logging.py:96:log_dist] [Rank 0] step=2340, skipped=28, lr=[8.003549348716149e-06, 8.003549348716149e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:13:52,754] [INFO] [timer.py:199:stop] epoch=0/micro_step=2340/global_step=2340, RunningAvgSamplesPerSec=106.87153709245142, CurrSamplesPerSec=106.25905734856126, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:13:52,846] [INFO] [logging.py:96:log_dist] [Rank 0] step=2340, skipped=34, lr=[4.151257158474923e-06, 4.151257158474923e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2339|ppo_ep: 1|act_loss: 0.041748046875|cri_loss: 0.02178955078125|unsuper_loss: 0.0
+average reward score: 5.484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.76%) |Training time=0.46s (21.67%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2340|ppo_ep: 1|act_loss: -0.0221710205078125|cri_loss: -0.01041412353515625|unsuper_loss: 0.0
+average reward score: 5.3359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.82%) |Training time=0.46s (21.58%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2341|ppo_ep: 1|act_loss: 0.018798828125|cri_loss: 0.0099029541015625|unsuper_loss: 0.0
+average reward score: 4.84765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.10%) |Training time=0.46s (21.32%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2342|ppo_ep: 1|act_loss: 0.00762939453125|cri_loss: 0.007808685302734375|unsuper_loss: 0.0
+average reward score: 5.7578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.23%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2343|ppo_ep: 1|act_loss: -0.029632568359375|cri_loss: -0.01442718505859375|unsuper_loss: 0.0
+average reward score: 5.5
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.20%) |Training time=0.46s (21.22%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2344|ppo_ep: 1|act_loss: 0.008575439453125|cri_loss: 0.004589080810546875|unsuper_loss: 0.0
+average reward score: 5.109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.68%) |Training time=0.45s (20.80%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2345|ppo_ep: 1|act_loss: 0.0219573974609375|cri_loss: 0.01154327392578125|unsuper_loss: 0.0
+average reward score: 5.65234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.69s (75.47%) |Training time=0.45s (20.13%) |Others=0.10 (4.39%)|CurSamplesPerSec=14.29 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2346|ppo_ep: 1|act_loss: 0.00861358642578125|cri_loss: 0.00467681884765625|unsuper_loss: 0.0
+average reward score: 5.6328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.32%) |Training time=0.45s (21.07%) |Others=0.10 (4.62%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2347|ppo_ep: 1|act_loss: 0.0147552490234375|cri_loss: 0.00815582275390625|unsuper_loss: 0.0
+average reward score: 5.31640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.33%) |Training time=0.45s (21.07%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2348|ppo_ep: 1|act_loss: -0.0386962890625|cri_loss: -0.01849365234375|unsuper_loss: 0.0
+average reward score: 5.12109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.37%) |Training time=0.45s (21.04%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.44
+[2023-04-14 10:14:14,328] [INFO] [logging.py:96:log_dist] [Rank 0] step=2350, skipped=28, lr=[7.989550067885356e-06, 7.989550067885356e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:14:14,860] [INFO] [timer.py:199:stop] epoch=0/micro_step=2350/global_step=2350, RunningAvgSamplesPerSec=106.79837881327074, CurrSamplesPerSec=38.86040216651423, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:14:14,953] [INFO] [logging.py:96:log_dist] [Rank 0] step=2350, skipped=34, lr=[4.1440182789800875e-06, 4.1440182789800875e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2349|ppo_ep: 1|act_loss: -0.01824951171875|cri_loss: -0.00884246826171875|unsuper_loss: 0.0
+average reward score: 5.453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.67s |Gather latency=0.00s (0.00%) |Generate time=1.59s (59.43%) |Training time=0.99s (36.91%) |Others=0.10 (3.65%)|CurSamplesPerSec=11.98 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2350|ppo_ep: 1|act_loss: 0.01251220703125|cri_loss: 0.00763702392578125|unsuper_loss: 0.0
+average reward score: 5.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.20%) |Training time=0.45s (21.20%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2351|ppo_ep: 1|act_loss: 0.006336212158203125|cri_loss: 0.0037937164306640625|unsuper_loss: 0.0
+average reward score: 5.35546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.34%) |Training time=0.45s (21.07%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2352|ppo_ep: 1|act_loss: -0.009918212890625|cri_loss: -0.004642486572265625|unsuper_loss: 0.0
+average reward score: 5.77734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.47s (21.65%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2353|ppo_ep: 1|act_loss: 0.069091796875|cri_loss: 0.03546142578125|unsuper_loss: 0.0
+average reward score: 4.89453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.75%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2354|ppo_ep: 1|act_loss: 0.0645751953125|cri_loss: 0.033111572265625|unsuper_loss: 0.0
+average reward score: 5.7109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.94%) |Training time=0.46s (21.48%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2355|ppo_ep: 1|act_loss: -0.000469207763671875|cri_loss: 0.00017404556274414062|unsuper_loss: 0.0
+average reward score: 5.0859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.80%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2356|ppo_ep: 1|act_loss: 0.0271759033203125|cri_loss: 0.0141143798828125|unsuper_loss: 0.0
+average reward score: 5.109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.87%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2357|ppo_ep: 1|act_loss: -0.05279541015625|cri_loss: -0.025543212890625|unsuper_loss: 0.0
+average reward score: 5.7265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.88%) |Training time=0.46s (21.55%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2358|ppo_ep: 1|act_loss: -0.038299560546875|cri_loss: -0.017913818359375|unsuper_loss: 0.0
+average reward score: 5.3671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.85%) |Training time=0.46s (21.58%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44
+[2023-04-14 10:14:36,326] [INFO] [logging.py:96:log_dist] [Rank 0] step=2360, skipped=28, lr=[7.975503880788498e-06, 7.975503880788498e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:14:36,344] [INFO] [timer.py:199:stop] epoch=0/micro_step=2360/global_step=2360, RunningAvgSamplesPerSec=106.79686323206259, CurrSamplesPerSec=106.73972149543116, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:14:36,437] [INFO] [logging.py:96:log_dist] [Rank 0] step=2360, skipped=34, lr=[4.13675503116838e-06, 4.13675503116838e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2359|ppo_ep: 1|act_loss: -0.00870513916015625|cri_loss: -0.003223419189453125|unsuper_loss: 0.0
+average reward score: 5.69140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.81%) |Training time=0.46s (21.61%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2360|ppo_ep: 1|act_loss: -0.0218505859375|cri_loss: -0.010284423828125|unsuper_loss: 0.0
+average reward score: 5.37890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.83%) |Training time=0.46s (21.59%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2361|ppo_ep: 1|act_loss: 0.003803253173828125|cri_loss: 0.002079010009765625|unsuper_loss: 0.0
+average reward score: 5.25390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.04%) |Training time=0.46s (21.35%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2362|ppo_ep: 1|act_loss: 0.09405517578125|cri_loss: 0.04913330078125|unsuper_loss: 0.0
+average reward score: 6.08203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.81%) |Training time=0.46s (21.61%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2363|ppo_ep: 1|act_loss: 0.03997802734375|cri_loss: 0.0205230712890625|unsuper_loss: 0.0
+average reward score: 5.0234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.92%) |Training time=0.46s (21.50%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2364|ppo_ep: 1|act_loss: -0.01141357421875|cri_loss: -0.00485992431640625|unsuper_loss: 0.0
+average reward score: 5.0546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.68s |Gather latency=0.00s (0.00%) |Generate time=1.58s (58.98%) |Training time=0.46s (17.17%) |Others=0.64 (23.86%)|CurSamplesPerSec=11.94 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2365|ppo_ep: 1|act_loss: 0.00457000732421875|cri_loss: 0.00278472900390625|unsuper_loss: 0.0
+average reward score: 4.96875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.90%) |Training time=0.46s (21.50%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2366|ppo_ep: 1|act_loss: 0.01678466796875|cri_loss: 0.00885772705078125|unsuper_loss: 0.0
+average reward score: 5.65234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.14%) |Training time=0.45s (21.26%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2367|ppo_ep: 1|act_loss: -0.0163726806640625|cri_loss: -0.0079345703125|unsuper_loss: 0.0
+average reward score: 5.48828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.99%) |Training time=0.46s (21.40%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2368|ppo_ep: 1|act_loss: 0.0004329681396484375|cri_loss: 0.0009245872497558594|unsuper_loss: 0.0
+average reward score: 5.1171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.72%) |Training time=0.46s (21.69%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.44
+[2023-04-14 10:14:58,277] [INFO] [logging.py:96:log_dist] [Rank 0] step=2370, skipped=28, lr=[7.961410995623948e-06, 7.961410995623948e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:14:58,296] [INFO] [timer.py:199:stop] epoch=0/micro_step=2370/global_step=2370, RunningAvgSamplesPerSec=106.79987659925104, CurrSamplesPerSec=108.05257971649202, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:14:58,388] [INFO] [logging.py:96:log_dist] [Rank 0] step=2370, skipped=34, lr=[4.129467522698653e-06, 4.129467522698653e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2369|ppo_ep: 1|act_loss: -0.04071044921875|cri_loss: -0.01959228515625|unsuper_loss: 0.0
+average reward score: 5.02734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.46s (21.41%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2370|ppo_ep: 1|act_loss: -0.04754638671875|cri_loss: -0.023162841796875|unsuper_loss: 0.0
+average reward score: 5.46484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.87%) |Training time=0.46s (21.55%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2371|ppo_ep: 1|act_loss: -0.02392578125|cri_loss: -0.0097503662109375|unsuper_loss: 0.0
+average reward score: 5.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.05%) |Training time=0.46s (21.36%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2372|ppo_ep: 1|act_loss: 0.03375244140625|cri_loss: 0.0176544189453125|unsuper_loss: 0.0
+average reward score: 5.765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.70%) |Training time=0.44s (20.72%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2373|ppo_ep: 1|act_loss: 0.03924560546875|cri_loss: 0.02032470703125|unsuper_loss: 0.0
+average reward score: 5.7109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.45%) |Training time=0.45s (20.96%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2374|ppo_ep: 1|act_loss: 0.0574951171875|cri_loss: 0.03045654296875|unsuper_loss: 0.0
+average reward score: 5.01171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.61%) |Training time=0.45s (20.03%) |Others=0.10 (4.36%)|CurSamplesPerSec=14.20 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2375|ppo_ep: 1|act_loss: 0.0867919921875|cri_loss: 0.047027587890625|unsuper_loss: 0.0
+average reward score: 5.3203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.13%) |Training time=0.46s (21.29%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2376|ppo_ep: 1|act_loss: 0.0019407272338867188|cri_loss: 0.00193023681640625|unsuper_loss: 0.0
+average reward score: 4.84765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.05%) |Training time=0.46s (21.36%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2377|ppo_ep: 1|act_loss: -0.01904296875|cri_loss: -0.00902557373046875|unsuper_loss: 0.0
+average reward score: 4.78515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.71%) |Training time=0.47s (21.71%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2378|ppo_ep: 1|act_loss: -0.0322265625|cri_loss: -0.01522064208984375|unsuper_loss: 0.0
+average reward score: 5.1640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.90%) |Training time=0.46s (21.53%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44
+[2023-04-14 10:15:19,996] [INFO] [logging.py:96:log_dist] [Rank 0] step=2380, skipped=28, lr=[7.947271621282263e-06, 7.947271621282263e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:15:20,014] [INFO] [timer.py:199:stop] epoch=0/micro_step=2380/global_step=2380, RunningAvgSamplesPerSec=106.80961062155866, CurrSamplesPerSec=108.14730586013292, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:15:20,107] [INFO] [logging.py:96:log_dist] [Rank 0] step=2380, skipped=34, lr=[4.122155861589364e-06, 4.122155861589364e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2379|ppo_ep: 1|act_loss: -0.03759765625|cri_loss: -0.0180816650390625|unsuper_loss: 0.0
+average reward score: 5.453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.98%) |Training time=0.46s (19.79%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.80 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2380|ppo_ep: 1|act_loss: -0.0775146484375|cri_loss: -0.037384033203125|unsuper_loss: 0.0
+average reward score: 5.37890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.46s (21.38%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2381|ppo_ep: 1|act_loss: 0.005031585693359375|cri_loss: 0.0032672882080078125|unsuper_loss: 0.0
+average reward score: 5.078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.46s (21.38%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2382|ppo_ep: 1|act_loss: 0.00275421142578125|cri_loss: 0.0020999908447265625|unsuper_loss: 0.0
+average reward score: 5.24609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.44%) |Training time=0.47s (21.99%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2383|ppo_ep: 1|act_loss: 0.025909423828125|cri_loss: 0.0133514404296875|unsuper_loss: 0.0
+average reward score: 5.1328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.63%) |Training time=0.47s (21.79%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2384|ppo_ep: 1|act_loss: -0.0069427490234375|cri_loss: -0.003017425537109375|unsuper_loss: 0.0
+average reward score: 5.140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.10%) |Training time=0.46s (21.36%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2385|ppo_ep: 1|act_loss: 0.0206756591796875|cri_loss: 0.01073455810546875|unsuper_loss: 0.0
+average reward score: 5.2109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.97%) |Training time=0.46s (21.40%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2386|ppo_ep: 1|act_loss: 0.0285186767578125|cri_loss: 0.015472412109375|unsuper_loss: 0.0
+average reward score: 4.9453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.77%) |Training time=0.46s (21.63%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2387|ppo_ep: 1|act_loss: 0.01178741455078125|cri_loss: 0.006023406982421875|unsuper_loss: 0.0
+average reward score: 5.60546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.81%) |Training time=0.47s (21.62%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2388|ppo_ep: 1|act_loss: 0.00728607177734375|cri_loss: 0.004486083984375|unsuper_loss: 0.0
+average reward score: 5.4453125
+-------------------------------------------------------------------------------------
+|E2E latency=3.55s |Gather latency=0.00s (0.00%) |Generate time=1.59s (44.74%) |Training time=0.47s (13.16%) |Others=1.49 (42.10%)|CurSamplesPerSec=9.02 |AvgSamplesPerSec=14.44
+[2023-04-14 10:15:42,901] [INFO] [logging.py:96:log_dist] [Rank 0] step=2390, skipped=28, lr=[7.933085967343084e-06, 7.933085967343084e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:15:42,920] [INFO] [timer.py:199:stop] epoch=0/micro_step=2390/global_step=2390, RunningAvgSamplesPerSec=106.80495752503842, CurrSamplesPerSec=102.83638302795524, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:15:43,012] [INFO] [logging.py:96:log_dist] [Rank 0] step=2390, skipped=34, lr=[4.114820156216969e-06, 4.114820156216969e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2389|ppo_ep: 1|act_loss: 0.00827789306640625|cri_loss: 0.004276275634765625|unsuper_loss: 0.0
+average reward score: 5.98828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.41%) |Training time=0.47s (22.04%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2390|ppo_ep: 1|act_loss: 0.07122802734375|cri_loss: 0.03704833984375|unsuper_loss: 0.0
+average reward score: 5.60546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.09%) |Training time=0.48s (22.36%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2391|ppo_ep: 1|act_loss: -0.0092926025390625|cri_loss: -0.0042572021484375|unsuper_loss: 0.0
+average reward score: 5.3984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.47s (21.96%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2392|ppo_ep: 1|act_loss: 0.034332275390625|cri_loss: 0.018798828125|unsuper_loss: 0.0
+average reward score: 5.453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.53%) |Training time=0.47s (21.91%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2393|ppo_ep: 1|act_loss: 0.005512237548828125|cri_loss: 0.0033626556396484375|unsuper_loss: 0.0
+average reward score: 5.21484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.59s (70.45%) |Training time=0.48s (21.54%) |Others=0.18 (8.01%)|CurSamplesPerSec=14.22 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2394|ppo_ep: 1|act_loss: -0.0863037109375|cri_loss: -0.039642333984375|unsuper_loss: 0.0
+average reward score: 5.08984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.50%) |Training time=0.47s (21.93%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2395|ppo_ep: 1|act_loss: 0.0174560546875|cri_loss: 0.0109100341796875|unsuper_loss: 0.0
+average reward score: 5.234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.88%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2396|ppo_ep: 1|act_loss: 0.0147552490234375|cri_loss: 0.0081024169921875|unsuper_loss: 0.0
+average reward score: 5.15625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.87%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2397|ppo_ep: 1|act_loss: 0.04022216796875|cri_loss: 0.02227783203125|unsuper_loss: 0.0
+average reward score: 5.05078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.90%) |Training time=0.46s (21.53%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2398|ppo_ep: 1|act_loss: 0.00225067138671875|cri_loss: 0.002269744873046875|unsuper_loss: 0.0
+average reward score: 5.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.80%) |Training time=0.47s (21.64%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.44
+[2023-04-14 10:16:04,555] [INFO] [logging.py:96:log_dist] [Rank 0] step=2400, skipped=28, lr=[7.918854244072016e-06, 7.918854244072016e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:16:04,930] [INFO] [timer.py:199:stop] epoch=0/micro_step=2400/global_step=2400, RunningAvgSamplesPerSec=106.73590195556389, CurrSamplesPerSec=48.33765913846442, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:16:05,022] [INFO] [logging.py:96:log_dist] [Rank 0] step=2400, skipped=34, lr=[4.107460515314316e-06, 4.107460515314316e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2399|ppo_ep: 1|act_loss: 0.03839111328125|cri_loss: 0.019744873046875|unsuper_loss: 0.0
+average reward score: 5.015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.51s |Gather latency=0.00s (0.00%) |Generate time=1.59s (63.25%) |Training time=0.83s (32.86%) |Others=0.10 (3.89%)|CurSamplesPerSec=12.74 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2400|ppo_ep: 1|act_loss: 0.0072479248046875|cri_loss: 0.0039520263671875|unsuper_loss: 0.0
+average reward score: 5.57421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.46%) |Training time=0.48s (22.00%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2401|ppo_ep: 1|act_loss: 0.014312744140625|cri_loss: 0.00835418701171875|unsuper_loss: 0.0
+average reward score: 5.26953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.94%) |Training time=0.46s (21.49%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2402|ppo_ep: 1|act_loss: -0.0147247314453125|cri_loss: -0.0068511962890625|unsuper_loss: 0.0
+average reward score: 4.9453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.29%) |Training time=0.49s (22.20%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2403|ppo_ep: 1|act_loss: -0.0408935546875|cri_loss: -0.0194549560546875|unsuper_loss: 0.0
+average reward score: 5.0859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.99%) |Training time=0.47s (20.65%) |Others=0.10 (4.36%)|CurSamplesPerSec=14.17 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2404|ppo_ep: 1|act_loss: -0.0189361572265625|cri_loss: -0.0086669921875|unsuper_loss: 0.0
+average reward score: 5.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.49%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2405|ppo_ep: 1|act_loss: -0.0101776123046875|cri_loss: -0.0037841796875|unsuper_loss: 0.0
+average reward score: 5.71875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.85%) |Training time=0.46s (21.57%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2406|ppo_ep: 1|act_loss: -0.02496337890625|cri_loss: -0.0120086669921875|unsuper_loss: 0.0
+average reward score: 5.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.92%) |Training time=0.46s (21.52%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2407|ppo_ep: 1|act_loss: -0.023956298828125|cri_loss: -0.01080322265625|unsuper_loss: 0.0
+average reward score: 5.08203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.48s (22.09%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2408|ppo_ep: 1|act_loss: -0.03350830078125|cri_loss: -0.0165557861328125|unsuper_loss: 0.0
+average reward score: 5.5390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.79%) |Training time=0.49s (21.01%) |Others=0.10 (4.20%)|CurSamplesPerSec=13.68 |AvgSamplesPerSec=14.44
+[2023-04-14 10:16:26,789] [INFO] [logging.py:96:log_dist] [Rank 0] step=2410, skipped=28, lr=[7.904576662417536e-06, 7.904576662417536e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:16:26,807] [INFO] [timer.py:199:stop] epoch=0/micro_step=2410/global_step=2410, RunningAvgSamplesPerSec=106.72267940839932, CurrSamplesPerSec=106.29027667458327, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:16:26,900] [INFO] [logging.py:96:log_dist] [Rank 0] step=2410, skipped=34, lr=[4.10007704796904e-06, 4.10007704796904e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2409|ppo_ep: 1|act_loss: 0.013824462890625|cri_loss: 0.00738525390625|unsuper_loss: 0.0
+average reward score: 5.3671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.52%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2410|ppo_ep: 1|act_loss: 0.06011962890625|cri_loss: 0.030853271484375|unsuper_loss: 0.0
+average reward score: 5.49609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.51%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2411|ppo_ep: 1|act_loss: 0.0352783203125|cri_loss: 0.0180816650390625|unsuper_loss: 0.0
+average reward score: 5.81640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.00%) |Training time=0.46s (21.42%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2412|ppo_ep: 1|act_loss: -0.01898193359375|cri_loss: -0.0083770751953125|unsuper_loss: 0.0
+average reward score: 4.51953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.83%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2413|ppo_ep: 1|act_loss: 0.0094451904296875|cri_loss: 0.005046844482421875|unsuper_loss: 0.0
+average reward score: 5.3515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.81%) |Training time=0.46s (21.62%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44
+[2023-04-14 10:16:37,982] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 2414|ppo_ep: 1|act_loss: 0.016632080078125|cri_loss: 0.00992584228515625|unsuper_loss: 0.0
+average reward score: 5.01171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.06s |Gather latency=0.00s (0.00%) |Generate time=1.58s (76.94%) |Training time=0.39s (18.77%) |Others=0.09 (4.29%)|CurSamplesPerSec=15.53 |AvgSamplesPerSec=14.44
+[2023-04-14 10:16:40,127] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 2415|ppo_ep: 1|act_loss: 0.0082550048828125|cri_loss: 0.00455474853515625|unsuper_loss: 0.0
+average reward score: 5.265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.01%) |Training time=0.47s (21.90%) |Others=0.09 (4.09%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2416|ppo_ep: 1|act_loss: -0.0082855224609375|cri_loss: -0.002712249755859375|unsuper_loss: 0.0
+average reward score: 4.8359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.53%) |Training time=0.47s (21.90%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2417|ppo_ep: 1|act_loss: -0.03045654296875|cri_loss: -0.0147552490234375|unsuper_loss: 0.0
+average reward score: 5.015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.34%) |Training time=0.48s (22.12%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2418|ppo_ep: 1|act_loss: -0.05352783203125|cri_loss: -0.0257720947265625|unsuper_loss: 0.0
+average reward score: 5.91796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.07%) |Training time=0.46s (21.34%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.44
+[2023-04-14 10:16:48,627] [INFO] [logging.py:96:log_dist] [Rank 0] step=2420, skipped=28, lr=[7.890253434007843e-06, 7.890253434007843e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:16:48,646] [INFO] [timer.py:199:stop] epoch=0/micro_step=2420/global_step=2420, RunningAvgSamplesPerSec=106.727923793645, CurrSamplesPerSec=107.89796226803395, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:16:48,739] [INFO] [logging.py:96:log_dist] [Rank 0] step=2420, skipped=36, lr=[4.09415319258652e-06, 4.09415319258652e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2419|ppo_ep: 1|act_loss: -0.021881103515625|cri_loss: -0.0089111328125|unsuper_loss: 0.0
+average reward score: 5.58203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.98%) |Training time=0.46s (21.45%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2420|ppo_ep: 1|act_loss: -0.023956298828125|cri_loss: -0.0104522705078125|unsuper_loss: 0.0
+average reward score: 5.02734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.59%) |Training time=0.47s (21.82%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2421|ppo_ep: 1|act_loss: -0.0184326171875|cri_loss: -0.00875091552734375|unsuper_loss: 0.0
+average reward score: 4.83203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.17%) |Training time=0.45s (21.23%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2422|ppo_ep: 1|act_loss: 0.0226287841796875|cri_loss: 0.0121002197265625|unsuper_loss: 0.0
+average reward score: 5.0
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.92%) |Training time=0.46s (21.49%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2423|ppo_ep: 1|act_loss: 0.03912353515625|cri_loss: 0.0204925537109375|unsuper_loss: 0.0
+average reward score: 4.7265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.76%) |Training time=0.46s (19.95%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.88 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2424|ppo_ep: 1|act_loss: -0.020233154296875|cri_loss: -0.009307861328125|unsuper_loss: 0.0
+average reward score: 5.34375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.88%) |Training time=0.46s (21.53%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2425|ppo_ep: 1|act_loss: -0.003818511962890625|cri_loss: -0.0005397796630859375|unsuper_loss: 0.0
+average reward score: 5.3671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.96%) |Training time=0.46s (21.45%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2426|ppo_ep: 1|act_loss: 0.0031452178955078125|cri_loss: 0.00262451171875|unsuper_loss: 0.0
+average reward score: 5.27734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.92%) |Training time=0.46s (21.49%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2427|ppo_ep: 1|act_loss: 0.01540374755859375|cri_loss: 0.0081024169921875|unsuper_loss: 0.0
+average reward score: 5.15234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.18%) |Training time=0.46s (21.24%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2428|ppo_ep: 1|act_loss: -0.0140380859375|cri_loss: -0.00598907470703125|unsuper_loss: 0.0
+average reward score: 5.26953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.22%) |Training time=0.45s (21.20%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.44
+[2023-04-14 10:17:10,235] [INFO] [logging.py:96:log_dist] [Rank 0] step=2430, skipped=28, lr=[7.875884771147738e-06, 7.875884771147738e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:17:10,700] [INFO] [timer.py:199:stop] epoch=0/micro_step=2430/global_step=2430, RunningAvgSamplesPerSec=106.66533428350265, CurrSamplesPerSec=42.60237837151541, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:17:10,793] [INFO] [logging.py:96:log_dist] [Rank 0] step=2430, skipped=36, lr=[4.0867271136716215e-06, 4.0867271136716215e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2429|ppo_ep: 1|act_loss: 0.012298583984375|cri_loss: 0.00710296630859375|unsuper_loss: 0.0
+average reward score: 5.05859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.60s |Gather latency=0.00s (0.00%) |Generate time=1.59s (61.03%) |Training time=0.91s (35.20%) |Others=0.10 (3.77%)|CurSamplesPerSec=12.32 |AvgSamplesPerSec=14.44
+[2023-04-14 10:17:12,818] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 2430|ppo_ep: 1|act_loss: 0.060546875|cri_loss: 0.031036376953125|unsuper_loss: 0.0
+average reward score: 4.734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.78%) |Training time=0.44s (20.58%) |Others=0.10 (4.64%)|CurSamplesPerSec=15.11 |AvgSamplesPerSec=14.44
+[2023-04-14 10:17:14,938] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 2431|ppo_ep: 1|act_loss: 0.031097412109375|cri_loss: 0.0160064697265625|unsuper_loss: 0.0
+average reward score: 5.7265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.71%) |Training time=0.42s (19.65%) |Others=0.10 (4.64%)|CurSamplesPerSec=15.09 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2432|ppo_ep: 1|act_loss: 0.023956298828125|cri_loss: 0.01248931884765625|unsuper_loss: 0.0
+average reward score: 5.125
+-------------------------------------------------------------------------------------
+|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.06%) |Training time=0.50s (22.35%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.36 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2433|ppo_ep: 1|act_loss: -0.008575439453125|cri_loss: -0.0037174224853515625|unsuper_loss: 0.0
+average reward score: 5.54296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.32%) |Training time=0.45s (21.08%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2434|ppo_ep: 1|act_loss: -0.03350830078125|cri_loss: -0.01513671875|unsuper_loss: 0.0
+average reward score: 5.0859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.00%) |Training time=0.46s (21.43%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2435|ppo_ep: 1|act_loss: -0.048828125|cri_loss: -0.0222625732421875|unsuper_loss: 0.0
+average reward score: 4.953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.05%) |Training time=0.46s (21.37%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2436|ppo_ep: 1|act_loss: -0.03192138671875|cri_loss: -0.01523590087890625|unsuper_loss: 0.0
+average reward score: 5.0859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.00%) |Training time=0.46s (21.43%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2437|ppo_ep: 1|act_loss: -0.03497314453125|cri_loss: -0.0169219970703125|unsuper_loss: 0.0
+average reward score: 5.203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.65%) |Training time=0.47s (21.81%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2438|ppo_ep: 1|act_loss: -0.00943756103515625|cri_loss: -0.004093170166015625|unsuper_loss: 0.0
+average reward score: 4.97265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.73%) |Training time=0.47s (20.02%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.78 |AvgSamplesPerSec=14.44
+[2023-04-14 10:17:32,354] [INFO] [logging.py:96:log_dist] [Rank 0] step=2440, skipped=30, lr=[7.86435727115419e-06, 7.86435727115419e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:17:32,372] [INFO] [timer.py:199:stop] epoch=0/micro_step=2440/global_step=2440, RunningAvgSamplesPerSec=106.67192282118397, CurrSamplesPerSec=106.79475676768126, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:17:32,464] [INFO] [logging.py:96:log_dist] [Rank 0] step=2440, skipped=36, lr=[4.079277515633127e-06, 4.079277515633127e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2439|ppo_ep: 1|act_loss: -0.02801513671875|cri_loss: -0.01326751708984375|unsuper_loss: 0.0
+average reward score: 4.84375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.92%) |Training time=0.46s (21.53%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2440|ppo_ep: 1|act_loss: 0.0024929046630859375|cri_loss: 0.002971649169921875|unsuper_loss: 0.0
+average reward score: 5.52734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.90%) |Training time=0.46s (21.54%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2441|ppo_ep: 1|act_loss: 0.0130157470703125|cri_loss: 0.00727081298828125|unsuper_loss: 0.0
+average reward score: 5.3046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.94%) |Training time=0.46s (21.48%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2442|ppo_ep: 1|act_loss: -0.056610107421875|cri_loss: -0.0220947265625|unsuper_loss: 0.0
+average reward score: 5.25
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.65%) |Training time=0.47s (21.79%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2443|ppo_ep: 1|act_loss: 0.002925872802734375|cri_loss: 0.0020599365234375|unsuper_loss: 0.0
+average reward score: 5.28515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.50%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2444|ppo_ep: 1|act_loss: -0.0277099609375|cri_loss: -0.0124053955078125|unsuper_loss: 0.0
+average reward score: 5.3828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.63%) |Training time=0.47s (21.80%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2445|ppo_ep: 1|act_loss: -0.01244354248046875|cri_loss: -0.00579071044921875|unsuper_loss: 0.0
+average reward score: 5.28125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.83%) |Training time=0.46s (21.58%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2446|ppo_ep: 1|act_loss: 0.003345489501953125|cri_loss: 0.0025730133056640625|unsuper_loss: 0.0
+average reward score: 5.1796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.85%) |Training time=0.46s (21.43%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2447|ppo_ep: 1|act_loss: 0.0196533203125|cri_loss: 0.010162353515625|unsuper_loss: 0.0
+average reward score: 5.50390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.90%) |Training time=0.49s (22.59%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2448|ppo_ep: 1|act_loss: 0.00513458251953125|cri_loss: 0.003154754638671875|unsuper_loss: 0.0
+average reward score: 5.7578125
+-------------------------------------------------------------------------------------
+|E2E latency=3.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (50.62%) |Training time=0.49s (15.71%) |Others=1.06 (33.67%)|CurSamplesPerSec=10.16 |AvgSamplesPerSec=14.44
+[2023-04-14 10:17:54,826] [INFO] [logging.py:96:log_dist] [Rank 0] step=2450, skipped=30, lr=[7.849907363439066e-06, 7.849907363439066e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:17:54,844] [INFO] [timer.py:199:stop] epoch=0/micro_step=2450/global_step=2450, RunningAvgSamplesPerSec=106.67218273543259, CurrSamplesPerSec=143.05433748050848, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:17:54,937] [INFO] [logging.py:96:log_dist] [Rank 0] step=2450, skipped=36, lr=[4.071804508892053e-06, 4.071804508892053e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2449|ppo_ep: 1|act_loss: 0.0030059814453125|cri_loss: 0.0019683837890625|unsuper_loss: 0.0
+average reward score: 5.13671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.09s |Gather latency=0.00s (0.00%) |Generate time=1.61s (76.84%) |Training time=0.39s (18.46%) |Others=0.10 (4.70%)|CurSamplesPerSec=15.28 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2450|ppo_ep: 1|act_loss: 0.0238037109375|cri_loss: 0.0124053955078125|unsuper_loss: 0.0
+average reward score: 5.75
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.47%) |Training time=0.48s (22.01%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2451|ppo_ep: 1|act_loss: 0.023345947265625|cri_loss: 0.01213836669921875|unsuper_loss: 0.0
+average reward score: 4.91015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.95%) |Training time=0.47s (21.52%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2452|ppo_ep: 1|act_loss: -0.028411865234375|cri_loss: -0.0133209228515625|unsuper_loss: 0.0
+average reward score: 5.86328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.61s (69.67%) |Training time=0.60s (26.07%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.87 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2453|ppo_ep: 1|act_loss: -0.03826904296875|cri_loss: -0.01776123046875|unsuper_loss: 0.0
+average reward score: 5.1484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.20%) |Training time=0.46s (21.27%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2454|ppo_ep: 1|act_loss: -0.0279541015625|cri_loss: -0.01248931884765625|unsuper_loss: 0.0
+average reward score: 5.98046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.85%) |Training time=0.47s (21.62%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2455|ppo_ep: 1|act_loss: -0.070068359375|cri_loss: -0.033843994140625|unsuper_loss: 0.0
+average reward score: 5.5
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.92%) |Training time=0.47s (21.53%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2456|ppo_ep: 1|act_loss: -0.0152740478515625|cri_loss: -0.00713348388671875|unsuper_loss: 0.0
+average reward score: 5.61328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.00%) |Training time=0.47s (21.47%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2457|ppo_ep: 1|act_loss: 0.03961181640625|cri_loss: 0.0210113525390625|unsuper_loss: 0.0
+average reward score: 5.94140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.77%) |Training time=0.47s (21.69%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2458|ppo_ep: 1|act_loss: 0.0264739990234375|cri_loss: 0.013702392578125|unsuper_loss: 0.0
+average reward score: 5.20703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.15%) |Training time=0.46s (21.30%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.44
+[2023-04-14 10:18:16,652] [INFO] [logging.py:96:log_dist] [Rank 0] step=2460, skipped=30, lr=[7.83541261929962e-06, 7.83541261929962e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:18:16,671] [INFO] [timer.py:199:stop] epoch=0/micro_step=2460/global_step=2460, RunningAvgSamplesPerSec=106.65022498334142, CurrSamplesPerSec=106.88601260323848, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:18:16,763] [INFO] [logging.py:96:log_dist] [Rank 0] step=2460, skipped=36, lr=[4.064308204216384e-06, 4.064308204216384e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2459|ppo_ep: 1|act_loss: 0.04412841796875|cri_loss: 0.0247955322265625|unsuper_loss: 0.0
+average reward score: 4.89453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.19%) |Training time=0.46s (21.30%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2460|ppo_ep: 1|act_loss: 0.00756072998046875|cri_loss: 0.004199981689453125|unsuper_loss: 0.0
+average reward score: 5.3046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.90%) |Training time=0.47s (21.58%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2461|ppo_ep: 1|act_loss: -0.016754150390625|cri_loss: -0.00782012939453125|unsuper_loss: 0.0
+average reward score: 5.046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.64s (73.36%) |Training time=0.50s (22.24%) |Others=0.10 (4.40%)|CurSamplesPerSec=14.30 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2462|ppo_ep: 1|act_loss: -0.0281982421875|cri_loss: -0.01314544677734375|unsuper_loss: 0.0
+average reward score: 5.26953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.72s (73.11%) |Training time=0.53s (22.69%) |Others=0.10 (4.20%)|CurSamplesPerSec=13.64 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2463|ppo_ep: 1|act_loss: 0.015106201171875|cri_loss: 0.00778961181640625|unsuper_loss: 0.0
+average reward score: 6.09765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.79%) |Training time=0.45s (20.65%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2464|ppo_ep: 1|act_loss: 0.005290985107421875|cri_loss: 0.003032684326171875|unsuper_loss: 0.0
+average reward score: 5.55859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.44%) |Training time=0.45s (21.01%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2465|ppo_ep: 1|act_loss: 0.03558349609375|cri_loss: 0.018585205078125|unsuper_loss: 0.0
+average reward score: 5.5625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.61%) |Training time=0.45s (20.82%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2466|ppo_ep: 1|act_loss: 0.05950927734375|cri_loss: 0.03204345703125|unsuper_loss: 0.0
+average reward score: 5.8828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.17%) |Training time=0.47s (21.34%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2467|ppo_ep: 1|act_loss: -0.024322509765625|cri_loss: -0.011688232421875|unsuper_loss: 0.0
+average reward score: 5.859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.83s |Gather latency=0.00s (0.00%) |Generate time=1.77s (62.55%) |Training time=0.47s (16.55%) |Others=0.59 (20.91%)|CurSamplesPerSec=11.32 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2468|ppo_ep: 1|act_loss: -0.0190582275390625|cri_loss: -0.0084686279296875|unsuper_loss: 0.0
+average reward score: 5.515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.32%) |Training time=0.46s (21.10%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.44
+[2023-04-14 10:18:39,237] [INFO] [logging.py:96:log_dist] [Rank 0] step=2470, skipped=30, lr=[7.820873253582933e-06, 7.820873253582933e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:18:39,255] [INFO] [timer.py:199:stop] epoch=0/micro_step=2470/global_step=2470, RunningAvgSamplesPerSec=106.64010750425537, CurrSamplesPerSec=108.77555969060602, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:18:39,348] [INFO] [logging.py:96:log_dist] [Rank 0] step=2470, skipped=36, lr=[4.0567887127194405e-06, 4.0567887127194405e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2469|ppo_ep: 1|act_loss: -0.0386962890625|cri_loss: -0.01812744140625|unsuper_loss: 0.0
+average reward score: 5.453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.33%) |Training time=0.46s (21.14%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2470|ppo_ep: 1|act_loss: -0.01959228515625|cri_loss: -0.00817108154296875|unsuper_loss: 0.0
+average reward score: 5.7890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.08%) |Training time=0.46s (21.37%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2471|ppo_ep: 1|act_loss: -0.060882568359375|cri_loss: -0.029144287109375|unsuper_loss: 0.0
+average reward score: 5.24609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.31%) |Training time=0.46s (21.13%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2472|ppo_ep: 1|act_loss: -0.0433349609375|cri_loss: -0.02105712890625|unsuper_loss: 0.0
+average reward score: 5.73046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.15%) |Training time=0.46s (21.31%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2473|ppo_ep: 1|act_loss: 0.016021728515625|cri_loss: 0.00916290283203125|unsuper_loss: 0.0
+average reward score: 5.015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.57%) |Training time=0.45s (20.86%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2474|ppo_ep: 1|act_loss: 0.01537322998046875|cri_loss: 0.008544921875|unsuper_loss: 0.0
+average reward score: 5.8203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.18%) |Training time=0.46s (21.28%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2475|ppo_ep: 1|act_loss: 0.0110931396484375|cri_loss: 0.00629425048828125|unsuper_loss: 0.0
+average reward score: 5.01171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.29%) |Training time=0.46s (21.16%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2476|ppo_ep: 1|act_loss: 0.033050537109375|cri_loss: 0.01837158203125|unsuper_loss: 0.0
+average reward score: 5.48046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.10%) |Training time=0.46s (21.35%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2477|ppo_ep: 1|act_loss: 0.0188446044921875|cri_loss: 0.00989532470703125|unsuper_loss: 0.0
+average reward score: 5.63671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.35%) |Training time=0.46s (21.12%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2478|ppo_ep: 1|act_loss: 0.025146484375|cri_loss: 0.0129547119140625|unsuper_loss: 0.0
+average reward score: 5.46875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.45%) |Training time=0.45s (20.99%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44
+[2023-04-14 10:19:00,844] [INFO] [logging.py:96:log_dist] [Rank 0] step=2480, skipped=30, lr=[7.806289481797477e-06, 7.806289481797477e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:19:00,863] [INFO] [timer.py:199:stop] epoch=0/micro_step=2480/global_step=2480, RunningAvgSamplesPerSec=106.6468968374762, CurrSamplesPerSec=107.46410836588056, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:19:00,955] [INFO] [logging.py:96:log_dist] [Rank 0] step=2480, skipped=36, lr=[4.049246145858227e-06, 4.049246145858227e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2479|ppo_ep: 1|act_loss: 0.01556396484375|cri_loss: 0.008331298828125|unsuper_loss: 0.0
+average reward score: 5.2734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.16%) |Training time=0.46s (21.31%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2480|ppo_ep: 1|act_loss: -0.0232086181640625|cri_loss: -0.01064300537109375|unsuper_loss: 0.0
+average reward score: 5.2734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.23%) |Training time=0.46s (21.21%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2481|ppo_ep: 1|act_loss: -0.02044677734375|cri_loss: -0.01001739501953125|unsuper_loss: 0.0
+average reward score: 5.2578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.16%) |Training time=0.46s (21.29%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2482|ppo_ep: 1|act_loss: -0.02606201171875|cri_loss: -0.01244354248046875|unsuper_loss: 0.0
+average reward score: 5.43359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.26%) |Training time=0.46s (21.18%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2483|ppo_ep: 1|act_loss: -0.022430419921875|cri_loss: -0.01071929931640625|unsuper_loss: 0.0
+average reward score: 5.38671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.22%) |Training time=0.45s (19.56%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2484|ppo_ep: 1|act_loss: -0.041107177734375|cri_loss: -0.0194549560546875|unsuper_loss: 0.0
+average reward score: 6.00390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.46s (21.17%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2485|ppo_ep: 1|act_loss: -0.0049285888671875|cri_loss: -0.0019235610961914062|unsuper_loss: 0.0
+average reward score: 5.7421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.45%) |Training time=0.45s (20.98%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2486|ppo_ep: 1|act_loss: 0.01280975341796875|cri_loss: 0.006969451904296875|unsuper_loss: 0.0
+average reward score: 5.6484375
+-------------------------------------------------------------------------------------
+|E2E latency=3.28s |Gather latency=0.00s (0.00%) |Generate time=1.61s (49.07%) |Training time=0.45s (13.68%) |Others=1.22 (37.25%)|CurSamplesPerSec=9.76 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2487|ppo_ep: 1|act_loss: -0.00832366943359375|cri_loss: -0.0038585662841796875|unsuper_loss: 0.0
+average reward score: 5.8203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.24%) |Training time=0.46s (21.21%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2488|ppo_ep: 1|act_loss: 0.04339599609375|cri_loss: 0.0228118896484375|unsuper_loss: 0.0
+average reward score: 5.640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.40%) |Training time=0.45s (21.04%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.44
+[2023-04-14 10:19:23,722] [INFO] [logging.py:96:log_dist] [Rank 0] step=2490, skipped=30, lr=[7.791661520109931e-06, 7.791661520109931e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:19:23,741] [INFO] [timer.py:199:stop] epoch=0/micro_step=2490/global_step=2490, RunningAvgSamplesPerSec=106.6586371426018, CurrSamplesPerSec=114.51340619587567, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:19:23,833] [INFO] [logging.py:96:log_dist] [Rank 0] step=2490, skipped=36, lr=[4.041680615431779e-06, 4.041680615431779e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2489|ppo_ep: 1|act_loss: -0.00444793701171875|cri_loss: -0.0018949508666992188|unsuper_loss: 0.0
+average reward score: 5.421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.01%) |Training time=0.44s (20.45%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2490|ppo_ep: 1|act_loss: -0.036956787109375|cri_loss: -0.0178070068359375|unsuper_loss: 0.0
+average reward score: 5.8828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.48%) |Training time=0.46s (20.95%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2491|ppo_ep: 1|act_loss: -0.022216796875|cri_loss: -0.0098419189453125|unsuper_loss: 0.0
+average reward score: 5.5
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.59%) |Training time=0.45s (20.84%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2492|ppo_ep: 1|act_loss: -0.011993408203125|cri_loss: -0.00548553466796875|unsuper_loss: 0.0
+average reward score: 5.8125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.99%) |Training time=0.44s (20.43%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2493|ppo_ep: 1|act_loss: 0.0078582763671875|cri_loss: 0.0042724609375|unsuper_loss: 0.0
+average reward score: 5.640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.56%) |Training time=0.46s (20.92%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2494|ppo_ep: 1|act_loss: 0.00855255126953125|cri_loss: 0.00464630126953125|unsuper_loss: 0.0
+average reward score: 5.828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.93%) |Training time=0.47s (21.55%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2495|ppo_ep: 1|act_loss: -0.022430419921875|cri_loss: -0.0108795166015625|unsuper_loss: 0.0
+average reward score: 5.0703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.52%) |Training time=0.45s (20.92%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2496|ppo_ep: 1|act_loss: 0.018402099609375|cri_loss: 0.0096435546875|unsuper_loss: 0.0
+average reward score: 5.55859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.56%) |Training time=0.45s (20.88%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2497|ppo_ep: 1|act_loss: -0.00981903076171875|cri_loss: -0.004726409912109375|unsuper_loss: 0.0
+average reward score: 5.44921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.66s (73.36%) |Training time=0.50s (22.27%) |Others=0.10 (4.37%)|CurSamplesPerSec=14.16 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2498|ppo_ep: 1|act_loss: 0.0111083984375|cri_loss: 0.00756072998046875|unsuper_loss: 0.0
+average reward score: 5.3984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.53%) |Training time=0.45s (20.90%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44
+[2023-04-14 10:19:45,473] [INFO] [logging.py:96:log_dist] [Rank 0] step=2500, skipped=30, lr=[7.776989585341974e-06, 7.776989585341974e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:19:45,491] [INFO] [timer.py:199:stop] epoch=0/micro_step=2500/global_step=2500, RunningAvgSamplesPerSec=106.674029550773, CurrSamplesPerSec=108.61095709058188, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:19:45,584] [INFO] [logging.py:96:log_dist] [Rank 0] step=2500, skipped=36, lr=[4.034092233579507e-06, 4.034092233579507e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2499|ppo_ep: 1|act_loss: 0.00739288330078125|cri_loss: 0.003993988037109375|unsuper_loss: 0.0
+average reward score: 5.921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.28%) |Training time=0.46s (21.19%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2500|ppo_ep: 1|act_loss: 0.02899169921875|cri_loss: 0.015228271484375|unsuper_loss: 0.0
+average reward score: 5.62109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.14%) |Training time=0.46s (21.29%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2501|ppo_ep: 1|act_loss: 0.0305938720703125|cri_loss: 0.0160675048828125|unsuper_loss: 0.0
+average reward score: 5.5625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.08%) |Training time=0.46s (21.37%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2502|ppo_ep: 1|act_loss: -0.043670654296875|cri_loss: -0.02056884765625|unsuper_loss: 0.0
+average reward score: 5.40234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.29%) |Training time=0.46s (21.15%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2503|ppo_ep: 1|act_loss: -0.059326171875|cri_loss: -0.02783203125|unsuper_loss: 0.0
+average reward score: 6.2578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.34%) |Training time=0.45s (21.09%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2504|ppo_ep: 1|act_loss: -0.0218048095703125|cri_loss: -0.01064300537109375|unsuper_loss: 0.0
+average reward score: 5.8125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.01%) |Training time=0.46s (21.44%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2505|ppo_ep: 1|act_loss: 0.0115966796875|cri_loss: 0.006610870361328125|unsuper_loss: 0.0
+average reward score: 5.53515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.34%) |Training time=0.46s (21.11%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2506|ppo_ep: 1|act_loss: 0.0372314453125|cri_loss: 0.019439697265625|unsuper_loss: 0.0
+average reward score: 4.828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.94%) |Training time=0.47s (21.52%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2507|ppo_ep: 1|act_loss: 0.019927978515625|cri_loss: 0.01120758056640625|unsuper_loss: 0.0
+average reward score: 5.12109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.87%) |Training time=0.44s (20.55%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2508|ppo_ep: 1|act_loss: -0.009979248046875|cri_loss: -0.00489044189453125|unsuper_loss: 0.0
+average reward score: 5.421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.48s (22.14%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45
+[2023-04-14 10:20:07,063] [INFO] [logging.py:96:log_dist] [Rank 0] step=2510, skipped=30, lr=[7.762273894967078e-06, 7.762273894967078e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:20:07,081] [INFO] [timer.py:199:stop] epoch=0/micro_step=2510/global_step=2510, RunningAvgSamplesPerSec=106.67546349044484, CurrSamplesPerSec=101.66207505639152, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:20:07,174] [INFO] [logging.py:96:log_dist] [Rank 0] step=2510, skipped=36, lr=[4.0264811127795375e-06, 4.0264811127795375e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2509|ppo_ep: 1|act_loss: 0.059600830078125|cri_loss: 0.034027099609375|unsuper_loss: 0.0
+average reward score: 4.93359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.34%) |Training time=0.48s (22.12%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2510|ppo_ep: 1|act_loss: -0.0127716064453125|cri_loss: -0.00566864013671875|unsuper_loss: 0.0
+average reward score: 5.8125
+-------------------------------------------------------------------------------------
+|E2E latency=2.90s |Gather latency=0.00s (0.00%) |Generate time=1.58s (54.53%) |Training time=0.47s (16.30%) |Others=0.85 (29.17%)|CurSamplesPerSec=11.04 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2511|ppo_ep: 1|act_loss: 0.017974853515625|cri_loss: 0.0095367431640625|unsuper_loss: 0.0
+average reward score: 4.94140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.95%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2512|ppo_ep: 1|act_loss: -0.02691650390625|cri_loss: -0.01318359375|unsuper_loss: 0.0
+average reward score: 5.171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.82%) |Training time=0.48s (21.59%) |Others=0.15 (6.59%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2513|ppo_ep: 1|act_loss: 0.04351806640625|cri_loss: 0.025054931640625|unsuper_loss: 0.0
+average reward score: 5.44921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.66%) |Training time=0.47s (20.99%) |Others=0.10 (4.36%)|CurSamplesPerSec=14.17 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2514|ppo_ep: 1|act_loss: 0.0301361083984375|cri_loss: 0.0162506103515625|unsuper_loss: 0.0
+average reward score: 5.3984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.81%) |Training time=0.49s (22.68%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2515|ppo_ep: 1|act_loss: 0.0187225341796875|cri_loss: 0.0095977783203125|unsuper_loss: 0.0
+average reward score: 5.4375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.79%) |Training time=0.47s (21.65%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.44
+[2023-04-14 10:20:23,173] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 2516|ppo_ep: 1|act_loss: -0.0048065185546875|cri_loss: -0.0020961761474609375|unsuper_loss: 0.0
+average reward score: 5.5625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.86%) |Training time=0.47s (22.07%) |Others=0.09 (4.08%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44
+[2023-04-14 10:20:25,315] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 2517|ppo_ep: 1|act_loss: -0.00971221923828125|cri_loss: -0.0034580230712890625|unsuper_loss: 0.0
+average reward score: 5.328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.93%) |Training time=0.47s (21.99%) |Others=0.09 (4.08%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2518|ppo_ep: 1|act_loss: 0.0168914794921875|cri_loss: 0.01013946533203125|unsuper_loss: 0.0
+average reward score: 5.2734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.92%) |Training time=0.47s (21.53%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.44
+[2023-04-14 10:20:29,556] [INFO] [logging.py:96:log_dist] [Rank 0] step=2520, skipped=30, lr=[7.747514667107269e-06, 7.747514667107269e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:20:29,574] [INFO] [timer.py:199:stop] epoch=0/micro_step=2520/global_step=2520, RunningAvgSamplesPerSec=106.65882156515856, CurrSamplesPerSec=102.7891293792571, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:20:29,667] [INFO] [logging.py:96:log_dist] [Rank 0] step=2520, skipped=38, lr=[4.020375919897733e-06, 4.020375919897733e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2519|ppo_ep: 1|act_loss: 0.005268096923828125|cri_loss: 0.003143310546875|unsuper_loss: 0.0
+average reward score: 5.7578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.89%) |Training time=0.47s (21.65%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2520|ppo_ep: 1|act_loss: 0.034912109375|cri_loss: 0.0180206298828125|unsuper_loss: 0.0
+average reward score: 5.859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.09%) |Training time=0.47s (20.58%) |Others=0.10 (4.33%)|CurSamplesPerSec=14.10 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2521|ppo_ep: 1|act_loss: 0.02166748046875|cri_loss: 0.0110626220703125|unsuper_loss: 0.0
+average reward score: 5.53515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.97%) |Training time=0.47s (21.50%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2522|ppo_ep: 1|act_loss: 0.0024871826171875|cri_loss: 0.0013885498046875|unsuper_loss: 0.0
+average reward score: 5.4609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.63%) |Training time=0.47s (21.84%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2523|ppo_ep: 1|act_loss: -0.024139404296875|cri_loss: -0.01153564453125|unsuper_loss: 0.0
+average reward score: 5.46875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.85%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2524|ppo_ep: 1|act_loss: -0.0002079010009765625|cri_loss: 0.0004534721374511719|unsuper_loss: 0.0
+average reward score: 5.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.49%) |Training time=0.47s (21.96%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2525|ppo_ep: 1|act_loss: -0.00968170166015625|cri_loss: -0.0042877197265625|unsuper_loss: 0.0
+average reward score: 5.015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.36%) |Training time=0.48s (22.08%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2526|ppo_ep: 1|act_loss: -0.06610107421875|cri_loss: -0.03070068359375|unsuper_loss: 0.0
+average reward score: 5.44140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.31%) |Training time=0.48s (22.12%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2527|ppo_ep: 1|act_loss: -0.04644775390625|cri_loss: -0.0222320556640625|unsuper_loss: 0.0
+average reward score: 5.7734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.56%) |Training time=0.47s (21.46%) |Others=0.13 (5.99%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2528|ppo_ep: 1|act_loss: -0.002399444580078125|cri_loss: -0.00034809112548828125|unsuper_loss: 0.0
+average reward score: 5.359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.71s (74.85%) |Training time=0.47s (20.52%) |Others=0.11 (4.63%)|CurSamplesPerSec=14.00 |AvgSamplesPerSec=14.45
+[2023-04-14 10:20:51,418] [INFO] [logging.py:96:log_dist] [Rank 0] step=2530, skipped=30, lr=[7.732712120529913e-06, 7.732712120529913e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:20:51,436] [INFO] [timer.py:199:stop] epoch=0/micro_step=2530/global_step=2530, RunningAvgSamplesPerSec=106.64682603841231, CurrSamplesPerSec=102.19486489141136, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:20:51,529] [INFO] [logging.py:96:log_dist] [Rank 0] step=2530, skipped=38, lr=[4.012724153511568e-06, 4.012724153511568e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2529|ppo_ep: 1|act_loss: -0.02001953125|cri_loss: -0.00980377197265625|unsuper_loss: 0.0
+average reward score: 6.09765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.48s (22.08%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2530|ppo_ep: 1|act_loss: 0.0421142578125|cri_loss: 0.022308349609375|unsuper_loss: 0.0
+average reward score: 5.67578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.47s (21.66%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2531|ppo_ep: 1|act_loss: 0.024688720703125|cri_loss: 0.0128326416015625|unsuper_loss: 0.0
+average reward score: 5.5390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.61%) |Training time=0.47s (21.82%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
+[2023-04-14 10:20:57,867] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 2532|ppo_ep: 1|act_loss: 0.07666015625|cri_loss: 0.041778564453125|unsuper_loss: 0.0
+average reward score: 5.01953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.67%) |Training time=0.44s (20.73%) |Others=0.10 (4.60%)|CurSamplesPerSec=15.09 |AvgSamplesPerSec=14.45
+[2023-04-14 10:20:59,996] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 2533|ppo_ep: 1|act_loss: 0.046142578125|cri_loss: 0.0245513916015625|unsuper_loss: 0.0
+average reward score: 5.87890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.53%) |Training time=0.44s (20.87%) |Others=0.10 (4.60%)|CurSamplesPerSec=15.02 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2534|ppo_ep: 1|act_loss: -7.677078247070312e-05|cri_loss: 0.0001392364501953125|unsuper_loss: 0.0
+average reward score: 5.8984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.60s |Gather latency=0.00s (0.00%) |Generate time=1.58s (60.80%) |Training time=0.48s (18.31%) |Others=0.54 (20.90%)|CurSamplesPerSec=12.29 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2535|ppo_ep: 1|act_loss: 0.00254058837890625|cri_loss: 0.0013990402221679688|unsuper_loss: 0.0
+average reward score: 5.76171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.86%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2536|ppo_ep: 1|act_loss: -0.01519012451171875|cri_loss: -0.00730133056640625|unsuper_loss: 0.0
+average reward score: 5.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.61%) |Training time=0.45s (20.84%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2537|ppo_ep: 1|act_loss: -0.02679443359375|cri_loss: -0.01297760009765625|unsuper_loss: 0.0
+average reward score: 5.33984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.51%) |Training time=0.45s (20.89%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2538|ppo_ep: 1|act_loss: -0.04571533203125|cri_loss: -0.022308349609375|unsuper_loss: 0.0
+average reward score: 5.78125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.64%) |Training time=0.47s (21.80%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
+[2023-04-14 10:21:13,351] [INFO] [logging.py:96:log_dist] [Rank 0] step=2540, skipped=32, lr=[7.720839041213051e-06, 7.720839041213051e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:21:13,369] [INFO] [timer.py:199:stop] epoch=0/micro_step=2540/global_step=2540, RunningAvgSamplesPerSec=106.64877381183305, CurrSamplesPerSec=102.9472844528714, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:21:13,462] [INFO] [logging.py:96:log_dist] [Rank 0] step=2540, skipped=38, lr=[4.0050499649041985e-06, 4.0050499649041985e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2539|ppo_ep: 1|act_loss: -0.027313232421875|cri_loss: -0.01280975341796875|unsuper_loss: 0.0
+average reward score: 5.37109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.53%) |Training time=0.47s (21.94%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2540|ppo_ep: 1|act_loss: 0.001941680908203125|cri_loss: 0.0014667510986328125|unsuper_loss: 0.0
+average reward score: 5.3671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.54%) |Training time=0.47s (21.90%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2541|ppo_ep: 1|act_loss: 0.00600433349609375|cri_loss: 0.003421783447265625|unsuper_loss: 0.0
+average reward score: 5.39453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.44%) |Training time=0.47s (22.01%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2542|ppo_ep: 1|act_loss: 0.00748443603515625|cri_loss: 0.004150390625|unsuper_loss: 0.0
+average reward score: 5.57421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.58%) |Training time=0.47s (20.19%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.80 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2543|ppo_ep: 1|act_loss: 0.025390625|cri_loss: 0.014404296875|unsuper_loss: 0.0
+average reward score: 5.6015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.86%) |Training time=0.46s (21.58%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2544|ppo_ep: 1|act_loss: 0.011566162109375|cri_loss: 0.00630950927734375|unsuper_loss: 0.0
+average reward score: 5.546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.73%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2545|ppo_ep: 1|act_loss: 0.0166015625|cri_loss: 0.0087432861328125|unsuper_loss: 0.0
+average reward score: 4.2734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.92%) |Training time=0.46s (21.50%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2546|ppo_ep: 1|act_loss: 0.0025577545166015625|cri_loss: 0.001453399658203125|unsuper_loss: 0.0
+average reward score: 5.66015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.74%) |Training time=0.47s (21.70%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2547|ppo_ep: 1|act_loss: -0.005771636962890625|cri_loss: -0.002132415771484375|unsuper_loss: 0.0
+average reward score: 5.41015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.77%) |Training time=0.47s (21.67%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2548|ppo_ep: 1|act_loss: -0.0005550384521484375|cri_loss: 0.0006742477416992188|unsuper_loss: 0.0
+average reward score: 5.48046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.15%) |Training time=0.46s (21.34%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.45
+[2023-04-14 10:21:35,117] [INFO] [logging.py:96:log_dist] [Rank 0] step=2550, skipped=32, lr=[7.705959074285325e-06, 7.705959074285325e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:21:35,135] [INFO] [timer.py:199:stop] epoch=0/micro_step=2550/global_step=2550, RunningAvgSamplesPerSec=106.64088588508935, CurrSamplesPerSec=103.17353387250142, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:21:35,228] [INFO] [logging.py:96:log_dist] [Rank 0] step=2550, skipped=38, lr=[3.997353467825614e-06, 3.997353467825614e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2549|ppo_ep: 1|act_loss: -0.033782958984375|cri_loss: -0.016387939453125|unsuper_loss: 0.0
+average reward score: 5.765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.02%) |Training time=0.47s (21.52%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2550|ppo_ep: 1|act_loss: -0.016998291015625|cri_loss: -0.0077667236328125|unsuper_loss: 0.0
+average reward score: 5.6484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.46s (21.61%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2551|ppo_ep: 1|act_loss: -0.00411224365234375|cri_loss: -0.0017375946044921875|unsuper_loss: 0.0
+average reward score: 5.66015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.13%) |Training time=0.46s (21.29%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2552|ppo_ep: 1|act_loss: 0.027069091796875|cri_loss: 0.01416778564453125|unsuper_loss: 0.0
+average reward score: 5.40234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.59%) |Training time=0.45s (20.85%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2553|ppo_ep: 1|act_loss: 0.00783538818359375|cri_loss: 0.004398345947265625|unsuper_loss: 0.0
+average reward score: 5.63671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.28%) |Training time=0.46s (21.15%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2554|ppo_ep: 1|act_loss: -0.0142974853515625|cri_loss: -0.00685882568359375|unsuper_loss: 0.0
+average reward score: 5.29296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.08%) |Training time=0.46s (21.34%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2555|ppo_ep: 1|act_loss: -0.004123687744140625|cri_loss: -0.001667022705078125|unsuper_loss: 0.0
+average reward score: 5.5625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.69%) |Training time=0.45s (20.73%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2556|ppo_ep: 1|act_loss: -0.0052032470703125|cri_loss: -0.0022716522216796875|unsuper_loss: 0.0
+average reward score: 5.78125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.17%) |Training time=0.46s (21.26%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2557|ppo_ep: 1|act_loss: -0.01218414306640625|cri_loss: -0.0057220458984375|unsuper_loss: 0.0
+average reward score: 5.421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.84s |Gather latency=0.00s (0.00%) |Generate time=1.77s (62.32%) |Training time=0.45s (16.02%) |Others=0.61 (21.65%)|CurSamplesPerSec=11.27 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2558|ppo_ep: 1|act_loss: 0.0281524658203125|cri_loss: 0.0150299072265625|unsuper_loss: 0.0
+average reward score: 5.12890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.94%) |Training time=0.47s (21.51%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.45
+[2023-04-14 10:21:57,356] [INFO] [logging.py:96:log_dist] [Rank 0] step=2560, skipped=32, lr=[7.691036404594183e-06, 7.691036404594183e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:21:57,374] [INFO] [timer.py:199:stop] epoch=0/micro_step=2560/global_step=2560, RunningAvgSamplesPerSec=106.64851899366893, CurrSamplesPerSec=108.4322206360786, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:21:57,467] [INFO] [logging.py:96:log_dist] [Rank 0] step=2560, skipped=38, lr=[3.989634776356468e-06, 3.989634776356468e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2559|ppo_ep: 1|act_loss: -0.00821685791015625|cri_loss: -0.0036067962646484375|unsuper_loss: 0.0
+average reward score: 4.890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.26%) |Training time=0.46s (21.21%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2560|ppo_ep: 1|act_loss: -0.01593017578125|cri_loss: -0.0075836181640625|unsuper_loss: 0.0
+average reward score: 5.421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.24%) |Training time=0.46s (21.20%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2561|ppo_ep: 1|act_loss: 0.012786865234375|cri_loss: 0.006656646728515625|unsuper_loss: 0.0
+average reward score: 5.96875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.15%) |Training time=0.46s (21.29%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2562|ppo_ep: 1|act_loss: 0.0024127960205078125|cri_loss: 0.0014629364013671875|unsuper_loss: 0.0
+average reward score: 5.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.30%) |Training time=0.45s (20.98%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2563|ppo_ep: 1|act_loss: -0.028656005859375|cri_loss: -0.0138397216796875|unsuper_loss: 0.0
+average reward score: 4.98046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.35%) |Training time=0.45s (21.08%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2564|ppo_ep: 1|act_loss: 0.01380157470703125|cri_loss: 0.007137298583984375|unsuper_loss: 0.0
+average reward score: 5.71875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.12%) |Training time=0.46s (21.32%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2565|ppo_ep: 1|act_loss: 0.05413818359375|cri_loss: 0.0276336669921875|unsuper_loss: 0.0
+average reward score: 5.00390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.49%) |Training time=0.45s (20.94%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2566|ppo_ep: 1|act_loss: 0.01453399658203125|cri_loss: 0.009857177734375|unsuper_loss: 0.0
+average reward score: 6.71875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.51%) |Training time=0.45s (20.92%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2567|ppo_ep: 1|act_loss: -0.01490020751953125|cri_loss: -0.0070953369140625|unsuper_loss: 0.0
+average reward score: 5.78125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.22%) |Training time=0.43s (20.18%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2568|ppo_ep: 1|act_loss: -0.03192138671875|cri_loss: -0.015533447265625|unsuper_loss: 0.0
+average reward score: 5.5
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.41%) |Training time=0.45s (21.03%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
+[2023-04-14 10:22:18,902] [INFO] [logging.py:96:log_dist] [Rank 0] step=2570, skipped=32, lr=[7.676071253329589e-06, 7.676071253329589e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:22:18,921] [INFO] [timer.py:199:stop] epoch=0/micro_step=2570/global_step=2570, RunningAvgSamplesPerSec=106.66218526568217, CurrSamplesPerSec=109.55198828552562, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:22:19,013] [INFO] [logging.py:96:log_dist] [Rank 0] step=2570, skipped=38, lr=[3.981894004906388e-06, 3.981894004906388e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2569|ppo_ep: 1|act_loss: -0.0310821533203125|cri_loss: -0.0146331787109375|unsuper_loss: 0.0
+average reward score: 5.37890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.33%) |Training time=0.45s (21.12%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2570|ppo_ep: 1|act_loss: 0.0008916854858398438|cri_loss: 0.0008840560913085938|unsuper_loss: 0.0
+average reward score: 5.7109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.32%) |Training time=0.45s (21.10%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2571|ppo_ep: 1|act_loss: 0.0136871337890625|cri_loss: 0.007045745849609375|unsuper_loss: 0.0
+average reward score: 5.78515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.46s (21.47%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2572|ppo_ep: 1|act_loss: 0.0029239654541015625|cri_loss: 0.0018987655639648438|unsuper_loss: 0.0
+average reward score: 5.28125
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.68%) |Training time=0.47s (20.12%) |Others=0.10 (4.20%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2573|ppo_ep: 1|act_loss: 0.03326416015625|cri_loss: 0.0170135498046875|unsuper_loss: 0.0
+average reward score: 5.41796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.32%) |Training time=0.45s (21.11%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2574|ppo_ep: 1|act_loss: 0.0264892578125|cri_loss: 0.0137939453125|unsuper_loss: 0.0
+average reward score: 5.44921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.24%) |Training time=0.45s (21.09%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2575|ppo_ep: 1|act_loss: -0.0259246826171875|cri_loss: -0.01274871826171875|unsuper_loss: 0.0
+average reward score: 5.4375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.65%) |Training time=0.45s (20.83%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2576|ppo_ep: 1|act_loss: -0.0147552490234375|cri_loss: -0.006183624267578125|unsuper_loss: 0.0
+average reward score: 5.8671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.25%) |Training time=0.46s (21.19%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2577|ppo_ep: 1|act_loss: -0.03240966796875|cri_loss: -0.0154571533203125|unsuper_loss: 0.0
+average reward score: 5.23046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.39%) |Training time=0.46s (21.07%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2578|ppo_ep: 1|act_loss: -0.04736328125|cri_loss: -0.0228118896484375|unsuper_loss: 0.0
+average reward score: 5.10546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.38%) |Training time=0.47s (21.17%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.45
+[2023-04-14 10:22:40,839] [INFO] [logging.py:96:log_dist] [Rank 0] step=2580, skipped=32, lr=[7.661063842311183e-06, 7.661063842311183e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:22:40,858] [INFO] [timer.py:199:stop] epoch=0/micro_step=2580/global_step=2580, RunningAvgSamplesPerSec=106.6658442359836, CurrSamplesPerSec=105.24808684721674, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:22:40,951] [INFO] [logging.py:96:log_dist] [Rank 0] step=2580, skipped=38, lr=[3.97413126821228e-06, 3.97413126821228e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2579|ppo_ep: 1|act_loss: -0.0352783203125|cri_loss: -0.017059326171875|unsuper_loss: 0.0
+average reward score: 5.04296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.16%) |Training time=0.47s (20.53%) |Others=0.10 (4.32%)|CurSamplesPerSec=14.07 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2580|ppo_ep: 1|act_loss: -0.022186279296875|cri_loss: -0.0107574462890625|unsuper_loss: 0.0
+average reward score: 5.1796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.20%) |Training time=0.46s (21.23%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2581|ppo_ep: 1|act_loss: 0.00836181640625|cri_loss: 0.00673675537109375|unsuper_loss: 0.0
+average reward score: 5.33984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.71s |Gather latency=0.00s (0.00%) |Generate time=1.62s (59.69%) |Training time=0.44s (16.41%) |Others=0.65 (23.90%)|CurSamplesPerSec=11.80 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2582|ppo_ep: 1|act_loss: -0.00373077392578125|cri_loss: -0.0012674331665039062|unsuper_loss: 0.0
+average reward score: 5.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.29%) |Training time=0.46s (21.17%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2583|ppo_ep: 1|act_loss: 0.018829345703125|cri_loss: 0.0101776123046875|unsuper_loss: 0.0
+average reward score: 5.19921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.86%) |Training time=0.47s (21.59%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2584|ppo_ep: 1|act_loss: 0.004974365234375|cri_loss: 0.0033092498779296875|unsuper_loss: 0.0
+average reward score: 5.65234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.31%) |Training time=0.45s (21.10%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2585|ppo_ep: 1|act_loss: -0.0110931396484375|cri_loss: -0.00537109375|unsuper_loss: 0.0
+average reward score: 5.1015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.49%) |Training time=0.45s (20.93%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2586|ppo_ep: 1|act_loss: -0.0142364501953125|cri_loss: -0.0065460205078125|unsuper_loss: 0.0
+average reward score: 4.75390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.89s |Gather latency=0.00s (0.00%) |Generate time=1.76s (60.97%) |Training time=0.48s (16.67%) |Others=0.65 (22.36%)|CurSamplesPerSec=11.08 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2587|ppo_ep: 1|act_loss: 0.0120391845703125|cri_loss: 0.00667572021484375|unsuper_loss: 0.0
+average reward score: 5.50390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.20%) |Training time=0.46s (21.22%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2588|ppo_ep: 1|act_loss: -0.03863525390625|cri_loss: -0.017364501953125|unsuper_loss: 0.0
+average reward score: 5.12109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.38%) |Training time=0.45s (21.04%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
+[2023-04-14 10:23:03,693] [INFO] [logging.py:96:log_dist] [Rank 0] step=2590, skipped=32, lr=[7.646014393985005e-06, 7.646014393985005e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:23:03,712] [INFO] [timer.py:199:stop] epoch=0/micro_step=2590/global_step=2590, RunningAvgSamplesPerSec=106.671823033167, CurrSamplesPerSec=107.67645173531136, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:23:03,805] [INFO] [logging.py:96:log_dist] [Rank 0] step=2590, skipped=38, lr=[3.96634668133663e-06, 3.96634668133663e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2589|ppo_ep: 1|act_loss: -0.048431396484375|cri_loss: -0.0231170654296875|unsuper_loss: 0.0
+average reward score: 5.61328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.13%) |Training time=0.46s (21.32%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2590|ppo_ep: 1|act_loss: -0.047698974609375|cri_loss: -0.02325439453125|unsuper_loss: 0.0
+average reward score: 5.1953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.46s (21.32%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2591|ppo_ep: 1|act_loss: -0.00933837890625|cri_loss: -0.00406646728515625|unsuper_loss: 0.0
+average reward score: 5.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.10%) |Training time=0.46s (21.33%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2592|ppo_ep: 1|act_loss: 0.00988006591796875|cri_loss: 0.00601959228515625|unsuper_loss: 0.0
+average reward score: 4.85546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.60s |Gather latency=0.00s (0.00%) |Generate time=1.59s (61.40%) |Training time=0.46s (17.59%) |Others=0.55 (21.01%)|CurSamplesPerSec=12.33 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2593|ppo_ep: 1|act_loss: 0.0369873046875|cri_loss: 0.019439697265625|unsuper_loss: 0.0
+average reward score: 5.5703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.16%) |Training time=0.46s (21.25%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2594|ppo_ep: 1|act_loss: 0.053192138671875|cri_loss: 0.0272216796875|unsuper_loss: 0.0
+average reward score: 5.52734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.46s (21.38%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2595|ppo_ep: 1|act_loss: 0.025787353515625|cri_loss: 0.0153961181640625|unsuper_loss: 0.0
+average reward score: 5.41796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.20%) |Training time=0.46s (21.23%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2596|ppo_ep: 1|act_loss: 0.021759033203125|cri_loss: 0.011383056640625|unsuper_loss: 0.0
+average reward score: 5.75
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.86%) |Training time=0.47s (21.63%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2597|ppo_ep: 1|act_loss: 0.0087890625|cri_loss: 0.0050811767578125|unsuper_loss: 0.0
+average reward score: 5.16796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.04%) |Training time=0.46s (21.41%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2598|ppo_ep: 1|act_loss: 0.021331787109375|cri_loss: 0.01171112060546875|unsuper_loss: 0.0
+average reward score: 5.20703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.85%) |Training time=0.45s (20.49%) |Others=0.17 (7.66%)|CurSamplesPerSec=14.42 |AvgSamplesPerSec=14.45
+[2023-04-14 10:23:25,756] [INFO] [logging.py:96:log_dist] [Rank 0] step=2600, skipped=32, lr=[7.630923131420182e-06, 7.630923131420182e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:23:25,775] [INFO] [timer.py:199:stop] epoch=0/micro_step=2600/global_step=2600, RunningAvgSamplesPerSec=106.67467146215988, CurrSamplesPerSec=104.68854435844617, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:23:25,867] [INFO] [logging.py:96:log_dist] [Rank 0] step=2600, skipped=38, lr=[3.958540359665793e-06, 3.958540359665793e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2599|ppo_ep: 1|act_loss: -0.0157318115234375|cri_loss: -0.007659912109375|unsuper_loss: 0.0
+average reward score: 5.703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.83%) |Training time=0.47s (21.63%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2600|ppo_ep: 1|act_loss: -0.0126190185546875|cri_loss: -0.005321502685546875|unsuper_loss: 0.0
+average reward score: 5.625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.98%) |Training time=0.46s (21.46%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2601|ppo_ep: 1|act_loss: -0.0704345703125|cri_loss: -0.033203125|unsuper_loss: 0.0
+average reward score: 5.10546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.28%) |Training time=0.48s (20.52%) |Others=0.10 (4.20%)|CurSamplesPerSec=13.66 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2602|ppo_ep: 1|act_loss: -0.0760498046875|cri_loss: -0.03662109375|unsuper_loss: 0.0
+average reward score: 5.58984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.17%) |Training time=0.46s (21.32%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2603|ppo_ep: 1|act_loss: 0.004253387451171875|cri_loss: 0.002468109130859375|unsuper_loss: 0.0
+average reward score: 5.8828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.21%) |Training time=0.46s (21.21%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2604|ppo_ep: 1|act_loss: -0.00458526611328125|cri_loss: -0.002124786376953125|unsuper_loss: 0.0
+average reward score: 5.83203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.61s |Gather latency=0.00s (0.00%) |Generate time=1.60s (61.13%) |Training time=0.45s (17.40%) |Others=0.56 (21.47%)|CurSamplesPerSec=12.26 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2605|ppo_ep: 1|act_loss: 0.004978179931640625|cri_loss: 0.0028743743896484375|unsuper_loss: 0.0
+average reward score: 4.859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.51%) |Training time=0.45s (20.93%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2606|ppo_ep: 1|act_loss: 0.0413818359375|cri_loss: 0.0211334228515625|unsuper_loss: 0.0
+average reward score: 5.69921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.77%) |Training time=0.47s (21.51%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2607|ppo_ep: 1|act_loss: 0.01214599609375|cri_loss: 0.00688934326171875|unsuper_loss: 0.0
+average reward score: 5.5703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.62s (72.09%) |Training time=0.53s (23.53%) |Others=0.10 (4.38%)|CurSamplesPerSec=14.24 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2608|ppo_ep: 1|act_loss: 0.032012939453125|cri_loss: 0.0164794921875|unsuper_loss: 0.0
+average reward score: 5.046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.49%) |Training time=0.45s (20.94%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
+[2023-04-14 10:23:48,076] [INFO] [logging.py:96:log_dist] [Rank 0] step=2610, skipped=32, lr=[7.615790278305629e-06, 7.615790278305629e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:23:48,095] [INFO] [timer.py:199:stop] epoch=0/micro_step=2610/global_step=2610, RunningAvgSamplesPerSec=106.66852722864914, CurrSamplesPerSec=108.16473682853238, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:23:48,187] [INFO] [logging.py:96:log_dist] [Rank 0] step=2610, skipped=38, lr=[3.95071241890829e-06, 3.95071241890829e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2609|ppo_ep: 1|act_loss: 0.00861358642578125|cri_loss: 0.0045623779296875|unsuper_loss: 0.0
+average reward score: 4.97265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.19%) |Training time=0.46s (21.26%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2610|ppo_ep: 1|act_loss: -0.0095062255859375|cri_loss: -0.00423431396484375|unsuper_loss: 0.0
+average reward score: 5.671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.51s |Gather latency=0.00s (0.00%) |Generate time=1.59s (63.11%) |Training time=0.47s (18.51%) |Others=0.46 (18.38%)|CurSamplesPerSec=12.73 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2611|ppo_ep: 1|act_loss: -0.018829345703125|cri_loss: -0.00910186767578125|unsuper_loss: 0.0
+average reward score: 5.53125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.47%) |Training time=0.48s (21.95%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2612|ppo_ep: 1|act_loss: -0.0234527587890625|cri_loss: -0.01035308837890625|unsuper_loss: 0.0
+average reward score: 5.23046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.12%) |Training time=0.48s (22.33%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2613|ppo_ep: 1|act_loss: 0.005229949951171875|cri_loss: 0.00333404541015625|unsuper_loss: 0.0
+average reward score: 5.24609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.43%) |Training time=0.47s (21.99%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2614|ppo_ep: 1|act_loss: -0.0267486572265625|cri_loss: -0.01274871826171875|unsuper_loss: 0.0
+average reward score: 5.359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.52%) |Training time=0.47s (21.91%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2615|ppo_ep: 1|act_loss: 0.0052490234375|cri_loss: 0.003108978271484375|unsuper_loss: 0.0
+average reward score: 5.3671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.54s |Gather latency=0.00s (0.00%) |Generate time=1.62s (63.71%) |Training time=0.63s (24.65%) |Others=0.30 (11.64%)|CurSamplesPerSec=12.60 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2616|ppo_ep: 1|act_loss: 0.04046630859375|cri_loss: 0.021759033203125|unsuper_loss: 0.0
+average reward score: 5.09375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.68%) |Training time=0.47s (21.73%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2617|ppo_ep: 1|act_loss: -0.0025787353515625|cri_loss: 0.0013027191162109375|unsuper_loss: 0.0
+average reward score: 5.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.74%) |Training time=0.47s (21.69%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
+[2023-04-14 10:24:08,322] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 2618|ppo_ep: 1|act_loss: 0.017486572265625|cri_loss: 0.00926971435546875|unsuper_loss: 0.0
+average reward score: 5.20703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.09%) |Training time=0.47s (21.81%) |Others=0.09 (4.10%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.45
+[2023-04-14 10:24:10,360] [INFO] [logging.py:96:log_dist] [Rank 0] step=2620, skipped=32, lr=[7.600616058946736e-06, 7.600616058946736e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:24:10,378] [INFO] [timer.py:199:stop] epoch=0/micro_step=2620/global_step=2620, RunningAvgSamplesPerSec=106.64397854881977, CurrSamplesPerSec=103.47651192983646, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:24:10,462] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+[2023-04-14 10:24:10,463] [INFO] [logging.py:96:log_dist] [Rank 0] step=2620, skipped=40, lr=[3.9444345785206285e-06, 3.9444345785206285e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2619|ppo_ep: 1|act_loss: 0.020050048828125|cri_loss: 0.010467529296875|unsuper_loss: 0.0
+average reward score: 5.234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.88%) |Training time=0.47s (22.05%) |Others=0.09 (4.08%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2620|ppo_ep: 1|act_loss: 0.01495361328125|cri_loss: 0.0078277587890625|unsuper_loss: 0.0
+average reward score: 5.56640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.88%) |Training time=0.46s (21.54%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2621|ppo_ep: 1|act_loss: 0.04327392578125|cri_loss: 0.0222015380859375|unsuper_loss: 0.0
+average reward score: 5.6640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.58s (69.79%) |Training time=0.47s (20.78%) |Others=0.21 (9.43%)|CurSamplesPerSec=14.14 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2622|ppo_ep: 1|act_loss: 0.0241546630859375|cri_loss: 0.01229095458984375|unsuper_loss: 0.0
+average reward score: 4.94140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.43%) |Training time=0.47s (22.00%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2623|ppo_ep: 1|act_loss: -0.0059356689453125|cri_loss: -0.0025787353515625|unsuper_loss: 0.0
+average reward score: 5.203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.63%) |Training time=0.47s (21.81%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2624|ppo_ep: 1|act_loss: -0.040802001953125|cri_loss: -0.0193634033203125|unsuper_loss: 0.0
+average reward score: 5.33984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.05%) |Training time=0.49s (22.41%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2625|ppo_ep: 1|act_loss: -0.048248291015625|cri_loss: -0.022308349609375|unsuper_loss: 0.0
+average reward score: 5.32421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.80%) |Training time=0.46s (21.62%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2626|ppo_ep: 1|act_loss: 0.04718017578125|cri_loss: 0.024810791015625|unsuper_loss: 0.0
+average reward score: 5.44140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.76%) |Training time=0.44s (20.64%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2627|ppo_ep: 1|act_loss: 0.0007905960083007812|cri_loss: 0.0007834434509277344|unsuper_loss: 0.0
+average reward score: 5.34765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.13%) |Training time=0.47s (21.40%) |Others=0.14 (6.47%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2628|ppo_ep: 1|act_loss: -0.0053253173828125|cri_loss: -0.0022602081298828125|unsuper_loss: 0.0
+average reward score: 5.625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.46%) |Training time=0.47s (21.94%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
+[2023-04-14 10:24:32,030] [INFO] [logging.py:96:log_dist] [Rank 0] step=2630, skipped=32, lr=[7.5854006982620415e-06, 7.5854006982620415e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:24:32,048] [INFO] [timer.py:199:stop] epoch=0/micro_step=2630/global_step=2630, RunningAvgSamplesPerSec=106.63587194700445, CurrSamplesPerSec=101.95049749373148, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:24:32,141] [INFO] [logging.py:96:log_dist] [Rank 0] step=2630, skipped=40, lr=[3.9365680160143595e-06, 3.9365680160143595e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2629|ppo_ep: 1|act_loss: 0.0084381103515625|cri_loss: 0.004375457763671875|unsuper_loss: 0.0
+average reward score: 5.5390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.57%) |Training time=0.48s (21.92%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2630|ppo_ep: 1|act_loss: 0.0084686279296875|cri_loss: 0.004428863525390625|unsuper_loss: 0.0
+average reward score: 5.171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.90%) |Training time=0.49s (20.89%) |Others=0.10 (4.22%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2631|ppo_ep: 1|act_loss: 0.016143798828125|cri_loss: 0.00824737548828125|unsuper_loss: 0.0
+average reward score: 5.921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.72%) |Training time=0.46s (21.59%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2632|ppo_ep: 1|act_loss: -0.01343536376953125|cri_loss: -0.006603240966796875|unsuper_loss: 0.0
+average reward score: 5.2578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.60%) |Training time=0.47s (21.83%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2633|ppo_ep: 1|act_loss: -0.0085601806640625|cri_loss: -0.003986358642578125|unsuper_loss: 0.0
+average reward score: 5.5
+-------------------------------------------------------------------------------------
+|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.58s (67.05%) |Training time=0.47s (19.71%) |Others=0.31 (13.24%)|CurSamplesPerSec=13.55 |AvgSamplesPerSec=14.45
+[2023-04-14 10:24:43,163] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 2634|ppo_ep: 1|act_loss: 0.01061248779296875|cri_loss: 0.00551605224609375|unsuper_loss: 0.0
+average reward score: 5.1875
+-------------------------------------------------------------------------------------
+|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.81%) |Training time=0.44s (20.57%) |Others=0.10 (4.62%)|CurSamplesPerSec=15.09 |AvgSamplesPerSec=14.45
+[2023-04-14 10:24:45,295] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 2635|ppo_ep: 1|act_loss: 0.00872039794921875|cri_loss: 0.005645751953125|unsuper_loss: 0.0
+average reward score: 4.89453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.91%) |Training time=0.44s (20.51%) |Others=0.10 (4.58%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2636|ppo_ep: 1|act_loss: 0.0165863037109375|cri_loss: 0.0097808837890625|unsuper_loss: 0.0
+average reward score: 5.81640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.19%) |Training time=0.47s (21.32%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2637|ppo_ep: 1|act_loss: 0.00852203369140625|cri_loss: 0.004772186279296875|unsuper_loss: 0.0
+average reward score: 5.375
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.22%) |Training time=0.49s (21.46%) |Others=0.10 (4.32%)|CurSamplesPerSec=14.08 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2638|ppo_ep: 1|act_loss: -0.0084228515625|cri_loss: -0.003665924072265625|unsuper_loss: 0.0
+average reward score: 5.5234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.63%) |Training time=0.47s (21.80%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
+[2023-04-14 10:24:54,042] [INFO] [logging.py:96:log_dist] [Rank 0] step=2640, skipped=34, lr=[7.573198939494354e-06, 7.573198939494354e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:24:54,736] [INFO] [timer.py:199:stop] epoch=0/micro_step=2640/global_step=2640, RunningAvgSamplesPerSec=106.54140046893538, CurrSamplesPerSec=32.69383225358878, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:24:54,829] [INFO] [logging.py:96:log_dist] [Rank 0] step=2640, skipped=40, lr=[3.928680160104563e-06, 3.928680160104563e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2639|ppo_ep: 1|act_loss: 0.0277099609375|cri_loss: 0.01422119140625|unsuper_loss: 0.0
+average reward score: 5.0859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.83s |Gather latency=0.00s (0.00%) |Generate time=1.59s (56.17%) |Training time=1.14s (40.35%) |Others=0.10 (3.48%)|CurSamplesPerSec=11.31 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2640|ppo_ep: 1|act_loss: 0.0245819091796875|cri_loss: 0.01282501220703125|unsuper_loss: 0.0
+average reward score: 5.2265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.19%) |Training time=0.46s (21.23%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2641|ppo_ep: 1|act_loss: -0.00899505615234375|cri_loss: -0.00397491455078125|unsuper_loss: 0.0
+average reward score: 5.80078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.14%) |Training time=0.44s (20.29%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2642|ppo_ep: 1|act_loss: -0.00818634033203125|cri_loss: -0.00392913818359375|unsuper_loss: 0.0
+average reward score: 5.5390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.94%) |Training time=0.46s (21.48%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2643|ppo_ep: 1|act_loss: -0.0084381103515625|cri_loss: -0.003757476806640625|unsuper_loss: 0.0
+average reward score: 5.59375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.68%) |Training time=0.47s (21.74%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2644|ppo_ep: 1|act_loss: 0.021514892578125|cri_loss: 0.0110626220703125|unsuper_loss: 0.0
+average reward score: 4.75390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.35%) |Training time=0.47s (21.96%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2645|ppo_ep: 1|act_loss: 0.02435302734375|cri_loss: 0.01253509521484375|unsuper_loss: 0.0
+average reward score: 5.77734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.58s (70.90%) |Training time=0.49s (21.96%) |Others=0.16 (7.14%)|CurSamplesPerSec=14.34 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2646|ppo_ep: 1|act_loss: 0.00466156005859375|cri_loss: 0.002773284912109375|unsuper_loss: 0.0
+average reward score: 5.30859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.36%) |Training time=0.48s (21.81%) |Others=0.11 (4.83%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2647|ppo_ep: 1|act_loss: -0.0088958740234375|cri_loss: -0.0033321380615234375|unsuper_loss: 0.0
+average reward score: 5.34765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.37%) |Training time=0.47s (22.05%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2648|ppo_ep: 1|act_loss: 0.00727081298828125|cri_loss: 0.004428863525390625|unsuper_loss: 0.0
+average reward score: 5.75
+-------------------------------------------------------------------------------------
+|E2E latency=3.23s |Gather latency=0.00s (0.00%) |Generate time=1.58s (49.01%) |Training time=0.47s (14.46%) |Others=1.18 (36.53%)|CurSamplesPerSec=9.92 |AvgSamplesPerSec=14.45
+[2023-04-14 10:25:17,411] [INFO] [logging.py:96:log_dist] [Rank 0] step=2650, skipped=34, lr=[7.557910093162319e-06, 7.557910093162319e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:25:17,429] [INFO] [timer.py:199:stop] epoch=0/micro_step=2650/global_step=2650, RunningAvgSamplesPerSec=106.53553553330283, CurrSamplesPerSec=104.7835965849119, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:25:17,522] [INFO] [logging.py:96:log_dist] [Rank 0] step=2650, skipped=40, lr=[3.9207711277082896e-06, 3.9207711277082896e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2649|ppo_ep: 1|act_loss: -0.00821685791015625|cri_loss: -0.003902435302734375|unsuper_loss: 0.0
+average reward score: 5.953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.62%) |Training time=0.47s (21.81%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2650|ppo_ep: 1|act_loss: -0.0178070068359375|cri_loss: -0.007549285888671875|unsuper_loss: 0.0
+average reward score: 5.2265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.45%) |Training time=0.48s (22.02%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2651|ppo_ep: 1|act_loss: 0.04248046875|cri_loss: 0.02374267578125|unsuper_loss: 0.0
+average reward score: 5.234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.65%) |Training time=0.47s (21.76%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2652|ppo_ep: 1|act_loss: 0.02752685546875|cri_loss: 0.01483917236328125|unsuper_loss: 0.0
+average reward score: 5.07421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.47s (22.06%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2653|ppo_ep: 1|act_loss: 0.027435302734375|cri_loss: 0.01416015625|unsuper_loss: 0.0
+average reward score: 5.66015625
+-------------------------------------------------------------------------------------
+|E2E latency=3.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (49.52%) |Training time=0.48s (14.95%) |Others=1.13 (35.54%)|CurSamplesPerSec=10.06 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2654|ppo_ep: 1|act_loss: 0.0149993896484375|cri_loss: 0.0079498291015625|unsuper_loss: 0.0
+average reward score: 5.51953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.48%) |Training time=0.47s (21.93%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2655|ppo_ep: 1|act_loss: 0.026458740234375|cri_loss: 0.014007568359375|unsuper_loss: 0.0
+average reward score: 4.9140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.92%) |Training time=0.49s (22.55%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2656|ppo_ep: 1|act_loss: 0.0091094970703125|cri_loss: 0.0047760009765625|unsuper_loss: 0.0
+average reward score: 5.56640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.24%) |Training time=0.49s (22.27%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2657|ppo_ep: 1|act_loss: -0.015716552734375|cri_loss: -0.007526397705078125|unsuper_loss: 0.0
+average reward score: 5.484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.54%) |Training time=0.47s (21.89%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2658|ppo_ep: 1|act_loss: 0.0038909912109375|cri_loss: 0.0020961761474609375|unsuper_loss: 0.0
+average reward score: 5.37890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.73s |Gather latency=0.00s (0.00%) |Generate time=1.58s (57.72%) |Training time=0.48s (17.49%) |Others=0.68 (24.79%)|CurSamplesPerSec=11.71 |AvgSamplesPerSec=14.44
+[2023-04-14 10:25:40,790] [INFO] [logging.py:96:log_dist] [Rank 0] step=2660, skipped=34, lr=[7.542580738509916e-06, 7.542580738509916e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:25:40,809] [INFO] [timer.py:199:stop] epoch=0/micro_step=2660/global_step=2660, RunningAvgSamplesPerSec=106.51279863471905, CurrSamplesPerSec=93.23595653916608, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:25:40,901] [INFO] [logging.py:96:log_dist] [Rank 0] step=2660, skipped=40, lr=[3.91284103605648e-06, 3.91284103605648e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2659|ppo_ep: 1|act_loss: -0.034912109375|cri_loss: -0.01702880859375|unsuper_loss: 0.0
+average reward score: 5.25390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.74s (74.23%) |Training time=0.51s (21.59%) |Others=0.10 (4.18%)|CurSamplesPerSec=13.65 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2660|ppo_ep: 1|act_loss: -0.05914306640625|cri_loss: -0.029052734375|unsuper_loss: 0.0
+average reward score: 5.34765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.74%) |Training time=0.47s (21.69%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2661|ppo_ep: 1|act_loss: 0.003200531005859375|cri_loss: 0.0020351409912109375|unsuper_loss: 0.0
+average reward score: 5.640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.74%) |Training time=0.47s (21.70%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2662|ppo_ep: 1|act_loss: 0.0149688720703125|cri_loss: 0.0079803466796875|unsuper_loss: 0.0
+average reward score: 5.62109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.52%) |Training time=0.47s (21.90%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2663|ppo_ep: 1|act_loss: 0.048736572265625|cri_loss: 0.02532958984375|unsuper_loss: 0.0
+average reward score: 6.01171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.48s (22.09%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2664|ppo_ep: 1|act_loss: 0.064208984375|cri_loss: 0.03277587890625|unsuper_loss: 0.0
+average reward score: 5.31640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.64%) |Training time=0.48s (21.92%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2665|ppo_ep: 1|act_loss: 0.0640869140625|cri_loss: 0.034332275390625|unsuper_loss: 0.0
+average reward score: 5.02734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.68s (73.92%) |Training time=0.50s (21.75%) |Others=0.10 (4.32%)|CurSamplesPerSec=14.05 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2666|ppo_ep: 1|act_loss: -0.00470733642578125|cri_loss: -0.001983642578125|unsuper_loss: 0.0
+average reward score: 4.765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.08%) |Training time=0.46s (21.31%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2667|ppo_ep: 1|act_loss: -0.029388427734375|cri_loss: -0.0142974853515625|unsuper_loss: 0.0
+average reward score: 5.41015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.66%) |Training time=0.44s (20.74%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2668|ppo_ep: 1|act_loss: -0.01666259765625|cri_loss: -0.007843017578125|unsuper_loss: 0.0
+average reward score: 5.3515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.00%) |Training time=0.46s (21.24%) |Others=0.10 (4.77%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44
+[2023-04-14 10:26:02,506] [INFO] [logging.py:96:log_dist] [Rank 0] step=2670, skipped=34, lr=[7.5272111027551624e-06, 7.5272111027551624e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:26:02,524] [INFO] [timer.py:199:stop] epoch=0/micro_step=2670/global_step=2670, RunningAvgSamplesPerSec=106.50561698981126, CurrSamplesPerSec=109.33183449574705, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:26:02,618] [INFO] [logging.py:96:log_dist] [Rank 0] step=2670, skipped=40, lr=[3.904890002692223e-06, 3.904890002692223e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2669|ppo_ep: 1|act_loss: -0.06109619140625|cri_loss: -0.0295562744140625|unsuper_loss: 0.0
+average reward score: 5.3046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.23%) |Training time=0.46s (21.19%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2670|ppo_ep: 1|act_loss: -0.0343017578125|cri_loss: -0.0163726806640625|unsuper_loss: 0.0
+average reward score: 5.1484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.63%) |Training time=0.48s (21.84%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2671|ppo_ep: 1|act_loss: 0.048797607421875|cri_loss: 0.027435302734375|unsuper_loss: 0.0
+average reward score: 5.2578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.31%) |Training time=0.48s (22.12%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2672|ppo_ep: 1|act_loss: -0.0225067138671875|cri_loss: -0.01085662841796875|unsuper_loss: 0.0
+average reward score: 5.3203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.51%) |Training time=0.47s (21.91%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2673|ppo_ep: 1|act_loss: 0.0294189453125|cri_loss: 0.01519012451171875|unsuper_loss: 0.0
+average reward score: 5.8359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.48s (22.07%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2674|ppo_ep: 1|act_loss: 0.05877685546875|cri_loss: 0.03143310546875|unsuper_loss: 0.0
+average reward score: 5.453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.68s |Gather latency=0.00s (0.00%) |Generate time=1.58s (58.89%) |Training time=0.49s (18.37%) |Others=0.61 (22.75%)|CurSamplesPerSec=11.95 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2675|ppo_ep: 1|act_loss: -0.0154571533203125|cri_loss: -0.00719451904296875|unsuper_loss: 0.0
+average reward score: 5.52734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.48s (22.23%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2676|ppo_ep: 1|act_loss: -0.0118865966796875|cri_loss: -0.004611968994140625|unsuper_loss: 0.0
+average reward score: 5.19921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.30%) |Training time=0.48s (22.12%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2677|ppo_ep: 1|act_loss: 0.06536865234375|cri_loss: 0.0384521484375|unsuper_loss: 0.0
+average reward score: 5.265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.20%) |Training time=0.48s (22.24%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2678|ppo_ep: 1|act_loss: -0.0438232421875|cri_loss: -0.021148681640625|unsuper_loss: 0.0
+average reward score: 5.61328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.23%) |Training time=0.48s (22.20%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44
+[2023-04-14 10:26:24,579] [INFO] [logging.py:96:log_dist] [Rank 0] step=2680, skipped=34, lr=[7.511801413713135e-06, 7.511801413713135e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:26:24,597] [INFO] [timer.py:199:stop] epoch=0/micro_step=2680/global_step=2680, RunningAvgSamplesPerSec=106.48575335734733, CurrSamplesPerSec=102.37766112435794, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:26:24,690] [INFO] [logging.py:96:log_dist] [Rank 0] step=2680, skipped=40, lr=[3.896918145469013e-06, 3.896918145469013e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2679|ppo_ep: 1|act_loss: -0.02886962890625|cri_loss: -0.014129638671875|unsuper_loss: 0.0
+average reward score: 5.70703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.31%) |Training time=0.47s (22.13%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2680|ppo_ep: 1|act_loss: 0.028411865234375|cri_loss: 0.0149383544921875|unsuper_loss: 0.0
+average reward score: 5.359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.57s (69.21%) |Training time=0.48s (21.17%) |Others=0.22 (9.62%)|CurSamplesPerSec=14.07 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2681|ppo_ep: 1|act_loss: -0.0035610198974609375|cri_loss: -0.0012836456298828125|unsuper_loss: 0.0
+average reward score: 5.421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.07%) |Training time=0.48s (22.37%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2682|ppo_ep: 1|act_loss: 0.0278167724609375|cri_loss: 0.01442718505859375|unsuper_loss: 0.0
+average reward score: 5.39453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.13%) |Training time=0.48s (22.30%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2683|ppo_ep: 1|act_loss: 0.00504302978515625|cri_loss: 0.003444671630859375|unsuper_loss: 0.0
+average reward score: 5.39453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.28%) |Training time=0.48s (22.21%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2684|ppo_ep: 1|act_loss: 0.0285491943359375|cri_loss: 0.0146942138671875|unsuper_loss: 0.0
+average reward score: 5.25
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.23%) |Training time=0.48s (22.19%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2685|ppo_ep: 1|act_loss: -0.033966064453125|cri_loss: -0.016143798828125|unsuper_loss: 0.0
+average reward score: 5.8046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.01%) |Training time=0.49s (22.46%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2686|ppo_ep: 1|act_loss: 0.04290771484375|cri_loss: 0.02264404296875|unsuper_loss: 0.0
+average reward score: 5.5546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.41%) |Training time=0.47s (22.02%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2687|ppo_ep: 1|act_loss: -0.041107177734375|cri_loss: -0.0183868408203125|unsuper_loss: 0.0
+average reward score: 5.41796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.36%) |Training time=0.47s (22.08%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2688|ppo_ep: 1|act_loss: -0.061004638671875|cri_loss: -0.029205322265625|unsuper_loss: 0.0
+average reward score: 5.61328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.35%) |Training time=0.48s (22.11%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44
+[2023-04-14 10:26:46,274] [INFO] [logging.py:96:log_dist] [Rank 0] step=2690, skipped=34, lr=[7.496351899792602e-06, 7.496351899792602e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:26:46,292] [INFO] [timer.py:199:stop] epoch=0/micro_step=2690/global_step=2690, RunningAvgSamplesPerSec=106.46413319981575, CurrSamplesPerSec=102.74899790854556, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:26:46,385] [INFO] [logging.py:96:log_dist] [Rank 0] step=2690, skipped=40, lr=[3.888925582549006e-06, 3.888925582549006e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2689|ppo_ep: 1|act_loss: 0.00598907470703125|cri_loss: 0.00348663330078125|unsuper_loss: 0.0
+average reward score: 5.0078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.41%) |Training time=0.47s (22.04%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2690|ppo_ep: 1|act_loss: -0.04083251953125|cri_loss: -0.019989013671875|unsuper_loss: 0.0
+average reward score: 5.6796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.05%) |Training time=0.50s (20.98%) |Others=0.12 (4.97%)|CurSamplesPerSec=13.55 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2691|ppo_ep: 1|act_loss: -0.048583984375|cri_loss: -0.022552490234375|unsuper_loss: 0.0
+average reward score: 5.2109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.76%) |Training time=0.47s (21.66%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2692|ppo_ep: 1|act_loss: -0.016754150390625|cri_loss: -0.00821685791015625|unsuper_loss: 0.0
+average reward score: 5.5546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.31%) |Training time=0.47s (21.88%) |Others=0.10 (4.81%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2693|ppo_ep: 1|act_loss: -0.0209808349609375|cri_loss: -0.01012420654296875|unsuper_loss: 0.0
+average reward score: 5.796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.22%) |Training time=0.49s (22.27%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2694|ppo_ep: 1|act_loss: -0.0040435791015625|cri_loss: -0.0018329620361328125|unsuper_loss: 0.0
+average reward score: 5.69140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.04%) |Training time=0.49s (21.66%) |Others=0.10 (4.31%)|CurSamplesPerSec=14.05 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2695|ppo_ep: 1|act_loss: -0.008453369140625|cri_loss: -0.003986358642578125|unsuper_loss: 0.0
+average reward score: 5.58984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.46%) |Training time=0.47s (21.97%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2696|ppo_ep: 1|act_loss: 0.064208984375|cri_loss: 0.03363037109375|unsuper_loss: 0.0
+average reward score: 5.484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.51%) |Training time=0.47s (21.92%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2697|ppo_ep: 1|act_loss: 0.016510009765625|cri_loss: 0.00865936279296875|unsuper_loss: 0.0
+average reward score: 5.46875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.55%) |Training time=0.47s (21.90%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2698|ppo_ep: 1|act_loss: -0.002437591552734375|cri_loss: -0.0010318756103515625|unsuper_loss: 0.0
+average reward score: 5.7421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.46%) |Training time=0.47s (21.98%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
+[2023-04-14 10:27:08,167] [INFO] [logging.py:96:log_dist] [Rank 0] step=2700, skipped=34, lr=[7.480862789992629e-06, 7.480862789992629e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:27:08,185] [INFO] [timer.py:199:stop] epoch=0/micro_step=2700/global_step=2700, RunningAvgSamplesPerSec=106.4442632540804, CurrSamplesPerSec=101.70059117973773, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:27:08,278] [INFO] [logging.py:96:log_dist] [Rank 0] step=2700, skipped=40, lr=[3.880912432401265e-06, 3.880912432401265e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2699|ppo_ep: 1|act_loss: -0.00817108154296875|cri_loss: -0.003662109375|unsuper_loss: 0.0
+average reward score: 5.45703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.27%) |Training time=0.48s (22.18%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2700|ppo_ep: 1|act_loss: -0.0207061767578125|cri_loss: -0.00963592529296875|unsuper_loss: 0.0
+average reward score: 5.48046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.44%) |Training time=0.48s (22.05%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2701|ppo_ep: 1|act_loss: 0.0119171142578125|cri_loss: 0.0084075927734375|unsuper_loss: 0.0
+average reward score: 5.1640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.61%) |Training time=0.47s (21.82%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2702|ppo_ep: 1|act_loss: 0.043212890625|cri_loss: 0.0235595703125|unsuper_loss: 0.0
+average reward score: 5.390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.53s |Gather latency=0.00s (0.00%) |Generate time=1.58s (62.43%) |Training time=0.48s (18.84%) |Others=0.47 (18.73%)|CurSamplesPerSec=12.64 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2703|ppo_ep: 1|act_loss: 0.019866943359375|cri_loss: 0.011932373046875|unsuper_loss: 0.0
+average reward score: 5.12890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.62%) |Training time=0.47s (21.80%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2704|ppo_ep: 1|act_loss: -0.0029621124267578125|cri_loss: -0.0009555816650390625|unsuper_loss: 0.0
+average reward score: 5.4375
+-------------------------------------------------------------------------------------
+|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.68s (71.74%) |Training time=0.56s (24.05%) |Others=0.10 (4.21%)|CurSamplesPerSec=13.64 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2705|ppo_ep: 1|act_loss: -0.01299285888671875|cri_loss: -0.006378173828125|unsuper_loss: 0.0
+average reward score: 5.4296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.43%) |Training time=0.47s (22.00%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2706|ppo_ep: 1|act_loss: -0.016571044921875|cri_loss: -0.0079193115234375|unsuper_loss: 0.0
+average reward score: 5.5234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.59%) |Training time=0.47s (21.84%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2707|ppo_ep: 1|act_loss: -0.016357421875|cri_loss: -0.00804901123046875|unsuper_loss: 0.0
+average reward score: 5.52734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.53%) |Training time=0.47s (21.90%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2708|ppo_ep: 1|act_loss: 0.13623046875|cri_loss: 0.07940673828125|unsuper_loss: 0.0
+average reward score: 4.8671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.39%) |Training time=0.48s (22.07%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45
+[2023-04-14 10:27:30,285] [INFO] [logging.py:96:log_dist] [Rank 0] step=2710, skipped=34, lr=[7.465334313899188e-06, 7.465334313899188e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:27:30,304] [INFO] [timer.py:199:stop] epoch=0/micro_step=2710/global_step=2710, RunningAvgSamplesPerSec=106.42161862003059, CurrSamplesPerSec=104.10446923041124, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:27:30,396] [INFO] [logging.py:96:log_dist] [Rank 0] step=2710, skipped=40, lr=[3.8728788138000064e-06, 3.8728788138000064e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2709|ppo_ep: 1|act_loss: -0.020050048828125|cri_loss: -0.00969696044921875|unsuper_loss: 0.0
+average reward score: 5.328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.62%) |Training time=0.47s (21.83%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2710|ppo_ep: 1|act_loss: 0.007110595703125|cri_loss: 0.003948211669921875|unsuper_loss: 0.0
+average reward score: 5.4921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.88%) |Training time=0.49s (22.58%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2711|ppo_ep: 1|act_loss: -0.0193634033203125|cri_loss: -0.007534027099609375|unsuper_loss: 0.0
+average reward score: 5.2578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.39%) |Training time=0.48s (22.04%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2712|ppo_ep: 1|act_loss: 0.0090179443359375|cri_loss: 0.004947662353515625|unsuper_loss: 0.0
+average reward score: 5.53125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.68%) |Training time=0.47s (21.75%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2713|ppo_ep: 1|act_loss: -0.0018482208251953125|cri_loss: -0.00031375885009765625|unsuper_loss: 0.0
+average reward score: 5.57421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.46s |Gather latency=0.00s (0.00%) |Generate time=1.58s (64.20%) |Training time=0.47s (19.22%) |Others=0.41 (16.59%)|CurSamplesPerSec=13.00 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2714|ppo_ep: 1|act_loss: -0.03118896484375|cri_loss: -0.01531982421875|unsuper_loss: 0.0
+average reward score: 6.10546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.96%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2715|ppo_ep: 1|act_loss: -0.035400390625|cri_loss: -0.016998291015625|unsuper_loss: 0.0
+average reward score: 5.68359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.51%) |Training time=0.48s (21.96%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2716|ppo_ep: 1|act_loss: -0.0191192626953125|cri_loss: -0.0081634521484375|unsuper_loss: 0.0
+average reward score: 5.05859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.09%) |Training time=0.48s (22.32%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2717|ppo_ep: 1|act_loss: 0.01910400390625|cri_loss: 0.010162353515625|unsuper_loss: 0.0
+average reward score: 5.80859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.18%) |Training time=0.48s (22.27%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2718|ppo_ep: 1|act_loss: 0.0635986328125|cri_loss: 0.035491943359375|unsuper_loss: 0.0
+average reward score: 5.53125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.50%) |Training time=0.47s (21.94%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
+[2023-04-14 10:27:52,375] [INFO] [logging.py:96:log_dist] [Rank 0] step=2720, skipped=34, lr=[7.4497667016817536e-06, 7.4497667016817536e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:27:52,393] [INFO] [timer.py:199:stop] epoch=0/micro_step=2720/global_step=2720, RunningAvgSamplesPerSec=106.40045873498524, CurrSamplesPerSec=95.01305938452128, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:27:52,486] [INFO] [logging.py:96:log_dist] [Rank 0] step=2720, skipped=40, lr=[3.864824845822837e-06, 3.864824845822837e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2719|ppo_ep: 1|act_loss: 0.00815582275390625|cri_loss: 0.004367828369140625|unsuper_loss: 0.0
+average reward score: 5.6328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.53%) |Training time=0.50s (21.30%) |Others=0.10 (4.18%)|CurSamplesPerSec=13.64 |AvgSamplesPerSec=14.45
+[2023-04-14 10:27:54,627] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 2720|ppo_ep: 1|act_loss: 0.1143798828125|cri_loss: 0.06793212890625|unsuper_loss: 0.0
+average reward score: 5.37890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.05%) |Training time=0.47s (21.86%) |Others=0.09 (4.09%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.45
+[2023-04-14 10:27:56,766] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 2721|ppo_ep: 1|act_loss: -0.01043701171875|cri_loss: -0.004398345947265625|unsuper_loss: 0.0
+average reward score: 5.41796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.16%) |Training time=0.47s (21.75%) |Others=0.09 (4.09%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2722|ppo_ep: 1|act_loss: 0.0159454345703125|cri_loss: 0.0082550048828125|unsuper_loss: 0.0
+average reward score: 5.39453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.23%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2723|ppo_ep: 1|act_loss: 0.03460693359375|cri_loss: 0.01800537109375|unsuper_loss: 0.0
+average reward score: 4.86328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.61s (71.37%) |Training time=0.55s (24.27%) |Others=0.10 (4.36%)|CurSamplesPerSec=14.23 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2724|ppo_ep: 1|act_loss: 0.005645751953125|cri_loss: 0.003017425537109375|unsuper_loss: 0.0
+average reward score: 5.21484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.51%) |Training time=0.47s (21.91%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2725|ppo_ep: 1|act_loss: 0.01123046875|cri_loss: 0.006679534912109375|unsuper_loss: 0.0
+average reward score: 5.51171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.19%) |Training time=0.48s (22.22%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2726|ppo_ep: 1|act_loss: -0.03509521484375|cri_loss: -0.01535797119140625|unsuper_loss: 0.0
+average reward score: 5.453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.48s (22.10%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2727|ppo_ep: 1|act_loss: -0.026458740234375|cri_loss: -0.01274871826171875|unsuper_loss: 0.0
+average reward score: 5.49609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.48%) |Training time=0.47s (21.94%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2728|ppo_ep: 1|act_loss: -0.011383056640625|cri_loss: -0.005321502685546875|unsuper_loss: 0.0
+average reward score: 5.37890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.55s |Gather latency=0.00s (0.00%) |Generate time=1.57s (61.74%) |Training time=0.48s (18.74%) |Others=0.50 (19.52%)|CurSamplesPerSec=12.54 |AvgSamplesPerSec=14.45
+[2023-04-14 10:28:14,381] [INFO] [logging.py:96:log_dist] [Rank 0] step=2730, skipped=34, lr=[7.434160184089898e-06, 7.434160184089898e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:28:14,399] [INFO] [timer.py:199:stop] epoch=0/micro_step=2730/global_step=2730, RunningAvgSamplesPerSec=106.37902719012551, CurrSamplesPerSec=101.46724635971965, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:28:14,492] [INFO] [logging.py:96:log_dist] [Rank 0] step=2730, skipped=42, lr=[3.8583671001032015e-06, 3.8583671001032015e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2729|ppo_ep: 1|act_loss: 0.02996826171875|cri_loss: 0.0161590576171875|unsuper_loss: 0.0
+average reward score: 5.2265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.22%) |Training time=0.48s (22.20%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2730|ppo_ep: 1|act_loss: -0.06396484375|cri_loss: -0.0307159423828125|unsuper_loss: 0.0
+average reward score: 5.01953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.33%) |Training time=0.46s (21.11%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2731|ppo_ep: 1|act_loss: 0.0201263427734375|cri_loss: 0.0107421875|unsuper_loss: 0.0
+average reward score: 5.6328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.28%) |Training time=0.46s (21.14%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2732|ppo_ep: 1|act_loss: 0.0035247802734375|cri_loss: 0.002044677734375|unsuper_loss: 0.0
+average reward score: 5.04296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.99%) |Training time=0.46s (21.43%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2733|ppo_ep: 1|act_loss: 0.03350830078125|cri_loss: 0.017333984375|unsuper_loss: 0.0
+average reward score: 5.53515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.05%) |Training time=0.52s (23.51%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.42 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2734|ppo_ep: 1|act_loss: 0.0076904296875|cri_loss: 0.004077911376953125|unsuper_loss: 0.0
+average reward score: 5.3671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.15%) |Training time=0.46s (21.28%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2735|ppo_ep: 1|act_loss: 0.029144287109375|cri_loss: 0.01526641845703125|unsuper_loss: 0.0
+average reward score: 5.296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.90%) |Training time=0.47s (21.54%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45
+[2023-04-14 10:28:29,527] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 2736|ppo_ep: 1|act_loss: 0.005260467529296875|cri_loss: 0.00360870361328125|unsuper_loss: 0.0
+average reward score: 5.15625
+-------------------------------------------------------------------------------------
+|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.33%) |Training time=0.43s (20.06%) |Others=0.10 (4.61%)|CurSamplesPerSec=15.09 |AvgSamplesPerSec=14.45
+[2023-04-14 10:28:31,651] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 2737|ppo_ep: 1|act_loss: -0.044464111328125|cri_loss: -0.0214996337890625|unsuper_loss: 0.0
+average reward score: 4.6875
+-------------------------------------------------------------------------------------
+|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.31%) |Training time=0.43s (20.06%) |Others=0.10 (4.63%)|CurSamplesPerSec=15.07 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2738|ppo_ep: 1|act_loss: 0.0011739730834960938|cri_loss: 0.000949859619140625|unsuper_loss: 0.0
+average reward score: 5.28515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.01%) |Training time=0.47s (21.49%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.45
+[2023-04-14 10:28:35,964] [INFO] [logging.py:96:log_dist] [Rank 0] step=2740, skipped=36, lr=[7.421647113578771e-06, 7.421647113578771e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:28:35,983] [INFO] [timer.py:199:stop] epoch=0/micro_step=2740/global_step=2740, RunningAvgSamplesPerSec=106.38762931965792, CurrSamplesPerSec=108.80439440355586, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:28:36,075] [INFO] [logging.py:96:log_dist] [Rank 0] step=2740, skipped=42, lr=[3.850276804286629e-06, 3.850276804286629e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2739|ppo_ep: 1|act_loss: -0.00032520294189453125|cri_loss: 0.0002455711364746094|unsuper_loss: 0.0
+average reward score: 5.57421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.22%) |Training time=0.46s (21.23%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2740|ppo_ep: 1|act_loss: -0.03570556640625|cri_loss: -0.0167694091796875|unsuper_loss: 0.0
+average reward score: 5.21875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.46s (21.15%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2741|ppo_ep: 1|act_loss: -0.00420379638671875|cri_loss: 0.002628326416015625|unsuper_loss: 0.0
+average reward score: 5.55859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.15%) |Training time=0.46s (21.20%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2742|ppo_ep: 1|act_loss: 0.03314208984375|cri_loss: 0.0172271728515625|unsuper_loss: 0.0
+average reward score: 5.1796875
+-------------------------------------------------------------------------------------
+|E2E latency=3.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (50.06%) |Training time=0.46s (14.54%) |Others=1.13 (35.40%)|CurSamplesPerSec=10.05 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2743|ppo_ep: 1|act_loss: 0.0535888671875|cri_loss: 0.030548095703125|unsuper_loss: 0.0
+average reward score: 4.8125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.45s (21.16%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2744|ppo_ep: 1|act_loss: 0.04168701171875|cri_loss: 0.021636962890625|unsuper_loss: 0.0
+average reward score: 5.18359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.46s (21.46%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2745|ppo_ep: 1|act_loss: 0.0287628173828125|cri_loss: 0.01482391357421875|unsuper_loss: 0.0
+average reward score: 6.07421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.24%) |Training time=0.46s (21.22%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2746|ppo_ep: 1|act_loss: 0.0121002197265625|cri_loss: 0.006809234619140625|unsuper_loss: 0.0
+average reward score: 5.1875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.41%) |Training time=0.45s (21.00%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2747|ppo_ep: 1|act_loss: -0.0285797119140625|cri_loss: -0.0139007568359375|unsuper_loss: 0.0
+average reward score: 5.5859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.44s |Gather latency=0.00s (0.00%) |Generate time=1.60s (65.48%) |Training time=0.45s (18.31%) |Others=0.40 (16.21%)|CurSamplesPerSec=13.13 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2748|ppo_ep: 1|act_loss: -0.047943115234375|cri_loss: -0.0233612060546875|unsuper_loss: 0.0
+average reward score: 5.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.76s (76.32%) |Training time=0.45s (19.41%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.87 |AvgSamplesPerSec=14.44
+[2023-04-14 10:28:58,976] [INFO] [logging.py:96:log_dist] [Rank 0] step=2750, skipped=36, lr=[7.405971149640408e-06, 7.405971149640408e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:28:58,994] [INFO] [timer.py:199:stop] epoch=0/micro_step=2750/global_step=2750, RunningAvgSamplesPerSec=106.39908594796809, CurrSamplesPerSec=114.45823124842768, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:28:59,087] [INFO] [logging.py:96:log_dist] [Rank 0] step=2750, skipped=42, lr=[3.842166494110451e-06, 3.842166494110451e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2749|ppo_ep: 1|act_loss: 0.000629425048828125|cri_loss: 0.002536773681640625|unsuper_loss: 0.0
+average reward score: 5.46484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.82%) |Training time=0.44s (20.61%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2750|ppo_ep: 1|act_loss: -0.030181884765625|cri_loss: -0.013763427734375|unsuper_loss: 0.0
+average reward score: 5.140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.41%) |Training time=0.45s (21.01%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2751|ppo_ep: 1|act_loss: -0.0014810562133789062|cri_loss: -0.00019550323486328125|unsuper_loss: 0.0
+average reward score: 5.28125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.31%) |Training time=0.45s (21.08%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2752|ppo_ep: 1|act_loss: 0.04644775390625|cri_loss: 0.02606201171875|unsuper_loss: 0.0
+average reward score: 5.5625
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.69%) |Training time=0.45s (19.96%) |Others=0.10 (4.35%)|CurSamplesPerSec=14.13 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2753|ppo_ep: 1|act_loss: 0.08673095703125|cri_loss: 0.046783447265625|unsuper_loss: 0.0
+average reward score: 5.1171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.51%) |Training time=0.45s (20.91%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2754|ppo_ep: 1|act_loss: -0.0129241943359375|cri_loss: -0.00525665283203125|unsuper_loss: 0.0
+average reward score: 6.1953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.83s |Gather latency=0.00s (0.00%) |Generate time=1.60s (56.37%) |Training time=0.45s (15.83%) |Others=0.79 (27.80%)|CurSamplesPerSec=11.29 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2755|ppo_ep: 1|act_loss: 0.030029296875|cri_loss: 0.0172119140625|unsuper_loss: 0.0
+average reward score: 5.08984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.58%) |Training time=0.45s (20.83%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2756|ppo_ep: 1|act_loss: -0.00177001953125|cri_loss: -0.0005936622619628906|unsuper_loss: 0.0
+average reward score: 4.97265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.64%) |Training time=0.45s (20.79%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2757|ppo_ep: 1|act_loss: 0.004314422607421875|cri_loss: 0.00274658203125|unsuper_loss: 0.0
+average reward score: 4.90234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.58%) |Training time=0.45s (20.82%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2758|ppo_ep: 1|act_loss: -0.0126495361328125|cri_loss: -0.0058746337890625|unsuper_loss: 0.0
+average reward score: 5.8046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.38%) |Training time=0.45s (21.04%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44
+[2023-04-14 10:29:21,250] [INFO] [logging.py:96:log_dist] [Rank 0] step=2760, skipped=36, lr=[7.390256929483354e-06, 7.390256929483354e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:29:21,268] [INFO] [timer.py:199:stop] epoch=0/micro_step=2760/global_step=2760, RunningAvgSamplesPerSec=106.42223778280272, CurrSamplesPerSec=131.88637259169658, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:29:21,361] [INFO] [logging.py:96:log_dist] [Rank 0] step=2760, skipped=42, lr=[3.83403628978903e-06, 3.83403628978903e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2759|ppo_ep: 1|act_loss: -0.024261474609375|cri_loss: -0.00824737548828125|unsuper_loss: 0.0
+average reward score: 5.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.64s (76.47%) |Training time=0.41s (18.93%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2760|ppo_ep: 1|act_loss: 0.00678253173828125|cri_loss: 0.003955841064453125|unsuper_loss: 0.0
+average reward score: 5.1875
+-------------------------------------------------------------------------------------
+|E2E latency=3.42s |Gather latency=0.00s (0.00%) |Generate time=1.61s (47.01%) |Training time=0.43s (12.67%) |Others=1.38 (40.32%)|CurSamplesPerSec=9.35 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2761|ppo_ep: 1|act_loss: -0.016937255859375|cri_loss: -0.0078582763671875|unsuper_loss: 0.0
+average reward score: 5.7109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.11%) |Training time=0.50s (22.90%) |Others=0.11 (4.99%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2762|ppo_ep: 1|act_loss: 0.0010814666748046875|cri_loss: 0.0017833709716796875|unsuper_loss: 0.0
+average reward score: 4.83203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.46s (21.38%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2763|ppo_ep: 1|act_loss: 0.028076171875|cri_loss: 0.014312744140625|unsuper_loss: 0.0
+average reward score: 5.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.86%) |Training time=0.46s (21.56%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2764|ppo_ep: 1|act_loss: 0.03607177734375|cri_loss: 0.0192108154296875|unsuper_loss: 0.0
+average reward score: 5.265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.92%) |Training time=0.47s (21.53%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2765|ppo_ep: 1|act_loss: 0.00830078125|cri_loss: 0.00431060791015625|unsuper_loss: 0.0
+average reward score: 6.3828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.20%) |Training time=0.45s (21.21%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2766|ppo_ep: 1|act_loss: -0.0135498046875|cri_loss: -0.004924774169921875|unsuper_loss: 0.0
+average reward score: 5.12890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.60%) |Training time=0.47s (21.83%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2767|ppo_ep: 1|act_loss: -0.008453369140625|cri_loss: -0.003582000732421875|unsuper_loss: 0.0
+average reward score: 5.421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.28%) |Training time=0.48s (22.15%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2768|ppo_ep: 1|act_loss: -0.0152740478515625|cri_loss: -0.006717681884765625|unsuper_loss: 0.0
+average reward score: 5.26953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.62%) |Training time=0.47s (21.79%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.44
+[2023-04-14 10:29:44,042] [INFO] [logging.py:96:log_dist] [Rank 0] step=2770, skipped=36, lr=[7.37450468603026e-06, 7.37450468603026e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:29:44,060] [INFO] [timer.py:199:stop] epoch=0/micro_step=2770/global_step=2770, RunningAvgSamplesPerSec=106.41993681795651, CurrSamplesPerSec=104.47658694697775, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:29:44,153] [INFO] [logging.py:96:log_dist] [Rank 0] step=2770, skipped=42, lr=[3.825886311831606e-06, 3.825886311831606e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2769|ppo_ep: 1|act_loss: 0.055755615234375|cri_loss: 0.0288848876953125|unsuper_loss: 0.0
+average reward score: 5.76171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.60%) |Training time=0.47s (21.84%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2770|ppo_ep: 1|act_loss: -0.013946533203125|cri_loss: -0.006404876708984375|unsuper_loss: 0.0
+average reward score: 5.30859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.43s |Gather latency=0.00s (0.00%) |Generate time=1.58s (65.02%) |Training time=0.47s (19.46%) |Others=0.38 (15.52%)|CurSamplesPerSec=13.19 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2771|ppo_ep: 1|act_loss: 0.01171875|cri_loss: 0.006114959716796875|unsuper_loss: 0.0
+average reward score: 5.0859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.53%) |Training time=0.47s (21.90%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2772|ppo_ep: 1|act_loss: -0.00611114501953125|cri_loss: -0.0029449462890625|unsuper_loss: 0.0
+average reward score: 5.19921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.20%) |Training time=0.48s (22.24%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2773|ppo_ep: 1|act_loss: 0.00923919677734375|cri_loss: 0.005191802978515625|unsuper_loss: 0.0
+average reward score: 5.0859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.35%) |Training time=0.47s (22.06%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2774|ppo_ep: 1|act_loss: -0.0199432373046875|cri_loss: -0.00905609130859375|unsuper_loss: 0.0
+average reward score: 5.078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.14%) |Training time=0.46s (21.26%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2775|ppo_ep: 1|act_loss: -0.026885986328125|cri_loss: -0.01313018798828125|unsuper_loss: 0.0
+average reward score: 5.03515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.07%) |Training time=0.46s (21.31%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2776|ppo_ep: 1|act_loss: -0.0113983154296875|cri_loss: -0.00531005859375|unsuper_loss: 0.0
+average reward score: 5.3984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.83%) |Training time=0.46s (21.57%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2777|ppo_ep: 1|act_loss: -0.0291748046875|cri_loss: -0.014312744140625|unsuper_loss: 0.0
+average reward score: 5.0390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.08%) |Training time=0.50s (22.59%) |Others=0.12 (5.33%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2778|ppo_ep: 1|act_loss: 0.044647216796875|cri_loss: 0.0236968994140625|unsuper_loss: 0.0
+average reward score: 5.8828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.14%) |Training time=0.46s (21.28%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.44
+[2023-04-14 10:30:05,837] [INFO] [logging.py:96:log_dist] [Rank 0] step=2780, skipped=36, lr=[7.358714652767375e-06, 7.358714652767375e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:30:05,856] [INFO] [timer.py:199:stop] epoch=0/micro_step=2780/global_step=2780, RunningAvgSamplesPerSec=106.41343644976286, CurrSamplesPerSec=105.34027242011247, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:30:05,955] [INFO] [logging.py:96:log_dist] [Rank 0] step=2780, skipped=42, lr=[3.817716681040515e-06, 3.817716681040515e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2779|ppo_ep: 1|act_loss: 0.07403564453125|cri_loss: 0.03955078125|unsuper_loss: 0.0
+average reward score: 5.375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.65%) |Training time=0.47s (21.58%) |Others=0.10 (4.77%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2780|ppo_ep: 1|act_loss: 0.03387451171875|cri_loss: 0.0186920166015625|unsuper_loss: 0.0
+average reward score: 4.8359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.51%) |Training time=0.46s (21.19%) |Others=0.12 (5.29%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2781|ppo_ep: 1|act_loss: 0.061767578125|cri_loss: 0.032928466796875|unsuper_loss: 0.0
+average reward score: 5.06640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.51%) |Training time=0.45s (20.13%) |Others=0.10 (4.36%)|CurSamplesPerSec=14.21 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2782|ppo_ep: 1|act_loss: -0.02105712890625|cri_loss: -0.007843017578125|unsuper_loss: 0.0
+average reward score: 4.9453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.91%) |Training time=0.46s (21.49%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2783|ppo_ep: 1|act_loss: -0.0007939338684082031|cri_loss: -0.00011396408081054688|unsuper_loss: 0.0
+average reward score: 5.2734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.07%) |Training time=0.46s (21.34%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2784|ppo_ep: 1|act_loss: -0.057586669921875|cri_loss: -0.0283203125|unsuper_loss: 0.0
+average reward score: 5.1484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.45s (21.14%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2785|ppo_ep: 1|act_loss: -0.04052734375|cri_loss: -0.019744873046875|unsuper_loss: 0.0
+average reward score: 5.296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.70%) |Training time=0.47s (21.72%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2786|ppo_ep: 1|act_loss: 0.03509521484375|cri_loss: 0.0211029052734375|unsuper_loss: 0.0
+average reward score: 4.828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.43s |Gather latency=0.00s (0.00%) |Generate time=1.58s (65.20%) |Training time=0.46s (19.06%) |Others=0.38 (15.74%)|CurSamplesPerSec=13.17 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2787|ppo_ep: 1|act_loss: -0.0209197998046875|cri_loss: -0.0095672607421875|unsuper_loss: 0.0
+average reward score: 5.1328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.47%) |Training time=0.45s (20.92%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2788|ppo_ep: 1|act_loss: 0.0149383544921875|cri_loss: 0.00780487060546875|unsuper_loss: 0.0
+average reward score: 5.140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.78%) |Training time=0.46s (21.62%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.44
+[2023-04-14 10:30:27,697] [INFO] [logging.py:96:log_dist] [Rank 0] step=2790, skipped=36, lr=[7.34288706374108e-06, 7.34288706374108e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:30:27,715] [INFO] [timer.py:199:stop] epoch=0/micro_step=2790/global_step=2790, RunningAvgSamplesPerSec=106.42220408681138, CurrSamplesPerSec=112.92798048000672, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:30:27,808] [INFO] [logging.py:96:log_dist] [Rank 0] step=2790, skipped=42, lr=[3.8095275185093927e-06, 3.8095275185093927e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2789|ppo_ep: 1|act_loss: -0.0347900390625|cri_loss: -0.0169677734375|unsuper_loss: 0.0
+average reward score: 5.078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.51%) |Training time=0.45s (20.90%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2790|ppo_ep: 1|act_loss: -0.03375244140625|cri_loss: -0.01496124267578125|unsuper_loss: 0.0
+average reward score: 5.703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.27%) |Training time=0.45s (21.13%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2791|ppo_ep: 1|act_loss: 0.0228271484375|cri_loss: 0.012115478515625|unsuper_loss: 0.0
+average reward score: 5.27734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.55%) |Training time=0.47s (21.88%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2792|ppo_ep: 1|act_loss: 0.0075836181640625|cri_loss: 0.00460052490234375|unsuper_loss: 0.0
+average reward score: 5.42578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.52s |Gather latency=0.00s (0.00%) |Generate time=1.77s (69.97%) |Training time=0.46s (18.37%) |Others=0.29 (11.66%)|CurSamplesPerSec=12.68 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2793|ppo_ep: 1|act_loss: 0.034515380859375|cri_loss: 0.018157958984375|unsuper_loss: 0.0
+average reward score: 4.91015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.72%) |Training time=0.47s (21.70%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2794|ppo_ep: 1|act_loss: -0.01983642578125|cri_loss: -0.0094757080078125|unsuper_loss: 0.0
+average reward score: 5.43359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.94%) |Training time=0.46s (21.46%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2795|ppo_ep: 1|act_loss: -0.021636962890625|cri_loss: -0.01044464111328125|unsuper_loss: 0.0
+average reward score: 5.48828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.67%) |Training time=0.47s (21.74%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2796|ppo_ep: 1|act_loss: -0.01271820068359375|cri_loss: -0.0045928955078125|unsuper_loss: 0.0
+average reward score: 5.3203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.63%) |Training time=0.47s (21.80%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2797|ppo_ep: 1|act_loss: 0.015655517578125|cri_loss: 0.0091094970703125|unsuper_loss: 0.0
+average reward score: 5.40234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.56%) |Training time=0.47s (21.87%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2798|ppo_ep: 1|act_loss: 0.00893402099609375|cri_loss: 0.004947662353515625|unsuper_loss: 0.0
+average reward score: 5.15234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.43%) |Training time=0.47s (21.99%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
+[2023-04-14 10:30:49,522] [INFO] [logging.py:96:log_dist] [Rank 0] step=2800, skipped=36, lr=[7.327022153554431e-06, 7.327022153554431e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:30:49,540] [INFO] [timer.py:199:stop] epoch=0/micro_step=2800/global_step=2800, RunningAvgSamplesPerSec=106.41914468193407, CurrSamplesPerSec=104.58469930026338, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:30:49,633] [INFO] [logging.py:96:log_dist] [Rank 0] step=2800, skipped=42, lr=[3.8013189456213825e-06, 3.8013189456213825e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2799|ppo_ep: 1|act_loss: -0.0113983154296875|cri_loss: -0.00405120849609375|unsuper_loss: 0.0
+average reward score: 5.8203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.61%) |Training time=0.47s (21.83%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2800|ppo_ep: 1|act_loss: -0.0082550048828125|cri_loss: -0.002552032470703125|unsuper_loss: 0.0
+average reward score: 5.296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.96%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2801|ppo_ep: 1|act_loss: -0.04388427734375|cri_loss: -0.0208587646484375|unsuper_loss: 0.0
+average reward score: 5.52734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.67%) |Training time=0.47s (21.75%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2802|ppo_ep: 1|act_loss: -0.00791168212890625|cri_loss: -0.003131866455078125|unsuper_loss: 0.0
+average reward score: 5.01171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.52%) |Training time=0.47s (21.90%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2803|ppo_ep: 1|act_loss: 0.00707244873046875|cri_loss: 0.00591278076171875|unsuper_loss: 0.0
+average reward score: 5.3515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.71%) |Training time=0.47s (21.71%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2804|ppo_ep: 1|act_loss: -0.0084075927734375|cri_loss: -0.003631591796875|unsuper_loss: 0.0
+average reward score: 5.16015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.87s |Gather latency=0.00s (0.00%) |Generate time=1.59s (55.46%) |Training time=0.46s (16.10%) |Others=0.81 (28.43%)|CurSamplesPerSec=11.17 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2805|ppo_ep: 1|act_loss: 0.0189971923828125|cri_loss: 0.0105438232421875|unsuper_loss: 0.0
+average reward score: 4.80859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.33%) |Training time=0.48s (22.09%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2806|ppo_ep: 1|act_loss: 0.057586669921875|cri_loss: 0.0298919677734375|unsuper_loss: 0.0
+average reward score: 5.4375
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.82%) |Training time=0.46s (19.91%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.88 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2807|ppo_ep: 1|act_loss: 0.03485107421875|cri_loss: 0.01806640625|unsuper_loss: 0.0
+average reward score: 5.671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.12%) |Training time=0.46s (21.29%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2808|ppo_ep: 1|act_loss: 0.054534912109375|cri_loss: 0.0288238525390625|unsuper_loss: 0.0
+average reward score: 5.7109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.85%) |Training time=0.46s (21.57%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.44
+[2023-04-14 10:31:11,871] [INFO] [logging.py:96:log_dist] [Rank 0] step=2810, skipped=36, lr=[7.311120157363665e-06, 7.311120157363665e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:31:11,889] [INFO] [timer.py:199:stop] epoch=0/micro_step=2810/global_step=2810, RunningAvgSamplesPerSec=106.41754482170897, CurrSamplesPerSec=110.76354693624923, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:31:11,994] [INFO] [logging.py:96:log_dist] [Rank 0] step=2810, skipped=42, lr=[3.7930910840473377e-06, 3.7930910840473377e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2809|ppo_ep: 1|act_loss: 0.0004031658172607422|cri_loss: 0.00038695335388183594|unsuper_loss: 0.0
+average reward score: 5.171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.45s (20.91%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2810|ppo_ep: 1|act_loss: 0.01690673828125|cri_loss: 0.0100555419921875|unsuper_loss: 0.0
+average reward score: 5.55859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.63s (71.55%) |Training time=0.55s (24.14%) |Others=0.10 (4.31%)|CurSamplesPerSec=14.04 |AvgSamplesPerSec=14.44
+epoch: 0|step: 2811|ppo_ep: 1|act_loss: -0.0279998779296875|cri_loss: -0.01287841796875|unsuper_loss: 0.0
+average reward score: 5.0
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.59%) |Training time=0.47s (21.83%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2812|ppo_ep: 1|act_loss: -0.038330078125|cri_loss: -0.0186614990234375|unsuper_loss: 0.0
+average reward score: 5.25390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.16%) |Training time=0.48s (22.31%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2813|ppo_ep: 1|act_loss: 0.032928466796875|cri_loss: 0.017578125|unsuper_loss: 0.0
+average reward score: 5.19140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.10%) |Training time=0.48s (22.35%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2814|ppo_ep: 1|act_loss: -0.0133514404296875|cri_loss: -0.006317138671875|unsuper_loss: 0.0
+average reward score: 5.7109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.59%) |Training time=0.47s (21.83%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2815|ppo_ep: 1|act_loss: 0.0244140625|cri_loss: 0.01275634765625|unsuper_loss: 0.0
+average reward score: 5.12109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.89%) |Training time=0.49s (22.57%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2816|ppo_ep: 1|act_loss: -0.0089569091796875|cri_loss: -0.0037841796875|unsuper_loss: 0.0
+average reward score: 5.28515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.28%) |Training time=0.50s (23.21%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2817|ppo_ep: 1|act_loss: -0.0140380859375|cri_loss: -0.006824493408203125|unsuper_loss: 0.0
+average reward score: 5.35546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.02%) |Training time=0.48s (22.39%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2818|ppo_ep: 1|act_loss: 0.002536773681640625|cri_loss: 0.0032253265380859375|unsuper_loss: 0.0
+average reward score: 5.5546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.27%) |Training time=0.48s (22.14%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
+[2023-04-14 10:31:33,595] [INFO] [logging.py:96:log_dist] [Rank 0] step=2820, skipped=36, lr=[7.295181310874729e-06, 7.295181310874729e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:31:33,613] [INFO] [timer.py:199:stop] epoch=0/micro_step=2820/global_step=2820, RunningAvgSamplesPerSec=106.38886203729886, CurrSamplesPerSec=102.92391691103396, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:31:33,832] [INFO] [logging.py:96:log_dist] [Rank 0] step=2820, skipped=42, lr=[3.7848440557440147e-06, 3.7848440557440147e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2819|ppo_ep: 1|act_loss: 0.03643798828125|cri_loss: 0.021148681640625|unsuper_loss: 0.0
+average reward score: 4.859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.61s (69.73%) |Training time=0.60s (26.01%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.88 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2820|ppo_ep: 1|act_loss: -0.03076171875|cri_loss: -0.01486968994140625|unsuper_loss: 0.0
+average reward score: 5.1328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.86%) |Training time=0.46s (21.57%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2821|ppo_ep: 1|act_loss: -0.05303955078125|cri_loss: -0.026031494140625|unsuper_loss: 0.0
+average reward score: 4.98828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.48%) |Training time=0.47s (21.94%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.45
+[2023-04-14 10:31:40,440] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 2822|ppo_ep: 1|act_loss: -0.04193115234375|cri_loss: -0.0196075439453125|unsuper_loss: 0.0
+average reward score: 6.0390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.45%) |Training time=0.48s (20.77%) |Others=0.09 (3.78%)|CurSamplesPerSec=13.83 |AvgSamplesPerSec=14.45
+[2023-04-14 10:31:42,585] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 2823|ppo_ep: 1|act_loss: 0.017669677734375|cri_loss: 0.0095062255859375|unsuper_loss: 0.0
+average reward score: 5.2734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.55%) |Training time=0.48s (22.38%) |Others=0.09 (4.07%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2824|ppo_ep: 1|act_loss: -0.035980224609375|cri_loss: -0.0175323486328125|unsuper_loss: 0.0
+average reward score: 5.15625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.31%) |Training time=0.48s (22.13%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2825|ppo_ep: 1|act_loss: 0.005672454833984375|cri_loss: 0.003009796142578125|unsuper_loss: 0.0
+average reward score: 5.50390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.29%) |Training time=0.47s (21.70%) |Others=0.13 (6.01%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2826|ppo_ep: 1|act_loss: -0.0008726119995117188|cri_loss: -0.00019288063049316406|unsuper_loss: 0.0
+average reward score: 5.31640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.49%) |Training time=0.47s (21.94%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2827|ppo_ep: 1|act_loss: -0.0009431838989257812|cri_loss: -7.43865966796875e-05|unsuper_loss: 0.0
+average reward score: 4.7890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.43%) |Training time=0.47s (21.99%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2828|ppo_ep: 1|act_loss: 0.00569915771484375|cri_loss: 0.003047943115234375|unsuper_loss: 0.0
+average reward score: 5.125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.33%) |Training time=0.47s (22.10%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
+[2023-04-14 10:31:55,391] [INFO] [logging.py:96:log_dist] [Rank 0] step=2830, skipped=36, lr=[7.2792058503397775e-06, 7.2792058503397775e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:31:55,410] [INFO] [timer.py:199:stop] epoch=0/micro_step=2830/global_step=2830, RunningAvgSamplesPerSec=106.37661744892732, CurrSamplesPerSec=104.80478821218995, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:31:55,502] [INFO] [logging.py:96:log_dist] [Rank 0] step=2830, skipped=44, lr=[3.77823271519263e-06, 3.77823271519263e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2829|ppo_ep: 1|act_loss: 0.0153961181640625|cri_loss: 0.0088958740234375|unsuper_loss: 0.0
+average reward score: 4.9453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.62%) |Training time=0.47s (21.81%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2830|ppo_ep: 1|act_loss: -0.013885498046875|cri_loss: -0.00673675537109375|unsuper_loss: 0.0
+average reward score: 5.1484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.18%) |Training time=0.48s (22.24%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2831|ppo_ep: 1|act_loss: -0.035552978515625|cri_loss: -0.015960693359375|unsuper_loss: 0.0
+average reward score: 5.71875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.19%) |Training time=0.48s (22.21%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2832|ppo_ep: 1|act_loss: -0.01444244384765625|cri_loss: -0.0068206787109375|unsuper_loss: 0.0
+average reward score: 5.41796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.49%) |Training time=0.47s (21.93%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2833|ppo_ep: 1|act_loss: -0.024932861328125|cri_loss: -0.01194000244140625|unsuper_loss: 0.0
+average reward score: 5.50390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.55%) |Training time=0.47s (21.88%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2834|ppo_ep: 1|act_loss: 0.0003077983856201172|cri_loss: 0.00028443336486816406|unsuper_loss: 0.0
+average reward score: 5.77734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.88%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2835|ppo_ep: 1|act_loss: -0.03741455078125|cri_loss: -0.017669677734375|unsuper_loss: 0.0
+average reward score: 5.36328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.45%) |Training time=0.45s (20.97%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2836|ppo_ep: 1|act_loss: -0.024627685546875|cri_loss: -0.0111541748046875|unsuper_loss: 0.0
+average reward score: 4.69921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.76%) |Training time=0.44s (20.67%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2837|ppo_ep: 1|act_loss: -0.0279388427734375|cri_loss: -0.0135650634765625|unsuper_loss: 0.0
+average reward score: 5.10546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.39%) |Training time=0.47s (20.39%) |Others=0.10 (4.22%)|CurSamplesPerSec=13.74 |AvgSamplesPerSec=14.45
+[2023-04-14 10:32:14,915] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 2838|ppo_ep: 1|act_loss: -0.03369140625|cri_loss: -0.01508331298828125|unsuper_loss: 0.0
+average reward score: 5.00390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.42%) |Training time=0.45s (20.97%) |Others=0.10 (4.61%)|CurSamplesPerSec=15.06 |AvgSamplesPerSec=14.45
+[2023-04-14 10:32:17,061] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+[2023-04-14 10:32:17,062] [INFO] [logging.py:96:log_dist] [Rank 0] step=2840, skipped=38, lr=[7.266399278906688e-06, 7.266399278906688e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:32:17,062] [INFO] [timer.py:199:stop] epoch=0/micro_step=2840/global_step=2840, RunningAvgSamplesPerSec=106.37877097595695, CurrSamplesPerSec=116.98970678860347, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:32:17,154] [INFO] [logging.py:96:log_dist] [Rank 0] step=2840, skipped=44, lr=[3.769951495013317e-06, 3.769951495013317e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2839|ppo_ep: 1|act_loss: -0.0142669677734375|cri_loss: -0.006763458251953125|unsuper_loss: 0.0
+average reward score: 5.46875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.15%) |Training time=0.44s (20.29%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2840|ppo_ep: 1|act_loss: -0.00069427490234375|cri_loss: 0.0004968643188476562|unsuper_loss: 0.0
+average reward score: 5.0859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.45s |Gather latency=0.00s (0.00%) |Generate time=1.68s (68.55%) |Training time=0.47s (19.18%) |Others=0.30 (12.27%)|CurSamplesPerSec=13.07 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2841|ppo_ep: 1|act_loss: 0.0382080078125|cri_loss: 0.0198822021484375|unsuper_loss: 0.0
+average reward score: 5.45703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.94%) |Training time=0.46s (21.47%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2842|ppo_ep: 1|act_loss: 0.035308837890625|cri_loss: 0.0183868408203125|unsuper_loss: 0.0
+average reward score: 5.484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.11%) |Training time=0.48s (22.33%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2843|ppo_ep: 1|act_loss: -0.0150146484375|cri_loss: -0.007251739501953125|unsuper_loss: 0.0
+average reward score: 4.95703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.51%) |Training time=0.47s (21.91%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2844|ppo_ep: 1|act_loss: -0.037109375|cri_loss: -0.017974853515625|unsuper_loss: 0.0
+average reward score: 4.9296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.09%) |Training time=0.48s (22.36%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2845|ppo_ep: 1|act_loss: -0.0166168212890625|cri_loss: -0.007476806640625|unsuper_loss: 0.0
+average reward score: 5.37109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.96%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2846|ppo_ep: 1|act_loss: -0.0270538330078125|cri_loss: -0.01276397705078125|unsuper_loss: 0.0
+average reward score: 5.375
+-------------------------------------------------------------------------------------
+|E2E latency=3.05s |Gather latency=0.00s (0.00%) |Generate time=1.58s (51.83%) |Training time=0.46s (15.23%) |Others=1.00 (32.94%)|CurSamplesPerSec=10.50 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2847|ppo_ep: 1|act_loss: -0.033111572265625|cri_loss: -0.01593017578125|unsuper_loss: 0.0
+average reward score: 5.75
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.66%) |Training time=0.47s (21.74%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2848|ppo_ep: 1|act_loss: -0.0134124755859375|cri_loss: -0.006237030029296875|unsuper_loss: 0.0
+average reward score: 5.95703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.77%) |Training time=0.47s (21.72%) |Others=0.12 (5.51%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.45
+[2023-04-14 10:32:39,758] [INFO] [logging.py:96:log_dist] [Rank 0] step=2850, skipped=38, lr=[7.2503585101744275e-06, 7.2503585101744275e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:32:39,776] [INFO] [timer.py:199:stop] epoch=0/micro_step=2850/global_step=2850, RunningAvgSamplesPerSec=106.37187852437273, CurrSamplesPerSec=110.91549223611467, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:32:39,869] [INFO] [logging.py:96:log_dist] [Rank 0] step=2850, skipped=44, lr=[3.7616514510892553e-06, 3.7616514510892553e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2849|ppo_ep: 1|act_loss: -0.0085906982421875|cri_loss: -0.002605438232421875|unsuper_loss: 0.0
+average reward score: 5.4609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.57%) |Training time=0.45s (20.86%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2850|ppo_ep: 1|act_loss: 0.0133056640625|cri_loss: 0.00717926025390625|unsuper_loss: 0.0
+average reward score: 5.35546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.34%) |Training time=0.48s (22.14%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2851|ppo_ep: 1|act_loss: 0.03900146484375|cri_loss: 0.021392822265625|unsuper_loss: 0.0
+average reward score: 5.0234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.86%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2852|ppo_ep: 1|act_loss: 0.02392578125|cri_loss: 0.01236724853515625|unsuper_loss: 0.0
+average reward score: 5.390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.74s (75.60%) |Training time=0.46s (20.13%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.87 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2853|ppo_ep: 1|act_loss: 0.0272216796875|cri_loss: 0.01763916015625|unsuper_loss: 0.0
+average reward score: 4.8515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.36%) |Training time=0.47s (21.99%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2854|ppo_ep: 1|act_loss: 0.0024585723876953125|cri_loss: 0.001834869384765625|unsuper_loss: 0.0
+average reward score: 5.734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.67%) |Training time=0.49s (22.60%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2855|ppo_ep: 1|act_loss: -0.029083251953125|cri_loss: -0.0135650634765625|unsuper_loss: 0.0
+average reward score: 4.68359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.96%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2856|ppo_ep: 1|act_loss: -0.001056671142578125|cri_loss: -0.0002918243408203125|unsuper_loss: 0.0
+average reward score: 5.53515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.86%) |Training time=0.49s (22.56%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2857|ppo_ep: 1|act_loss: -0.02532958984375|cri_loss: -0.01251983642578125|unsuper_loss: 0.0
+average reward score: 4.75390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.86%) |Training time=0.49s (22.59%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2858|ppo_ep: 1|act_loss: -0.033660888671875|cri_loss: -0.0146331787109375|unsuper_loss: 0.0
+average reward score: 5.171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.95s |Gather latency=0.00s (0.00%) |Generate time=1.58s (53.57%) |Training time=0.50s (16.83%) |Others=0.87 (29.60%)|CurSamplesPerSec=10.85 |AvgSamplesPerSec=14.45
+[2023-04-14 10:33:02,332] [INFO] [logging.py:96:log_dist] [Rank 0] step=2860, skipped=38, lr=[7.234281791778185e-06, 7.234281791778185e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:33:02,351] [INFO] [timer.py:199:stop] epoch=0/micro_step=2860/global_step=2860, RunningAvgSamplesPerSec=106.34976605325895, CurrSamplesPerSec=101.42614741121662, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:33:02,444] [INFO] [logging.py:96:log_dist] [Rank 0] step=2860, skipped=44, lr=[3.7533327064471216e-06, 3.7533327064471216e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2859|ppo_ep: 1|act_loss: 0.00896453857421875|cri_loss: 0.00466156005859375|unsuper_loss: 0.0
+average reward score: 5.48828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.30%) |Training time=0.48s (22.12%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2860|ppo_ep: 1|act_loss: 0.04473876953125|cri_loss: 0.0229949951171875|unsuper_loss: 0.0
+average reward score: 5.2421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.07%) |Training time=0.49s (22.40%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2861|ppo_ep: 1|act_loss: -0.00661468505859375|cri_loss: -0.002811431884765625|unsuper_loss: 0.0
+average reward score: 5.1328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.71%) |Training time=0.49s (22.76%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2862|ppo_ep: 1|act_loss: 0.0772705078125|cri_loss: 0.0430908203125|unsuper_loss: 0.0
+average reward score: 5.4453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.78%) |Training time=0.46s (21.63%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2863|ppo_ep: 1|act_loss: 0.0021076202392578125|cri_loss: 0.0012416839599609375|unsuper_loss: 0.0
+average reward score: 5.20703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.69%) |Training time=0.47s (21.70%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2864|ppo_ep: 1|act_loss: -0.037628173828125|cri_loss: -0.017913818359375|unsuper_loss: 0.0
+average reward score: 5.50390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.47s (21.85%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2865|ppo_ep: 1|act_loss: -0.0204315185546875|cri_loss: -0.0097503662109375|unsuper_loss: 0.0
+average reward score: 4.85546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.06%) |Training time=0.48s (22.38%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2866|ppo_ep: 1|act_loss: -0.007228851318359375|cri_loss: -0.0034275054931640625|unsuper_loss: 0.0
+average reward score: 4.875
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.74s (74.63%) |Training time=0.49s (21.17%) |Others=0.10 (4.20%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2867|ppo_ep: 1|act_loss: -0.015838623046875|cri_loss: -0.007709503173828125|unsuper_loss: 0.0
+average reward score: 5.40625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.04%) |Training time=0.49s (22.43%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2868|ppo_ep: 1|act_loss: -0.0140533447265625|cri_loss: -0.006755828857421875|unsuper_loss: 0.0
+average reward score: 5.16796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.38%) |Training time=0.49s (22.14%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.45
+[2023-04-14 10:33:24,269] [INFO] [logging.py:96:log_dist] [Rank 0] step=2870, skipped=38, lr=[7.2181693620137145e-06, 7.2181693620137145e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:33:24,287] [INFO] [timer.py:199:stop] epoch=0/micro_step=2870/global_step=2870, RunningAvgSamplesPerSec=106.32509058550373, CurrSamplesPerSec=95.30749151435104, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:33:24,380] [INFO] [logging.py:96:log_dist] [Rank 0] step=2870, skipped=44, lr=[3.7449953843907764e-06, 3.7449953843907764e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2869|ppo_ep: 1|act_loss: -0.019073486328125|cri_loss: -0.00899505615234375|unsuper_loss: 0.0
+average reward score: 5.3984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.68s (73.84%) |Training time=0.50s (21.87%) |Others=0.10 (4.29%)|CurSamplesPerSec=14.05 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2870|ppo_ep: 1|act_loss: -0.05633544921875|cri_loss: -0.0259857177734375|unsuper_loss: 0.0
+average reward score: 5.1171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.62%) |Training time=0.50s (22.87%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2871|ppo_ep: 1|act_loss: 0.0181427001953125|cri_loss: 0.00942230224609375|unsuper_loss: 0.0
+average reward score: 5.8125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.89%) |Training time=0.49s (22.57%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2872|ppo_ep: 1|act_loss: 0.0108642578125|cri_loss: 0.005611419677734375|unsuper_loss: 0.0
+average reward score: 5.11328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.69%) |Training time=0.49s (22.71%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2873|ppo_ep: 1|act_loss: 0.064453125|cri_loss: 0.033233642578125|unsuper_loss: 0.0
+average reward score: 5.5234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.75%) |Training time=0.49s (22.73%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2874|ppo_ep: 1|act_loss: 0.0105133056640625|cri_loss: 0.00571441650390625|unsuper_loss: 0.0
+average reward score: 5.421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.26%) |Training time=0.48s (22.20%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2875|ppo_ep: 1|act_loss: -0.03668212890625|cri_loss: -0.0172271728515625|unsuper_loss: 0.0
+average reward score: 5.26953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.14%) |Training time=0.48s (22.30%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2876|ppo_ep: 1|act_loss: 0.009979248046875|cri_loss: 0.00543212890625|unsuper_loss: 0.0
+average reward score: 5.0703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.12%) |Training time=0.48s (22.32%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2877|ppo_ep: 1|act_loss: -0.024017333984375|cri_loss: -0.01128387451171875|unsuper_loss: 0.0
+average reward score: 5.1328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.71%) |Training time=0.49s (22.76%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2878|ppo_ep: 1|act_loss: -0.0335693359375|cri_loss: -0.0164337158203125|unsuper_loss: 0.0
+average reward score: 4.734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.78%) |Training time=0.49s (22.33%) |Others=0.13 (5.88%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.45
+[2023-04-14 10:33:45,963] [INFO] [logging.py:96:log_dist] [Rank 0] step=2880, skipped=38, lr=[7.20202145970609e-06, 7.20202145970609e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:33:45,981] [INFO] [timer.py:199:stop] epoch=0/micro_step=2880/global_step=2880, RunningAvgSamplesPerSec=106.29718321043626, CurrSamplesPerSec=108.31802231928128, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:33:46,081] [INFO] [logging.py:96:log_dist] [Rank 0] step=2880, skipped=44, lr=[3.736639608499448e-06, 3.736639608499448e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2879|ppo_ep: 1|act_loss: -0.027557373046875|cri_loss: -0.01270294189453125|unsuper_loss: 0.0
+average reward score: 5.40625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.46s (21.23%) |Others=0.11 (4.89%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2880|ppo_ep: 1|act_loss: -0.0287933349609375|cri_loss: -0.0141143798828125|unsuper_loss: 0.0
+average reward score: 5.84765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.13%) |Training time=0.46s (21.27%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2881|ppo_ep: 1|act_loss: -0.0151824951171875|cri_loss: -0.006744384765625|unsuper_loss: 0.0
+average reward score: 5.1640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.76s (76.27%) |Training time=0.45s (19.46%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.88 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2882|ppo_ep: 1|act_loss: 0.0058746337890625|cri_loss: 0.003101348876953125|unsuper_loss: 0.0
+average reward score: 5.203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.02%) |Training time=0.46s (21.41%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2883|ppo_ep: 1|act_loss: 0.01129150390625|cri_loss: 0.0064697265625|unsuper_loss: 0.0
+average reward score: 5.9609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.90%) |Training time=0.46s (21.50%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2884|ppo_ep: 1|act_loss: -0.00933837890625|cri_loss: -0.003841400146484375|unsuper_loss: 0.0
+average reward score: 5.28125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.35%) |Training time=0.45s (21.05%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2885|ppo_ep: 1|act_loss: -0.0268402099609375|cri_loss: -0.0126953125|unsuper_loss: 0.0
+average reward score: 5.79296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.70%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2886|ppo_ep: 1|act_loss: 0.00786590576171875|cri_loss: 0.004169464111328125|unsuper_loss: 0.0
+average reward score: 5.4765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.73%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2887|ppo_ep: 1|act_loss: -0.00860595703125|cri_loss: -0.004085540771484375|unsuper_loss: 0.0
+average reward score: 5.53515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.96%) |Training time=0.49s (22.49%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2888|ppo_ep: 1|act_loss: 0.017120361328125|cri_loss: 0.00911712646484375|unsuper_loss: 0.0
+average reward score: 5.16015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.43%) |Training time=0.45s (20.87%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.45
+[2023-04-14 10:34:07,712] [INFO] [logging.py:96:log_dist] [Rank 0] step=2890, skipped=38, lr=[7.185838324206182e-06, 7.185838324206182e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:34:07,730] [INFO] [timer.py:199:stop] epoch=0/micro_step=2890/global_step=2890, RunningAvgSamplesPerSec=106.30418029404586, CurrSamplesPerSec=124.88239437340486, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:34:07,822] [INFO] [logging.py:96:log_dist] [Rank 0] step=2890, skipped=44, lr=[3.7282655026258895e-06, 3.7282655026258895e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2889|ppo_ep: 1|act_loss: -0.0262298583984375|cri_loss: -0.01256561279296875|unsuper_loss: 0.0
+average reward score: 4.9765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.13%) |Training time=0.42s (19.31%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2890|ppo_ep: 1|act_loss: -0.00704193115234375|cri_loss: -0.0017490386962890625|unsuper_loss: 0.0
+average reward score: 5.2578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.68%) |Training time=0.47s (21.77%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2891|ppo_ep: 1|act_loss: 0.01342010498046875|cri_loss: 0.0097503662109375|unsuper_loss: 0.0
+average reward score: 5.26953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.30%) |Training time=0.46s (21.15%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2892|ppo_ep: 1|act_loss: 0.007518768310546875|cri_loss: 0.00547027587890625|unsuper_loss: 0.0
+average reward score: 5.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.92%) |Training time=0.47s (21.54%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2893|ppo_ep: 1|act_loss: 0.09027099609375|cri_loss: 0.05419921875|unsuper_loss: 0.0
+average reward score: 5.16015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.78%) |Training time=0.47s (21.65%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2894|ppo_ep: 1|act_loss: 0.01409912109375|cri_loss: 0.00913238525390625|unsuper_loss: 0.0
+average reward score: 3.931640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.80%) |Training time=0.43s (19.65%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2895|ppo_ep: 1|act_loss: 0.0149078369140625|cri_loss: 0.0116729736328125|unsuper_loss: 0.0
+average reward score: 5.12109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.99%) |Training time=0.44s (20.34%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2896|ppo_ep: 1|act_loss: 0.025390625|cri_loss: 0.01457977294921875|unsuper_loss: 0.0
+average reward score: 4.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.22%) |Training time=0.48s (20.54%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2897|ppo_ep: 1|act_loss: 0.033203125|cri_loss: 0.0169677734375|unsuper_loss: 0.0
+average reward score: 5.1640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.75%) |Training time=0.48s (21.76%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2898|ppo_ep: 1|act_loss: 0.0023441314697265625|cri_loss: 0.00432586669921875|unsuper_loss: 0.0
+average reward score: 5.03125
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.66%) |Training time=0.48s (21.01%) |Others=0.10 (4.33%)|CurSamplesPerSec=14.11 |AvgSamplesPerSec=14.45
+[2023-04-14 10:34:29,698] [INFO] [logging.py:96:log_dist] [Rank 0] step=2900, skipped=38, lr=[7.169620195387097e-06, 7.169620195387097e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:34:29,717] [INFO] [timer.py:199:stop] epoch=0/micro_step=2900/global_step=2900, RunningAvgSamplesPerSec=106.30332220155994, CurrSamplesPerSec=100.94465398682478, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:34:29,809] [INFO] [logging.py:96:log_dist] [Rank 0] step=2900, skipped=44, lr=[3.719873190894554e-06, 3.719873190894554e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2899|ppo_ep: 1|act_loss: 0.0145416259765625|cri_loss: 0.007717132568359375|unsuper_loss: 0.0
+average reward score: 5.48046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.26%) |Training time=0.48s (22.21%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2900|ppo_ep: 1|act_loss: 0.007343292236328125|cri_loss: 0.0048675537109375|unsuper_loss: 0.0
+average reward score: 5.23828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.67%) |Training time=0.47s (21.79%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2901|ppo_ep: 1|act_loss: 0.00492095947265625|cri_loss: 0.00273895263671875|unsuper_loss: 0.0
+average reward score: 5.69140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.82%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2902|ppo_ep: 1|act_loss: 0.000827789306640625|cri_loss: 0.001277923583984375|unsuper_loss: 0.0
+average reward score: 5.17578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.50s |Gather latency=0.00s (0.00%) |Generate time=1.59s (63.82%) |Training time=0.47s (18.99%) |Others=0.43 (17.19%)|CurSamplesPerSec=12.82 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2903|ppo_ep: 1|act_loss: -0.0103912353515625|cri_loss: -0.004791259765625|unsuper_loss: 0.0
+average reward score: 5.3671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.74%) |Training time=0.47s (21.73%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2904|ppo_ep: 1|act_loss: 0.003299713134765625|cri_loss: 0.0017385482788085938|unsuper_loss: 0.0
+average reward score: 5.2578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.48s (21.97%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2905|ppo_ep: 1|act_loss: -0.005413055419921875|cri_loss: -0.0025043487548828125|unsuper_loss: 0.0
+average reward score: 5.4765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.85%) |Training time=0.47s (21.60%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2906|ppo_ep: 1|act_loss: 0.001773834228515625|cri_loss: 0.0010547637939453125|unsuper_loss: 0.0
+average reward score: 5.65625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.86%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2907|ppo_ep: 1|act_loss: 0.0262451171875|cri_loss: 0.01409912109375|unsuper_loss: 0.0
+average reward score: 5.90234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.80%) |Training time=0.49s (22.18%) |Others=0.13 (6.02%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2908|ppo_ep: 1|act_loss: 0.00876617431640625|cri_loss: 0.0050048828125|unsuper_loss: 0.0
+average reward score: 5.6953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.33%) |Training time=0.45s (20.96%) |Others=0.10 (4.71%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.45
+[2023-04-14 10:34:51,677] [INFO] [logging.py:96:log_dist] [Rank 0] step=2910, skipped=38, lr=[7.1533673136406274e-06, 7.1533673136406274e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:34:51,695] [INFO] [timer.py:199:stop] epoch=0/micro_step=2910/global_step=2910, RunningAvgSamplesPerSec=106.29643390840222, CurrSamplesPerSec=104.84285647330844, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:34:51,788] [INFO] [logging.py:96:log_dist] [Rank 0] step=2910, skipped=44, lr=[3.7114627976997454e-06, 3.7114627976997454e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2909|ppo_ep: 1|act_loss: 0.01393890380859375|cri_loss: 0.008758544921875|unsuper_loss: 0.0
+average reward score: 5.07421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.86%) |Training time=0.47s (21.60%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2910|ppo_ep: 1|act_loss: -0.00762939453125|cri_loss: -0.0035247802734375|unsuper_loss: 0.0
+average reward score: 5.40234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.95%) |Training time=0.49s (22.51%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2911|ppo_ep: 1|act_loss: 0.01641845703125|cri_loss: 0.00928497314453125|unsuper_loss: 0.0
+average reward score: 5.37109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.58s (70.96%) |Training time=0.48s (21.42%) |Others=0.17 (7.62%)|CurSamplesPerSec=14.33 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2912|ppo_ep: 1|act_loss: -0.03759765625|cri_loss: -0.0178375244140625|unsuper_loss: 0.0
+average reward score: 5.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.15%) |Training time=0.48s (22.16%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2913|ppo_ep: 1|act_loss: -0.00995635986328125|cri_loss: -0.004405975341796875|unsuper_loss: 0.0
+average reward score: 5.09375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.48s (22.01%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2914|ppo_ep: 1|act_loss: -0.023712158203125|cri_loss: -0.0115509033203125|unsuper_loss: 0.0
+average reward score: 5.6171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.62s |Gather latency=0.00s (0.00%) |Generate time=1.58s (60.47%) |Training time=0.48s (18.30%) |Others=0.56 (21.22%)|CurSamplesPerSec=12.22 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2915|ppo_ep: 1|act_loss: -0.03619384765625|cri_loss: -0.0172119140625|unsuper_loss: 0.0
+average reward score: 5.4609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.13%) |Training time=0.48s (22.30%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2916|ppo_ep: 1|act_loss: 0.022613525390625|cri_loss: 0.014739990234375|unsuper_loss: 0.0
+average reward score: 5.29296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.51%) |Training time=0.47s (21.95%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2917|ppo_ep: 1|act_loss: 0.03131103515625|cri_loss: 0.016082763671875|unsuper_loss: 0.0
+average reward score: 5.28125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.23%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2918|ppo_ep: 1|act_loss: 0.025970458984375|cri_loss: 0.0135650634765625|unsuper_loss: 0.0
+average reward score: 5.84375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.46%) |Training time=0.47s (21.99%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45
+[2023-04-14 10:35:13,853] [INFO] [logging.py:96:log_dist] [Rank 0] step=2920, skipped=38, lr=[7.1370799198736894e-06, 7.1370799198736894e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:35:13,872] [INFO] [timer.py:199:stop] epoch=0/micro_step=2920/global_step=2920, RunningAvgSamplesPerSec=106.27708410449475, CurrSamplesPerSec=102.26821669159538, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:35:13,964] [INFO] [logging.py:96:log_dist] [Rank 0] step=2920, skipped=44, lr=[3.7030344477037794e-06, 3.7030344477037794e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2919|ppo_ep: 1|act_loss: -0.017486572265625|cri_loss: -0.00734710693359375|unsuper_loss: 0.0
+average reward score: 5.1484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.47%) |Training time=0.48s (22.00%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2920|ppo_ep: 1|act_loss: 0.004459381103515625|cri_loss: 0.003025054931640625|unsuper_loss: 0.0
+average reward score: 5.109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.59s (67.90%) |Training time=0.48s (20.49%) |Others=0.27 (11.61%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2921|ppo_ep: 1|act_loss: -0.0131072998046875|cri_loss: -0.005886077880859375|unsuper_loss: 0.0
+average reward score: 5.5
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.82%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2922|ppo_ep: 1|act_loss: -0.017913818359375|cri_loss: -0.008392333984375|unsuper_loss: 0.0
+average reward score: 5.14453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.65%) |Training time=0.48s (21.85%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2923|ppo_ep: 1|act_loss: 0.001232147216796875|cri_loss: 0.0011682510375976562|unsuper_loss: 0.0
+average reward score: 5.41015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.48s (22.04%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.45
+[2023-04-14 10:35:24,979] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 2924|ppo_ep: 1|act_loss: 0.05810546875|cri_loss: 0.03289794921875|unsuper_loss: 0.0
+average reward score: 5.9375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.20%) |Training time=0.47s (21.72%) |Others=0.09 (4.08%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
+[2023-04-14 10:35:27,132] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 2925|ppo_ep: 1|act_loss: 0.0648193359375|cri_loss: 0.034698486328125|unsuper_loss: 0.0
+average reward score: 5.36328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.48s (22.17%) |Others=0.09 (4.08%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2926|ppo_ep: 1|act_loss: 0.02801513671875|cri_loss: 0.01515960693359375|unsuper_loss: 0.0
+average reward score: 5.953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.83s |Gather latency=0.00s (0.00%) |Generate time=1.78s (62.85%) |Training time=0.47s (16.68%) |Others=0.58 (20.46%)|CurSamplesPerSec=11.31 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2927|ppo_ep: 1|act_loss: 0.0149688720703125|cri_loss: 0.00780487060546875|unsuper_loss: 0.0
+average reward score: 5.49609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.58%) |Training time=0.48s (21.07%) |Others=0.10 (4.35%)|CurSamplesPerSec=14.12 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2928|ppo_ep: 1|act_loss: 0.00994110107421875|cri_loss: 0.0059051513671875|unsuper_loss: 0.0
+average reward score: 5.9296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.05%) |Training time=0.49s (22.43%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.45
+[2023-04-14 10:35:36,456] [INFO] [logging.py:96:log_dist] [Rank 0] step=2930, skipped=38, lr=[7.120758255504751e-06, 7.120758255504751e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:35:36,474] [INFO] [timer.py:199:stop] epoch=0/micro_step=2930/global_step=2930, RunningAvgSamplesPerSec=106.26202270186438, CurrSamplesPerSec=101.59874100814726, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:35:36,567] [INFO] [logging.py:96:log_dist] [Rank 0] step=2930, skipped=46, lr=[3.6962789227532165e-06, 3.6962789227532165e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2929|ppo_ep: 1|act_loss: 0.00696563720703125|cri_loss: 0.004253387451171875|unsuper_loss: 0.0
+average reward score: 5.140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.39%) |Training time=0.48s (22.09%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2930|ppo_ep: 1|act_loss: 0.020294189453125|cri_loss: 0.01128387451171875|unsuper_loss: 0.0
+average reward score: 5.12890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.48s (22.01%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2931|ppo_ep: 1|act_loss: -0.0184783935546875|cri_loss: -0.00890350341796875|unsuper_loss: 0.0
+average reward score: 4.8671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.48s (22.00%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2932|ppo_ep: 1|act_loss: -0.0265960693359375|cri_loss: -0.01244354248046875|unsuper_loss: 0.0
+average reward score: 5.16015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.22%) |Training time=0.48s (22.19%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2933|ppo_ep: 1|act_loss: -0.001399993896484375|cri_loss: -0.00023174285888671875|unsuper_loss: 0.0
+average reward score: 5.28515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.12%) |Training time=0.48s (22.34%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2934|ppo_ep: 1|act_loss: -0.023223876953125|cri_loss: -0.01146697998046875|unsuper_loss: 0.0
+average reward score: 5.21484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.34%) |Training time=0.48s (22.11%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2935|ppo_ep: 1|act_loss: -0.043487548828125|cri_loss: -0.021331787109375|unsuper_loss: 0.0
+average reward score: 4.94140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.13%) |Training time=0.48s (22.32%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2936|ppo_ep: 1|act_loss: -0.0113525390625|cri_loss: -0.00528717041015625|unsuper_loss: 0.0
+average reward score: 5.296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.17%) |Training time=0.48s (22.27%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2937|ppo_ep: 1|act_loss: 0.0017881393432617188|cri_loss: 0.00162506103515625|unsuper_loss: 0.0
+average reward score: 5.140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.14%) |Training time=0.48s (22.32%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2938|ppo_ep: 1|act_loss: 0.004360198974609375|cri_loss: 0.002471923828125|unsuper_loss: 0.0
+average reward score: 5.140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.99s |Gather latency=0.00s (0.00%) |Generate time=1.58s (53.00%) |Training time=0.48s (16.19%) |Others=0.92 (30.80%)|CurSamplesPerSec=10.72 |AvgSamplesPerSec=14.45
+[2023-04-14 10:35:58,887] [INFO] [logging.py:96:log_dist] [Rank 0] step=2940, skipped=38, lr=[7.10440256246025e-06, 7.10440256246025e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:35:58,905] [INFO] [timer.py:199:stop] epoch=0/micro_step=2940/global_step=2940, RunningAvgSamplesPerSec=106.2466257993554, CurrSamplesPerSec=115.49485676915266, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:35:58,998] [INFO] [logging.py:96:log_dist] [Rank 0] step=2940, skipped=46, lr=[3.6878185655126163e-06, 3.6878185655126163e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2939|ppo_ep: 1|act_loss: 0.02459716796875|cri_loss: 0.0127716064453125|unsuper_loss: 0.0
+average reward score: 5.41796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.94%) |Training time=0.44s (20.48%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
+[2023-04-14 10:36:01,038] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 2940|ppo_ep: 1|act_loss: -0.0093536376953125|cri_loss: -0.0039825439453125|unsuper_loss: 0.0
+average reward score: 4.94921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.23%) |Training time=0.45s (21.17%) |Others=0.10 (4.60%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.45
+[2023-04-14 10:36:03,170] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 2941|ppo_ep: 1|act_loss: 0.01451873779296875|cri_loss: 0.007579803466796875|unsuper_loss: 0.0
+average reward score: 5.2265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.31%) |Training time=0.45s (21.09%) |Others=0.10 (4.60%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2942|ppo_ep: 1|act_loss: 0.003055572509765625|cri_loss: 0.002071380615234375|unsuper_loss: 0.0
+average reward score: 5.15234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.25%) |Training time=0.48s (20.52%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2943|ppo_ep: 1|act_loss: -0.0129547119140625|cri_loss: -0.00557708740234375|unsuper_loss: 0.0
+average reward score: 5.578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.23%) |Training time=0.48s (22.23%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2944|ppo_ep: 1|act_loss: -0.022705078125|cri_loss: -0.0099639892578125|unsuper_loss: 0.0
+average reward score: 5.234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.18%) |Training time=0.48s (22.27%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2945|ppo_ep: 1|act_loss: -0.002780914306640625|cri_loss: -0.001194000244140625|unsuper_loss: 0.0
+average reward score: 5.0546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.04%) |Training time=0.49s (22.42%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2946|ppo_ep: 1|act_loss: -0.0325927734375|cri_loss: -0.01580810546875|unsuper_loss: 0.0
+average reward score: 4.828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.34%) |Training time=0.48s (22.11%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2947|ppo_ep: 1|act_loss: 0.043701171875|cri_loss: 0.02294921875|unsuper_loss: 0.0
+average reward score: 5.7421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.23%) |Training time=0.48s (22.23%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2948|ppo_ep: 1|act_loss: 0.03558349609375|cri_loss: 0.018707275390625|unsuper_loss: 0.0
+average reward score: 5.06640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.19%) |Training time=0.48s (22.26%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.45
+[2023-04-14 10:36:20,616] [INFO] [logging.py:96:log_dist] [Rank 0] step=2950, skipped=40, lr=[7.091293670274905e-06, 7.091293670274905e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:36:20,635] [INFO] [timer.py:199:stop] epoch=0/micro_step=2950/global_step=2950, RunningAvgSamplesPerSec=106.23428625978279, CurrSamplesPerSec=103.20907689290974, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:36:20,728] [INFO] [logging.py:96:log_dist] [Rank 0] step=2950, skipped=46, lr=[3.679340601935418e-06, 3.679340601935418e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2949|ppo_ep: 1|act_loss: 0.055755615234375|cri_loss: 0.029510498046875|unsuper_loss: 0.0
+average reward score: 5.734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.89%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2950|ppo_ep: 1|act_loss: 0.02801513671875|cri_loss: 0.0146636962890625|unsuper_loss: 0.0
+average reward score: 5.12890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.37%) |Training time=0.48s (22.08%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2951|ppo_ep: 1|act_loss: 0.028778076171875|cri_loss: 0.0155487060546875|unsuper_loss: 0.0
+average reward score: 4.46484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.23%) |Training time=0.48s (22.21%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2952|ppo_ep: 1|act_loss: 0.002716064453125|cri_loss: 0.002109527587890625|unsuper_loss: 0.0
+average reward score: 5.390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.47%) |Training time=0.47s (21.98%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2953|ppo_ep: 1|act_loss: -0.032562255859375|cri_loss: -0.0157012939453125|unsuper_loss: 0.0
+average reward score: 4.70703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.31%) |Training time=0.48s (22.15%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2954|ppo_ep: 1|act_loss: -0.05206298828125|cri_loss: -0.0253143310546875|unsuper_loss: 0.0
+average reward score: 5.265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.78%) |Training time=0.47s (21.65%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2955|ppo_ep: 1|act_loss: -0.016204833984375|cri_loss: -0.007656097412109375|unsuper_loss: 0.0
+average reward score: 5.7578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.76%) |Training time=0.47s (21.52%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2956|ppo_ep: 1|act_loss: -0.006183624267578125|cri_loss: -0.0024566650390625|unsuper_loss: 0.0
+average reward score: 5.4921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.62s (70.42%) |Training time=0.52s (22.78%) |Others=0.16 (6.80%)|CurSamplesPerSec=13.89 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2957|ppo_ep: 1|act_loss: 0.0180206298828125|cri_loss: 0.00919342041015625|unsuper_loss: 0.0
+average reward score: 5.56640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.63s (69.64%) |Training time=0.47s (20.24%) |Others=0.24 (10.12%)|CurSamplesPerSec=13.63 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2958|ppo_ep: 1|act_loss: 0.006927490234375|cri_loss: 0.0037059783935546875|unsuper_loss: 0.0
+average reward score: 5.43359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.30%) |Training time=0.48s (22.15%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45
+[2023-04-14 10:36:42,565] [INFO] [logging.py:96:log_dist] [Rank 0] step=2960, skipped=40, lr=[7.07487733687673e-06, 7.07487733687673e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:36:42,583] [INFO] [timer.py:199:stop] epoch=0/micro_step=2960/global_step=2960, RunningAvgSamplesPerSec=106.21938333389814, CurrSamplesPerSec=99.69147854428071, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:36:42,675] [INFO] [logging.py:96:log_dist] [Rank 0] step=2960, skipped=46, lr=[3.6708451576854964e-06, 3.6708451576854964e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2959|ppo_ep: 1|act_loss: 0.02166748046875|cri_loss: 0.011566162109375|unsuper_loss: 0.0
+average reward score: 5.56640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.01%) |Training time=0.48s (22.45%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2960|ppo_ep: 1|act_loss: 0.03948974609375|cri_loss: 0.0203857421875|unsuper_loss: 0.0
+average reward score: 5.27734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.07%) |Training time=0.48s (22.39%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2961|ppo_ep: 1|act_loss: 0.0328369140625|cri_loss: 0.0174560546875|unsuper_loss: 0.0
+average reward score: 5.90625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.39%) |Training time=0.47s (22.04%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2962|ppo_ep: 1|act_loss: -0.00704193115234375|cri_loss: -0.003192901611328125|unsuper_loss: 0.0
+average reward score: 5.66015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.13%) |Training time=0.48s (22.31%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2963|ppo_ep: 1|act_loss: 0.02923583984375|cri_loss: 0.0168609619140625|unsuper_loss: 0.0
+average reward score: 4.71484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.21%) |Training time=0.48s (22.24%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2964|ppo_ep: 1|act_loss: -0.00560760498046875|cri_loss: -0.002147674560546875|unsuper_loss: 0.0
+average reward score: 5.55859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.35%) |Training time=0.48s (22.09%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2965|ppo_ep: 1|act_loss: -0.017333984375|cri_loss: -0.00814056396484375|unsuper_loss: 0.0
+average reward score: 5.6328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.00%) |Training time=0.48s (22.45%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2966|ppo_ep: 1|act_loss: -0.05963134765625|cri_loss: -0.0282135009765625|unsuper_loss: 0.0
+average reward score: 5.359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.23%) |Training time=0.48s (22.21%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2967|ppo_ep: 1|act_loss: 0.060333251953125|cri_loss: 0.032135009765625|unsuper_loss: 0.0
+average reward score: 4.875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.19%) |Training time=0.48s (22.26%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2968|ppo_ep: 1|act_loss: -0.0307159423828125|cri_loss: -0.014678955078125|unsuper_loss: 0.0
+average reward score: 5.6328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.17%) |Training time=0.48s (22.29%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.45
+[2023-04-14 10:37:04,143] [INFO] [logging.py:96:log_dist] [Rank 0] step=2970, skipped=40, lr=[7.0584276548688896e-06, 7.0584276548688896e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:37:04,161] [INFO] [timer.py:199:stop] epoch=0/micro_step=2970/global_step=2970, RunningAvgSamplesPerSec=106.20216768200422, CurrSamplesPerSec=108.36122310905577, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:37:04,254] [INFO] [logging.py:96:log_dist] [Rank 0] step=2970, skipped=46, lr=[3.662332358685826e-06, 3.662332358685826e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2969|ppo_ep: 1|act_loss: -0.0328369140625|cri_loss: -0.0156707763671875|unsuper_loss: 0.0
+average reward score: 5.22265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.19%) |Training time=0.46s (21.26%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2970|ppo_ep: 1|act_loss: -0.018341064453125|cri_loss: -0.00885772705078125|unsuper_loss: 0.0
+average reward score: 5.56640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.75%) |Training time=0.49s (22.72%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2971|ppo_ep: 1|act_loss: 9.191036224365234e-05|cri_loss: 0.0001608133316040039|unsuper_loss: 0.0
+average reward score: 5.35546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.31%) |Training time=0.48s (22.13%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2972|ppo_ep: 1|act_loss: -0.028167724609375|cri_loss: -0.0137939453125|unsuper_loss: 0.0
+average reward score: 5.28515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.07%) |Training time=0.48s (20.72%) |Others=0.10 (4.21%)|CurSamplesPerSec=13.70 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2973|ppo_ep: 1|act_loss: -0.0010089874267578125|cri_loss: 0.0002651214599609375|unsuper_loss: 0.0
+average reward score: 5.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.25%) |Training time=0.48s (22.19%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2974|ppo_ep: 1|act_loss: 0.0003681182861328125|cri_loss: 0.00141143798828125|unsuper_loss: 0.0
+average reward score: 5.4453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.22%) |Training time=0.48s (22.23%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2975|ppo_ep: 1|act_loss: -0.0010356903076171875|cri_loss: -0.00015783309936523438|unsuper_loss: 0.0
+average reward score: 5.5859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.11%) |Training time=0.48s (22.34%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2976|ppo_ep: 1|act_loss: 0.00823211669921875|cri_loss: 0.004405975341796875|unsuper_loss: 0.0
+average reward score: 5.5390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.48s (22.13%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2977|ppo_ep: 1|act_loss: -0.0009083747863769531|cri_loss: -0.00011587142944335938|unsuper_loss: 0.0
+average reward score: 5.25
+-------------------------------------------------------------------------------------
+|E2E latency=2.51s |Gather latency=0.00s (0.00%) |Generate time=1.58s (63.02%) |Training time=0.48s (19.08%) |Others=0.45 (17.90%)|CurSamplesPerSec=12.75 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2978|ppo_ep: 1|act_loss: -0.031768798828125|cri_loss: -0.01482391357421875|unsuper_loss: 0.0
+average reward score: 5.265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.06s |Gather latency=0.00s (0.00%) |Generate time=1.58s (76.46%) |Training time=0.39s (18.75%) |Others=0.10 (4.78%)|CurSamplesPerSec=15.52 |AvgSamplesPerSec=14.45
+[2023-04-14 10:37:26,167] [INFO] [logging.py:96:log_dist] [Rank 0] step=2980, skipped=40, lr=[7.041944868075353e-06, 7.041944868075353e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:37:26,185] [INFO] [timer.py:199:stop] epoch=0/micro_step=2980/global_step=2980, RunningAvgSamplesPerSec=106.19269914626885, CurrSamplesPerSec=101.53371919119633, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:37:26,278] [INFO] [logging.py:96:log_dist] [Rank 0] step=2980, skipped=46, lr=[3.6538023311166273e-06, 3.6538023311166273e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2979|ppo_ep: 1|act_loss: -0.02227783203125|cri_loss: -0.0102081298828125|unsuper_loss: 0.0
+average reward score: 5.7890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.30%) |Training time=0.48s (22.14%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2980|ppo_ep: 1|act_loss: -0.004032135009765625|cri_loss: -0.0018053054809570312|unsuper_loss: 0.0
+average reward score: 5.68359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.11%) |Training time=0.44s (20.34%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2981|ppo_ep: 1|act_loss: -0.040283203125|cri_loss: -0.018829345703125|unsuper_loss: 0.0
+average reward score: 5.66015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.08%) |Training time=0.46s (21.36%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2982|ppo_ep: 1|act_loss: -0.0193023681640625|cri_loss: -0.00920867919921875|unsuper_loss: 0.0
+average reward score: 5.58984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.84%) |Training time=0.47s (21.62%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2983|ppo_ep: 1|act_loss: -0.01230621337890625|cri_loss: -0.00579071044921875|unsuper_loss: 0.0
+average reward score: 5.50390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.05%) |Training time=0.44s (20.41%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2984|ppo_ep: 1|act_loss: 0.021392822265625|cri_loss: 0.01100921630859375|unsuper_loss: 0.0
+average reward score: 5.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.57s |Gather latency=0.00s (0.00%) |Generate time=1.62s (63.31%) |Training time=0.44s (17.34%) |Others=0.50 (19.34%)|CurSamplesPerSec=12.48 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2985|ppo_ep: 1|act_loss: 0.02447509765625|cri_loss: 0.01253509521484375|unsuper_loss: 0.0
+average reward score: 6.1328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.84%) |Training time=0.46s (20.90%) |Others=0.12 (5.27%)|CurSamplesPerSec=14.49 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2986|ppo_ep: 1|act_loss: 0.01194000244140625|cri_loss: 0.007659912109375|unsuper_loss: 0.0
+average reward score: 4.7265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.39s |Gather latency=0.00s (0.00%) |Generate time=1.84s (76.86%) |Training time=0.45s (19.00%) |Others=0.10 (4.14%)|CurSamplesPerSec=13.40 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2987|ppo_ep: 1|act_loss: 0.0084075927734375|cri_loss: 0.0046234130859375|unsuper_loss: 0.0
+average reward score: 5.3203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.16%) |Training time=0.46s (21.27%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2988|ppo_ep: 1|act_loss: 0.04998779296875|cri_loss: 0.026336669921875|unsuper_loss: 0.0
+average reward score: 5.23046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.96%) |Training time=0.46s (21.48%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
+[2023-04-14 10:37:48,439] [INFO] [logging.py:96:log_dist] [Rank 0] step=2990, skipped=40, lr=[7.025429220810784e-06, 7.025429220810784e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:37:48,458] [INFO] [timer.py:199:stop] epoch=0/micro_step=2990/global_step=2990, RunningAvgSamplesPerSec=106.20281446283651, CurrSamplesPerSec=107.48080335693007, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:37:48,550] [INFO] [logging.py:96:log_dist] [Rank 0] step=2990, skipped=46, lr=[3.6452552014134834e-06, 3.6452552014134834e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2989|ppo_ep: 1|act_loss: 0.017669677734375|cri_loss: 0.00970458984375|unsuper_loss: 0.0
+average reward score: 6.1328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.09%) |Training time=0.46s (21.36%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2990|ppo_ep: 1|act_loss: -0.0224609375|cri_loss: -0.01080322265625|unsuper_loss: 0.0
+average reward score: 6.0859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.96%) |Training time=0.46s (21.48%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2991|ppo_ep: 1|act_loss: -0.01204681396484375|cri_loss: -0.00508880615234375|unsuper_loss: 0.0
+average reward score: 5.8046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.02%) |Training time=0.46s (21.41%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2992|ppo_ep: 1|act_loss: 0.0279541015625|cri_loss: 0.01557159423828125|unsuper_loss: 0.0
+average reward score: 5.4921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.92%) |Training time=0.47s (21.52%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2993|ppo_ep: 1|act_loss: 0.0250701904296875|cri_loss: 0.015655517578125|unsuper_loss: 0.0
+average reward score: 5.125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.96%) |Training time=0.46s (21.49%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2994|ppo_ep: 1|act_loss: -0.00518035888671875|cri_loss: -0.00228118896484375|unsuper_loss: 0.0
+average reward score: 5.3046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.00%) |Training time=0.46s (21.44%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2995|ppo_ep: 1|act_loss: -0.0118560791015625|cri_loss: -0.00574493408203125|unsuper_loss: 0.0
+average reward score: 5.39453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.08s |Gather latency=0.00s (0.00%) |Generate time=1.60s (76.67%) |Training time=0.39s (18.57%) |Others=0.10 (4.76%)|CurSamplesPerSec=15.37 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2996|ppo_ep: 1|act_loss: 0.04974365234375|cri_loss: 0.03350830078125|unsuper_loss: 0.0
+average reward score: 5.8125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.43%) |Training time=0.45s (21.01%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2997|ppo_ep: 1|act_loss: 0.0386962890625|cri_loss: 0.020751953125|unsuper_loss: 0.0
+average reward score: 5.3125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.68%) |Training time=0.45s (20.76%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45
+epoch: 0|step: 2998|ppo_ep: 1|act_loss: -0.0017347335815429688|cri_loss: -0.000568389892578125|unsuper_loss: 0.0
+average reward score: 5.55078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.26%) |Training time=0.46s (21.17%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
+[2023-04-14 10:38:10,330] [INFO] [logging.py:96:log_dist] [Rank 0] step=3000, skipped=40, lr=[7.008880957876916e-06, 7.008880957876916e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:38:10,349] [INFO] [timer.py:199:stop] epoch=0/micro_step=3000/global_step=3000, RunningAvgSamplesPerSec=106.215874634767, CurrSamplesPerSec=105.73411932827526, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:38:10,442] [INFO] [logging.py:96:log_dist] [Rank 0] step=3000, skipped=46, lr=[3.6366910962654785e-06, 3.6366910962654785e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 2999|ppo_ep: 1|act_loss: -0.0080108642578125|cri_loss: -0.0035762786865234375|unsuper_loss: 0.0
+average reward score: 5.44921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.05%) |Training time=0.47s (21.43%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3000|ppo_ep: 1|act_loss: 0.01529693603515625|cri_loss: 0.009674072265625|unsuper_loss: 0.0
+average reward score: 5.953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.07%) |Training time=0.46s (21.29%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3001|ppo_ep: 1|act_loss: 0.02740478515625|cri_loss: 0.01422882080078125|unsuper_loss: 0.0
+average reward score: 5.08203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.25%) |Training time=0.45s (19.50%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.82 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3002|ppo_ep: 1|act_loss: 0.023345947265625|cri_loss: 0.01261138916015625|unsuper_loss: 0.0
+average reward score: 5.57421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.16%) |Training time=0.46s (21.28%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3003|ppo_ep: 1|act_loss: 0.005279541015625|cri_loss: 0.00919342041015625|unsuper_loss: 0.0
+average reward score: 5.19921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.88%) |Training time=0.46s (21.55%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3004|ppo_ep: 1|act_loss: -0.025146484375|cri_loss: -0.01141357421875|unsuper_loss: 0.0
+average reward score: 5.26953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.81s |Gather latency=0.00s (0.00%) |Generate time=1.58s (56.22%) |Training time=0.47s (16.82%) |Others=0.76 (26.95%)|CurSamplesPerSec=11.40 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3005|ppo_ep: 1|act_loss: 0.056640625|cri_loss: 0.031097412109375|unsuper_loss: 0.0
+average reward score: 5.23046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.57%) |Training time=0.47s (21.85%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3006|ppo_ep: 1|act_loss: -0.0225372314453125|cri_loss: -0.0108642578125|unsuper_loss: 0.0
+average reward score: 5.3984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.43%) |Training time=0.47s (22.00%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45
+[2023-04-14 10:38:28,354] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
+[2023-04-14 10:38:28,440] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
+epoch: 0|step: 3007|ppo_ep: 1|act_loss: 0.048126220703125|cri_loss: 0.026763916015625|unsuper_loss: 0.0
+average reward score: 5.30859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.66%) |Training time=0.45s (21.13%) |Others=0.09 (4.22%)|CurSamplesPerSec=15.13 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3008|ppo_ep: 1|act_loss: -0.053314208984375|cri_loss: -0.0260467529296875|unsuper_loss: 0.0
+average reward score: 5.1328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.76%) |Training time=0.46s (21.67%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45
+[2023-04-14 10:38:32,622] [INFO] [logging.py:96:log_dist] [Rank 0] step=3010, skipped=41, lr=[6.993959837557451e-06, 6.993959837557451e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:38:32,640] [INFO] [timer.py:199:stop] epoch=0/micro_step=3010/global_step=3010, RunningAvgSamplesPerSec=106.2159635052104, CurrSamplesPerSec=102.30953312005147, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:38:32,733] [INFO] [logging.py:96:log_dist] [Rank 0] step=3010, skipped=47, lr=[3.628968992538233e-06, 3.628968992538233e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3009|ppo_ep: 1|act_loss: 0.02825927734375|cri_loss: 0.01508331298828125|unsuper_loss: 0.0
+average reward score: 4.7734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.34%) |Training time=0.48s (22.10%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3010|ppo_ep: 1|act_loss: -0.006519317626953125|cri_loss: -0.001796722412109375|unsuper_loss: 0.0
+average reward score: 5.40234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.35%) |Training time=0.48s (22.07%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3011|ppo_ep: 1|act_loss: -0.007564544677734375|cri_loss: -0.0034637451171875|unsuper_loss: 0.0
+average reward score: 5.6328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.46s (21.39%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3012|ppo_ep: 1|act_loss: 0.0518798828125|cri_loss: 0.0274505615234375|unsuper_loss: 0.0
+average reward score: 4.03515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.96%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3013|ppo_ep: 1|act_loss: -0.00540924072265625|cri_loss: 0.00273895263671875|unsuper_loss: 0.0
+average reward score: 4.515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.14%) |Training time=0.46s (21.28%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3014|ppo_ep: 1|act_loss: -0.0091400146484375|cri_loss: -0.003551483154296875|unsuper_loss: 0.0
+average reward score: 5.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.69%) |Training time=0.46s (20.81%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3015|ppo_ep: 1|act_loss: 0.028167724609375|cri_loss: 0.015838623046875|unsuper_loss: 0.0
+average reward score: 4.796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.69s (75.25%) |Training time=0.46s (20.36%) |Others=0.10 (4.39%)|CurSamplesPerSec=14.28 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3016|ppo_ep: 1|act_loss: 0.06402587890625|cri_loss: 0.035064697265625|unsuper_loss: 0.0
+average reward score: 4.43359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.45s |Gather latency=0.00s (0.00%) |Generate time=1.75s (71.39%) |Training time=0.47s (19.05%) |Others=0.23 (9.56%)|CurSamplesPerSec=13.05 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3017|ppo_ep: 1|act_loss: -0.022430419921875|cri_loss: -0.00969696044921875|unsuper_loss: 0.0
+average reward score: 4.5703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.60%) |Training time=0.47s (21.83%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3018|ppo_ep: 1|act_loss: 0.001911163330078125|cri_loss: 0.0020999908447265625|unsuper_loss: 0.0
+average reward score: 5.29296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.73%) |Training time=0.47s (21.69%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
+[2023-04-14 10:38:54,564] [INFO] [logging.py:96:log_dist] [Rank 0] step=3020, skipped=41, lr=[6.977350281009214e-06, 6.977350281009214e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:38:54,583] [INFO] [timer.py:199:stop] epoch=0/micro_step=3020/global_step=3020, RunningAvgSamplesPerSec=106.21456349820467, CurrSamplesPerSec=103.59215705645721, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:38:54,675] [INFO] [logging.py:96:log_dist] [Rank 0] step=3020, skipped=47, lr=[3.6203729839730567e-06, 3.6203729839730567e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3019|ppo_ep: 1|act_loss: 0.04150390625|cri_loss: 0.02349853515625|unsuper_loss: 0.0
+average reward score: 5.0703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.55%) |Training time=0.47s (21.90%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3020|ppo_ep: 1|act_loss: 0.0745849609375|cri_loss: 0.041107177734375|unsuper_loss: 0.0
+average reward score: 4.56640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.70%) |Training time=0.47s (21.73%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3021|ppo_ep: 1|act_loss: 0.0186767578125|cri_loss: 0.0098724365234375|unsuper_loss: 0.0
+average reward score: 5.328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.64%) |Training time=0.47s (21.79%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3022|ppo_ep: 1|act_loss: -0.0171051025390625|cri_loss: -0.00785064697265625|unsuper_loss: 0.0
+average reward score: 5.6640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.46%) |Training time=0.47s (21.97%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3023|ppo_ep: 1|act_loss: 0.01088714599609375|cri_loss: 0.006252288818359375|unsuper_loss: 0.0
+average reward score: 5.78125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.61%) |Training time=0.47s (21.82%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3024|ppo_ep: 1|act_loss: 0.0006403923034667969|cri_loss: 0.0006952285766601562|unsuper_loss: 0.0
+average reward score: 4.79296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.54%) |Training time=0.47s (21.89%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3025|ppo_ep: 1|act_loss: -0.03167724609375|cri_loss: -0.01506805419921875|unsuper_loss: 0.0
+average reward score: 5.296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.80%) |Training time=0.47s (21.64%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3026|ppo_ep: 1|act_loss: -0.0455322265625|cri_loss: -0.021087646484375|unsuper_loss: 0.0
+average reward score: 5.57421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.74%) |Training time=0.47s (21.68%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3027|ppo_ep: 1|act_loss: -0.01092529296875|cri_loss: -0.00153350830078125|unsuper_loss: 0.0
+average reward score: 5.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.34%) |Training time=0.48s (22.09%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3028|ppo_ep: 1|act_loss: -0.04278564453125|cri_loss: -0.0206298828125|unsuper_loss: 0.0
+average reward score: 5.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.58s (66.99%) |Training time=0.47s (19.91%) |Others=0.31 (13.10%)|CurSamplesPerSec=13.54 |AvgSamplesPerSec=14.45
+[2023-04-14 10:39:16,295] [INFO] [logging.py:96:log_dist] [Rank 0] step=3030, skipped=41, lr=[6.96070882143755e-06, 6.96070882143755e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:39:16,313] [INFO] [timer.py:199:stop] epoch=0/micro_step=3030/global_step=3030, RunningAvgSamplesPerSec=106.2086986497776, CurrSamplesPerSec=107.58660138577608, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:39:16,406] [INFO] [logging.py:96:log_dist] [Rank 0] step=3030, skipped=47, lr=[3.6117603687775048e-06, 3.6117603687775048e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3029|ppo_ep: 1|act_loss: 0.015106201171875|cri_loss: 0.007755279541015625|unsuper_loss: 0.0
+average reward score: 4.8203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.18%) |Training time=0.46s (21.27%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3030|ppo_ep: 1|act_loss: 0.0186767578125|cri_loss: 0.0110931396484375|unsuper_loss: 0.0
+average reward score: 4.78125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.56%) |Training time=0.47s (21.87%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3031|ppo_ep: 1|act_loss: 0.0296630859375|cri_loss: 0.016082763671875|unsuper_loss: 0.0
+average reward score: 4.80078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.74s (75.28%) |Training time=0.47s (20.47%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.83 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3032|ppo_ep: 1|act_loss: 0.04290771484375|cri_loss: 0.0226593017578125|unsuper_loss: 0.0
+average reward score: 5.2421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.62%) |Training time=0.47s (21.81%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3033|ppo_ep: 1|act_loss: 0.0767822265625|cri_loss: 0.041412353515625|unsuper_loss: 0.0
+average reward score: 5.0078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.86%) |Training time=0.46s (21.58%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3034|ppo_ep: 1|act_loss: 0.033294677734375|cri_loss: 0.0171356201171875|unsuper_loss: 0.0
+average reward score: 6.3203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.44s |Gather latency=0.00s (0.00%) |Generate time=1.58s (64.85%) |Training time=0.47s (19.34%) |Others=0.39 (15.81%)|CurSamplesPerSec=13.13 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3035|ppo_ep: 1|act_loss: 0.04876708984375|cri_loss: 0.027008056640625|unsuper_loss: 0.0
+average reward score: 5.0625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.50%) |Training time=0.47s (21.93%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3036|ppo_ep: 1|act_loss: -0.03179931640625|cri_loss: -0.015411376953125|unsuper_loss: 0.0
+average reward score: 5.8828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.41%) |Training time=0.47s (21.87%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3037|ppo_ep: 1|act_loss: -0.0127716064453125|cri_loss: -0.00620269775390625|unsuper_loss: 0.0
+average reward score: 5.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.35%) |Training time=0.47s (22.07%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3038|ppo_ep: 1|act_loss: 0.031005859375|cri_loss: 0.0171051025390625|unsuper_loss: 0.0
+average reward score: 4.421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.24%) |Training time=0.46s (21.21%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.45
+[2023-04-14 10:39:38,258] [INFO] [logging.py:96:log_dist] [Rank 0] step=3040, skipped=41, lr=[6.944035705509035e-06, 6.944035705509035e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:39:38,277] [INFO] [timer.py:199:stop] epoch=0/micro_step=3040/global_step=3040, RunningAvgSamplesPerSec=106.20304657307675, CurrSamplesPerSec=106.6089857439244, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:39:38,455] [INFO] [logging.py:96:log_dist] [Rank 0] step=3040, skipped=47, lr=[3.603131274611312e-06, 3.603131274611312e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3039|ppo_ep: 1|act_loss: -0.053680419921875|cri_loss: -0.0252685546875|unsuper_loss: 0.0
+average reward score: 5.46484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.13%) |Training time=0.55s (24.52%) |Others=0.10 (4.35%)|CurSamplesPerSec=14.29 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3040|ppo_ep: 1|act_loss: -0.044677734375|cri_loss: -0.021453857421875|unsuper_loss: 0.0
+average reward score: 5.109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.84%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3041|ppo_ep: 1|act_loss: 0.014801025390625|cri_loss: 0.0083770751953125|unsuper_loss: 0.0
+average reward score: 4.95703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.75%) |Training time=0.47s (21.67%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3042|ppo_ep: 1|act_loss: -0.0051422119140625|cri_loss: -0.001735687255859375|unsuper_loss: 0.0
+average reward score: 4.796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.45%) |Training time=0.47s (22.00%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3043|ppo_ep: 1|act_loss: 0.008453369140625|cri_loss: 0.00479888916015625|unsuper_loss: 0.0
+average reward score: 5.5546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.49%) |Training time=0.48s (22.00%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3044|ppo_ep: 1|act_loss: -0.00157928466796875|cri_loss: 4.9591064453125e-05|unsuper_loss: 0.0
+average reward score: 5.6328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.42%) |Training time=0.46s (20.21%) |Others=0.10 (4.37%)|CurSamplesPerSec=14.10 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3045|ppo_ep: 1|act_loss: 0.012237548828125|cri_loss: 0.00685882568359375|unsuper_loss: 0.0
+average reward score: 4.84765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.93%) |Training time=0.52s (23.55%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3046|ppo_ep: 1|act_loss: 0.008270263671875|cri_loss: 0.004291534423828125|unsuper_loss: 0.0
+average reward score: 5.58984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.56%) |Training time=0.47s (21.86%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3047|ppo_ep: 1|act_loss: -3.62396240234375e-05|cri_loss: 0.0002925395965576172|unsuper_loss: 0.0
+average reward score: 4.546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.50%) |Training time=0.47s (21.94%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3048|ppo_ep: 1|act_loss: 0.0095367431640625|cri_loss: 0.005397796630859375|unsuper_loss: 0.0
+average reward score: 5.140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.68s |Gather latency=0.00s (0.00%) |Generate time=1.58s (58.98%) |Training time=0.47s (17.46%) |Others=0.63 (23.56%)|CurSamplesPerSec=11.93 |AvgSamplesPerSec=14.45
+[2023-04-14 10:40:00,590] [INFO] [logging.py:96:log_dist] [Rank 0] step=3050, skipped=41, lr=[6.92733118035947e-06, 6.92733118035947e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:40:00,608] [INFO] [timer.py:199:stop] epoch=0/micro_step=3050/global_step=3050, RunningAvgSamplesPerSec=106.19061216110067, CurrSamplesPerSec=103.65696085806124, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:40:00,701] [INFO] [logging.py:96:log_dist] [Rank 0] step=3050, skipped=47, lr=[3.59448582937847e-06, 3.59448582937847e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3049|ppo_ep: 1|act_loss: 0.01837158203125|cri_loss: 0.0163421630859375|unsuper_loss: 0.0
+average reward score: 4.5546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.53%) |Training time=0.47s (21.92%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3050|ppo_ep: 1|act_loss: 0.0167694091796875|cri_loss: 0.00876617431640625|unsuper_loss: 0.0
+average reward score: 5.53125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.44%) |Training time=0.47s (22.00%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3051|ppo_ep: 1|act_loss: 0.0504150390625|cri_loss: 0.0277099609375|unsuper_loss: 0.0
+average reward score: 4.2109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.44%) |Training time=0.47s (21.99%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3052|ppo_ep: 1|act_loss: 0.044677734375|cri_loss: 0.0234375|unsuper_loss: 0.0
+average reward score: 5.44921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.37%) |Training time=0.47s (22.07%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3053|ppo_ep: 1|act_loss: 0.0081939697265625|cri_loss: 0.0041656494140625|unsuper_loss: 0.0
+average reward score: 5.09765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.99%) |Training time=0.46s (21.44%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3054|ppo_ep: 1|act_loss: 0.019744873046875|cri_loss: 0.01055145263671875|unsuper_loss: 0.0
+average reward score: 5.13671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.52%) |Training time=0.47s (21.90%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3055|ppo_ep: 1|act_loss: 0.00218963623046875|cri_loss: 0.001293182373046875|unsuper_loss: 0.0
+average reward score: 5.0
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.11%) |Training time=0.48s (22.35%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3056|ppo_ep: 1|act_loss: -0.0211181640625|cri_loss: -0.01013946533203125|unsuper_loss: 0.0
+average reward score: 5.37109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.47s (22.05%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3057|ppo_ep: 1|act_loss: 0.020233154296875|cri_loss: 0.0114288330078125|unsuper_loss: 0.0
+average reward score: 4.92578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.35%) |Training time=0.48s (22.09%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3058|ppo_ep: 1|act_loss: 0.026153564453125|cri_loss: 0.01445770263671875|unsuper_loss: 0.0
+average reward score: 4.8203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.14%) |Training time=0.46s (21.32%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.45
+[2023-04-14 10:40:22,136] [INFO] [logging.py:96:log_dist] [Rank 0] step=3060, skipped=41, lr=[6.910595493590213e-06, 6.910595493590213e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:40:22,155] [INFO] [timer.py:199:stop] epoch=0/micro_step=3060/global_step=3060, RunningAvgSamplesPerSec=106.18132775281478, CurrSamplesPerSec=103.46374930236718, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:40:22,248] [INFO] [logging.py:96:log_dist] [Rank 0] step=3060, skipped=47, lr=[3.585824161225334e-06, 3.585824161225334e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3059|ppo_ep: 1|act_loss: -0.00453948974609375|cri_loss: -0.0020771026611328125|unsuper_loss: 0.0
+average reward score: 5.078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.72%) |Training time=0.47s (21.74%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3060|ppo_ep: 1|act_loss: -0.017425537109375|cri_loss: -0.00827789306640625|unsuper_loss: 0.0
+average reward score: 5.203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.45%) |Training time=0.47s (21.98%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3061|ppo_ep: 1|act_loss: 0.00922393798828125|cri_loss: 0.004856109619140625|unsuper_loss: 0.0
+average reward score: 5.1015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.33%) |Training time=0.47s (20.44%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.80 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3062|ppo_ep: 1|act_loss: -0.030548095703125|cri_loss: -0.0146484375|unsuper_loss: 0.0
+average reward score: 4.22265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.42%) |Training time=0.47s (22.02%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3063|ppo_ep: 1|act_loss: 0.0113067626953125|cri_loss: 0.00652313232421875|unsuper_loss: 0.0
+average reward score: 4.578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.57%) |Training time=0.47s (21.87%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3064|ppo_ep: 1|act_loss: 0.04510498046875|cri_loss: 0.024749755859375|unsuper_loss: 0.0
+average reward score: 4.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.58s (69.11%) |Training time=0.48s (20.89%) |Others=0.23 (10.00%)|CurSamplesPerSec=14.03 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3065|ppo_ep: 1|act_loss: -0.027740478515625|cri_loss: -0.0132293701171875|unsuper_loss: 0.0
+average reward score: 4.984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.49%) |Training time=0.48s (21.99%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3066|ppo_ep: 1|act_loss: 0.017669677734375|cri_loss: 0.0101318359375|unsuper_loss: 0.0
+average reward score: 4.578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.86%) |Training time=0.46s (21.56%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3067|ppo_ep: 1|act_loss: -0.018463134765625|cri_loss: -0.009002685546875|unsuper_loss: 0.0
+average reward score: 5.54296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.52%) |Training time=0.47s (21.92%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3068|ppo_ep: 1|act_loss: -0.0201416015625|cri_loss: -0.009796142578125|unsuper_loss: 0.0
+average reward score: 5.29296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.68%) |Training time=0.47s (21.75%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
+[2023-04-14 10:40:43,959] [INFO] [logging.py:96:log_dist] [Rank 0] step=3070, skipped=41, lr=[6.893828893264516e-06, 6.893828893264516e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:40:43,978] [INFO] [timer.py:199:stop] epoch=0/micro_step=3070/global_step=3070, RunningAvgSamplesPerSec=106.17196197617312, CurrSamplesPerSec=103.21756958575386, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:40:44,070] [INFO] [logging.py:96:log_dist] [Rank 0] step=3070, skipped=47, lr=[3.57714639853872e-06, 3.57714639853872e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3069|ppo_ep: 1|act_loss: 0.0130615234375|cri_loss: 0.00708770751953125|unsuper_loss: 0.0
+average reward score: 5.18359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.90%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3070|ppo_ep: 1|act_loss: -0.003082275390625|cri_loss: -0.0013217926025390625|unsuper_loss: 0.0
+average reward score: 4.98046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.58s (69.34%) |Training time=0.47s (20.42%) |Others=0.23 (10.24%)|CurSamplesPerSec=14.02 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3071|ppo_ep: 1|act_loss: 0.0140533447265625|cri_loss: 0.007343292236328125|unsuper_loss: 0.0
+average reward score: 4.70703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.78%) |Training time=0.46s (21.64%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3072|ppo_ep: 1|act_loss: 0.0193023681640625|cri_loss: 0.01043701171875|unsuper_loss: 0.0
+average reward score: 4.890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.92%) |Training time=0.46s (21.54%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3073|ppo_ep: 1|act_loss: 0.00827789306640625|cri_loss: 0.00452423095703125|unsuper_loss: 0.0
+average reward score: 5.23828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.62s (72.16%) |Training time=0.47s (20.93%) |Others=0.15 (6.90%)|CurSamplesPerSec=14.25 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3074|ppo_ep: 1|act_loss: -0.0176544189453125|cri_loss: -0.00861358642578125|unsuper_loss: 0.0
+average reward score: 4.89453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.30%) |Training time=0.46s (21.10%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3075|ppo_ep: 1|act_loss: -0.0271453857421875|cri_loss: -0.013336181640625|unsuper_loss: 0.0
+average reward score: 5.03125
+-------------------------------------------------------------------------------------
+|E2E latency=2.86s |Gather latency=0.00s (0.00%) |Generate time=1.76s (61.70%) |Training time=0.47s (16.32%) |Others=0.63 (21.97%)|CurSamplesPerSec=11.20 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3076|ppo_ep: 1|act_loss: 0.005237579345703125|cri_loss: 0.003314971923828125|unsuper_loss: 0.0
+average reward score: 5.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.45%) |Training time=0.47s (21.97%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3077|ppo_ep: 1|act_loss: -0.03656005859375|cri_loss: -0.0175323486328125|unsuper_loss: 0.0
+average reward score: 4.71875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.48s (22.13%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3078|ppo_ep: 1|act_loss: 0.0016117095947265625|cri_loss: 0.00209808349609375|unsuper_loss: 0.0
+average reward score: 4.59375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.75%) |Training time=0.47s (21.68%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
+[2023-04-14 10:41:06,415] [INFO] [logging.py:96:log_dist] [Rank 0] step=3080, skipped=41, lr=[6.8770316279038435e-06, 6.8770316279038435e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:41:06,433] [INFO] [timer.py:199:stop] epoch=0/micro_step=3080/global_step=3080, RunningAvgSamplesPerSec=106.16864078143239, CurrSamplesPerSec=106.1309223235332, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:41:06,525] [INFO] [logging.py:96:log_dist] [Rank 0] step=3080, skipped=47, lr=[3.568452669944006e-06, 3.568452669944006e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3079|ppo_ep: 1|act_loss: 0.00951385498046875|cri_loss: 0.01016998291015625|unsuper_loss: 0.0
+average reward score: 4.57421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.47s (21.68%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3080|ppo_ep: 1|act_loss: 0.019134521484375|cri_loss: 0.00991058349609375|unsuper_loss: 0.0
+average reward score: 4.171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.53%) |Training time=0.47s (21.90%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3081|ppo_ep: 1|act_loss: 0.0150146484375|cri_loss: 0.0078277587890625|unsuper_loss: 0.0
+average reward score: 4.7734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.53s |Gather latency=0.00s (0.00%) |Generate time=1.58s (62.59%) |Training time=0.46s (18.38%) |Others=0.48 (19.03%)|CurSamplesPerSec=12.66 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3082|ppo_ep: 1|act_loss: 0.027618408203125|cri_loss: 0.01419830322265625|unsuper_loss: 0.0
+average reward score: 5.3828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.46%) |Training time=0.47s (21.97%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3083|ppo_ep: 1|act_loss: -0.0113525390625|cri_loss: -0.00547027587890625|unsuper_loss: 0.0
+average reward score: 5.3828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.46%) |Training time=0.47s (21.98%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3084|ppo_ep: 1|act_loss: -0.04022216796875|cri_loss: -0.019439697265625|unsuper_loss: 0.0
+average reward score: 4.796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.42%) |Training time=0.47s (22.03%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3085|ppo_ep: 1|act_loss: 0.004192352294921875|cri_loss: 0.0023860931396484375|unsuper_loss: 0.0
+average reward score: 4.58984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.70%) |Training time=0.47s (21.74%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3086|ppo_ep: 1|act_loss: -0.01071929931640625|cri_loss: -0.00405120849609375|unsuper_loss: 0.0
+average reward score: 5.36328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.34%) |Training time=0.47s (22.09%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3087|ppo_ep: 1|act_loss: -0.0206756591796875|cri_loss: -0.010009765625|unsuper_loss: 0.0
+average reward score: 4.9453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.58s (70.70%) |Training time=0.48s (21.37%) |Others=0.18 (7.92%)|CurSamplesPerSec=14.35 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3088|ppo_ep: 1|act_loss: -0.0251617431640625|cri_loss: -0.01202392578125|unsuper_loss: 0.0
+average reward score: 4.421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.48s (22.10%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
+[2023-04-14 10:41:28,381] [INFO] [logging.py:96:log_dist] [Rank 0] step=3090, skipped=41, lr=[6.860203946484189e-06, 6.860203946484189e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:41:28,399] [INFO] [timer.py:199:stop] epoch=0/micro_step=3090/global_step=3090, RunningAvgSamplesPerSec=106.15902057491887, CurrSamplesPerSec=102.21190862761702, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:41:28,492] [INFO] [logging.py:96:log_dist] [Rank 0] step=3090, skipped=47, lr=[3.5597431043032205e-06, 3.5597431043032205e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3089|ppo_ep: 1|act_loss: -0.025787353515625|cri_loss: -0.01262664794921875|unsuper_loss: 0.0
+average reward score: 5.16796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.48s (21.97%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3090|ppo_ep: 1|act_loss: -0.0203094482421875|cri_loss: -0.009735107421875|unsuper_loss: 0.0
+average reward score: 4.5546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.37%) |Training time=0.47s (20.41%) |Others=0.10 (4.22%)|CurSamplesPerSec=13.77 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3091|ppo_ep: 1|act_loss: 0.02978515625|cri_loss: 0.0167083740234375|unsuper_loss: 0.0
+average reward score: 4.8125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.56%) |Training time=0.47s (21.88%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3092|ppo_ep: 1|act_loss: -0.0156097412109375|cri_loss: -0.007183074951171875|unsuper_loss: 0.0
+average reward score: 4.328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.52%) |Training time=0.47s (21.90%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3093|ppo_ep: 1|act_loss: -0.031402587890625|cri_loss: -0.01488494873046875|unsuper_loss: 0.0
+average reward score: 4.7109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.29%) |Training time=0.46s (21.17%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3094|ppo_ep: 1|act_loss: -0.032623291015625|cri_loss: -0.0154571533203125|unsuper_loss: 0.0
+average reward score: 5.45703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.54%) |Training time=0.47s (21.90%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3095|ppo_ep: 1|act_loss: -0.0212554931640625|cri_loss: -0.01013946533203125|unsuper_loss: 0.0
+average reward score: 4.9296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.73%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3096|ppo_ep: 1|act_loss: -0.03173828125|cri_loss: -0.0146636962890625|unsuper_loss: 0.0
+average reward score: 4.67578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.74%) |Training time=0.47s (21.70%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3097|ppo_ep: 1|act_loss: 0.00027441978454589844|cri_loss: 0.0005178451538085938|unsuper_loss: 0.0
+average reward score: 5.140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.63%) |Training time=0.47s (21.81%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3098|ppo_ep: 1|act_loss: 0.000667572021484375|cri_loss: 0.0013866424560546875|unsuper_loss: 0.0
+average reward score: 4.4296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.72%) |Training time=0.47s (21.71%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45
+[2023-04-14 10:41:50,068] [INFO] [logging.py:96:log_dist] [Rank 0] step=3100, skipped=41, lr=[6.843346098432385e-06, 6.843346098432385e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:41:50,371] [INFO] [timer.py:199:stop] epoch=0/micro_step=3100/global_step=3100, RunningAvgSamplesPerSec=106.1222394732359, CurrSamplesPerSec=54.794599648087136, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:41:50,464] [INFO] [logging.py:96:log_dist] [Rank 0] step=3100, skipped=47, lr=[3.551017830713139e-06, 3.551017830713139e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3099|ppo_ep: 1|act_loss: 0.014007568359375|cri_loss: 0.007511138916015625|unsuper_loss: 0.0
+average reward score: 4.71875
+-------------------------------------------------------------------------------------
+|E2E latency=2.44s |Gather latency=0.00s (0.00%) |Generate time=1.59s (65.33%) |Training time=0.75s (30.65%) |Others=0.10 (4.02%)|CurSamplesPerSec=13.13 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3100|ppo_ep: 1|act_loss: 0.05084228515625|cri_loss: 0.0269622802734375|unsuper_loss: 0.0
+average reward score: 4.69140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.57%) |Training time=0.47s (21.86%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3101|ppo_ep: 1|act_loss: 0.000579833984375|cri_loss: 0.0024852752685546875|unsuper_loss: 0.0
+average reward score: 4.7578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.62%) |Training time=0.47s (21.85%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3102|ppo_ep: 1|act_loss: 0.0220184326171875|cri_loss: 0.0114593505859375|unsuper_loss: 0.0
+average reward score: 4.90625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.90%) |Training time=0.47s (21.60%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3103|ppo_ep: 1|act_loss: 0.01221466064453125|cri_loss: 0.006389617919921875|unsuper_loss: 0.0
+average reward score: 4.40625
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (75.06%) |Training time=0.46s (20.58%) |Others=0.10 (4.36%)|CurSamplesPerSec=14.19 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3104|ppo_ep: 1|act_loss: -0.019287109375|cri_loss: -0.00951385498046875|unsuper_loss: 0.0
+average reward score: 5.40625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.33%) |Training time=0.46s (21.12%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3105|ppo_ep: 1|act_loss: -0.0160675048828125|cri_loss: -0.007110595703125|unsuper_loss: 0.0
+average reward score: 5.3671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.57%) |Training time=0.47s (21.87%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3106|ppo_ep: 1|act_loss: -0.027008056640625|cri_loss: -0.01226806640625|unsuper_loss: 0.0
+average reward score: 4.2734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.41%) |Training time=0.47s (20.34%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.77 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3107|ppo_ep: 1|act_loss: -0.032196044921875|cri_loss: -0.0145111083984375|unsuper_loss: 0.0
+average reward score: 4.69921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.48%) |Training time=0.47s (21.97%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3108|ppo_ep: 1|act_loss: -0.005462646484375|cri_loss: -0.002117156982421875|unsuper_loss: 0.0
+average reward score: 4.90234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.17%) |Training time=0.48s (22.26%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45
+[2023-04-14 10:42:12,205] [INFO] [logging.py:96:log_dist] [Rank 0] step=3110, skipped=41, lr=[6.826458333622407e-06, 6.826458333622407e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:42:12,224] [INFO] [timer.py:199:stop] epoch=0/micro_step=3110/global_step=3110, RunningAvgSamplesPerSec=106.113810899425, CurrSamplesPerSec=99.49490248637686, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:42:12,316] [INFO] [logging.py:96:log_dist] [Rank 0] step=3110, skipped=47, lr=[3.542276978503364e-06, 3.542276978503364e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3109|ppo_ep: 1|act_loss: 0.0574951171875|cri_loss: 0.030975341796875|unsuper_loss: 0.0
+average reward score: 4.80078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.10%) |Training time=0.48s (22.38%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3110|ppo_ep: 1|act_loss: 0.02020263671875|cri_loss: 0.01190948486328125|unsuper_loss: 0.0
+average reward score: 4.9296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.20%) |Training time=0.48s (22.23%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3111|ppo_ep: 1|act_loss: 0.019256591796875|cri_loss: 0.0103302001953125|unsuper_loss: 0.0
+average reward score: 4.6953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.07%) |Training time=0.46s (21.39%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3112|ppo_ep: 1|act_loss: -0.05108642578125|cri_loss: -0.0231170654296875|unsuper_loss: 0.0
+average reward score: 4.80078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.23%) |Training time=0.48s (22.22%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3113|ppo_ep: 1|act_loss: -0.0019350051879882812|cri_loss: -0.0007886886596679688|unsuper_loss: 0.0
+average reward score: 5.60546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.28%) |Training time=0.48s (22.17%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3114|ppo_ep: 1|act_loss: 0.04736328125|cri_loss: 0.024871826171875|unsuper_loss: 0.0
+average reward score: 4.69140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.19%) |Training time=0.48s (22.26%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3115|ppo_ep: 1|act_loss: 0.0191650390625|cri_loss: 0.01056671142578125|unsuper_loss: 0.0
+average reward score: 5.125
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.58s (70.34%) |Training time=0.47s (21.09%) |Others=0.19 (8.57%)|CurSamplesPerSec=14.22 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3116|ppo_ep: 1|act_loss: 0.0114898681640625|cri_loss: 0.006500244140625|unsuper_loss: 0.0
+average reward score: 5.0625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.48s (22.24%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3117|ppo_ep: 1|act_loss: -0.003936767578125|cri_loss: -0.001834869384765625|unsuper_loss: 0.0
+average reward score: 5.2421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.30%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3118|ppo_ep: 1|act_loss: -0.01453399658203125|cri_loss: -0.0068359375|unsuper_loss: 0.0
+average reward score: 4.7734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.48%) |Training time=0.47s (21.96%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
+[2023-04-14 10:42:33,852] [INFO] [logging.py:96:log_dist] [Rank 0] step=3120, skipped=41, lr=[6.80954090237167e-06, 6.80954090237167e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:42:33,870] [INFO] [timer.py:199:stop] epoch=0/micro_step=3120/global_step=3120, RunningAvgSamplesPerSec=106.10118858785661, CurrSamplesPerSec=105.24239249756924, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:42:33,963] [INFO] [logging.py:96:log_dist] [Rank 0] step=3120, skipped=47, lr=[3.533520677234411e-06, 3.533520677234411e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3119|ppo_ep: 1|act_loss: 0.0082855224609375|cri_loss: 0.004329681396484375|unsuper_loss: 0.0
+average reward score: 6.32421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.47s (21.63%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3120|ppo_ep: 1|act_loss: 0.00994873046875|cri_loss: 0.005306243896484375|unsuper_loss: 0.0
+average reward score: 5.265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.72%) |Training time=0.47s (21.73%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3121|ppo_ep: 1|act_loss: 0.0122833251953125|cri_loss: 0.00635528564453125|unsuper_loss: 0.0
+average reward score: 4.46875
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.44%) |Training time=0.47s (20.31%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3122|ppo_ep: 1|act_loss: 0.018890380859375|cri_loss: 0.00965118408203125|unsuper_loss: 0.0
+average reward score: 5.16015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.70%) |Training time=0.47s (21.72%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3123|ppo_ep: 1|act_loss: 0.012359619140625|cri_loss: 0.00652313232421875|unsuper_loss: 0.0
+average reward score: 4.65234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.64%) |Training time=0.47s (21.79%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3124|ppo_ep: 1|act_loss: 0.0006108283996582031|cri_loss: 0.0007014274597167969|unsuper_loss: 0.0
+average reward score: 4.7265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.50%) |Training time=0.47s (21.95%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3125|ppo_ep: 1|act_loss: -0.0137481689453125|cri_loss: -0.006610870361328125|unsuper_loss: 0.0
+average reward score: 5.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.45%) |Training time=0.47s (22.00%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3126|ppo_ep: 1|act_loss: 0.0255279541015625|cri_loss: 0.015472412109375|unsuper_loss: 0.0
+average reward score: 5.25390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.55%) |Training time=0.47s (21.89%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3127|ppo_ep: 1|act_loss: 0.00579833984375|cri_loss: 0.0034637451171875|unsuper_loss: 0.0
+average reward score: 5.83203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.69s |Gather latency=0.00s (0.00%) |Generate time=1.58s (58.80%) |Training time=0.47s (17.47%) |Others=0.64 (23.74%)|CurSamplesPerSec=11.88 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3128|ppo_ep: 1|act_loss: -0.0211181640625|cri_loss: -0.0097198486328125|unsuper_loss: 0.0
+average reward score: 5.33984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.47s (22.04%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
+[2023-04-14 10:42:56,080] [INFO] [logging.py:96:log_dist] [Rank 0] step=3130, skipped=41, lr=[6.792594055437316e-06, 6.792594055437316e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:42:56,098] [INFO] [timer.py:199:stop] epoch=0/micro_step=3130/global_step=3130, RunningAvgSamplesPerSec=106.09292350573443, CurrSamplesPerSec=101.92254909405708, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:42:56,191] [INFO] [logging.py:96:log_dist] [Rank 0] step=3130, skipped=47, lr=[3.524749056695789e-06, 3.524749056695789e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3129|ppo_ep: 1|act_loss: -0.0255126953125|cri_loss: -0.01183319091796875|unsuper_loss: 0.0
+average reward score: 4.42578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.48s (22.08%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3130|ppo_ep: 1|act_loss: -0.01776123046875|cri_loss: -0.00868988037109375|unsuper_loss: 0.0
+average reward score: 5.953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.42%) |Training time=0.48s (22.03%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3131|ppo_ep: 1|act_loss: -0.011505126953125|cri_loss: -0.0054473876953125|unsuper_loss: 0.0
+average reward score: 5.2265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.16%) |Training time=0.49s (22.35%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3132|ppo_ep: 1|act_loss: 0.0022106170654296875|cri_loss: 0.001453399658203125|unsuper_loss: 0.0
+average reward score: 4.6875
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.68s (74.39%) |Training time=0.48s (21.27%) |Others=0.10 (4.34%)|CurSamplesPerSec=14.16 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3133|ppo_ep: 1|act_loss: 0.0005121231079101562|cri_loss: 0.0004792213439941406|unsuper_loss: 0.0
+average reward score: 4.72265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.89%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3134|ppo_ep: 1|act_loss: 0.01171875|cri_loss: 0.006145477294921875|unsuper_loss: 0.0
+average reward score: 4.5390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.48s (22.04%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3135|ppo_ep: 1|act_loss: -0.0181884765625|cri_loss: -0.0087127685546875|unsuper_loss: 0.0
+average reward score: 4.84375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.68%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3136|ppo_ep: 1|act_loss: 0.018280029296875|cri_loss: 0.0098419189453125|unsuper_loss: 0.0
+average reward score: 4.9609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.59s |Gather latency=0.00s (0.00%) |Generate time=1.75s (67.58%) |Training time=0.47s (18.03%) |Others=0.37 (14.39%)|CurSamplesPerSec=12.34 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3137|ppo_ep: 1|act_loss: 0.006168365478515625|cri_loss: 0.0038909912109375|unsuper_loss: 0.0
+average reward score: 5.43359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.86%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3138|ppo_ep: 1|act_loss: -0.0243682861328125|cri_loss: -0.01153564453125|unsuper_loss: 0.0
+average reward score: 4.7734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.83%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+[2023-04-14 10:43:18,216] [INFO] [logging.py:96:log_dist] [Rank 0] step=3140, skipped=41, lr=[6.775618044012496e-06, 6.775618044012496e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:43:18,234] [INFO] [timer.py:199:stop] epoch=0/micro_step=3140/global_step=3140, RunningAvgSamplesPerSec=106.08196738154236, CurrSamplesPerSec=103.5805649257089, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:43:18,327] [INFO] [logging.py:96:log_dist] [Rank 0] step=3140, skipped=47, lr=[3.5159622469040734e-06, 3.5159622469040734e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3139|ppo_ep: 1|act_loss: -0.0006122589111328125|cri_loss: 0.000782012939453125|unsuper_loss: 0.0
+average reward score: 4.59765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.61%) |Training time=0.47s (21.86%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3140|ppo_ep: 1|act_loss: -0.0027523040771484375|cri_loss: -0.000537872314453125|unsuper_loss: 0.0
+average reward score: 4.63671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.81%) |Training time=0.47s (21.63%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3141|ppo_ep: 1|act_loss: 0.009124755859375|cri_loss: 0.0054473876953125|unsuper_loss: 0.0
+average reward score: 4.98046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.74%) |Training time=0.47s (21.65%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3142|ppo_ep: 1|act_loss: 0.027740478515625|cri_loss: 0.01450347900390625|unsuper_loss: 0.0
+average reward score: 4.7109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.75%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3143|ppo_ep: 1|act_loss: 0.0130767822265625|cri_loss: 0.00711822509765625|unsuper_loss: 0.0
+average reward score: 4.8203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.88%) |Training time=0.46s (21.56%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3144|ppo_ep: 1|act_loss: 0.0394287109375|cri_loss: 0.0209197998046875|unsuper_loss: 0.0
+average reward score: 5.16796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.07%) |Training time=0.53s (23.64%) |Others=0.12 (5.28%)|CurSamplesPerSec=14.40 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3145|ppo_ep: 1|act_loss: -0.09375|cri_loss: -0.044036865234375|unsuper_loss: 0.0
+average reward score: 4.8515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.58s (70.92%) |Training time=0.55s (24.58%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.33 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3146|ppo_ep: 1|act_loss: -0.1318359375|cri_loss: -0.0621337890625|unsuper_loss: 0.0
+average reward score: 4.7890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.49%) |Training time=0.46s (20.99%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3147|ppo_ep: 1|act_loss: 0.0472412109375|cri_loss: 0.02520751953125|unsuper_loss: 0.0
+average reward score: 5.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.01%) |Training time=0.48s (22.15%) |Others=0.10 (4.84%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3148|ppo_ep: 1|act_loss: -0.037689208984375|cri_loss: -0.017608642578125|unsuper_loss: 0.0
+average reward score: 5.6015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.90%) |Training time=0.46s (21.37%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
+[2023-04-14 10:43:39,941] [INFO] [logging.py:96:log_dist] [Rank 0] step=3150, skipped=41, lr=[6.758613119722655e-06, 6.758613119722655e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:43:39,958] [INFO] [timer.py:199:stop] epoch=0/micro_step=3150/global_step=3150, RunningAvgSamplesPerSec=106.06281589531704, CurrSamplesPerSec=101.26017409621011, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:43:40,051] [INFO] [logging.py:96:log_dist] [Rank 0] step=3150, skipped=47, lr=[3.5071603781009834e-06, 3.5071603781009834e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3149|ppo_ep: 1|act_loss: -0.029327392578125|cri_loss: -0.01125335693359375|unsuper_loss: 0.0
+average reward score: 4.6484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.48s (22.04%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3150|ppo_ep: 1|act_loss: 0.031280517578125|cri_loss: 0.017364501953125|unsuper_loss: 0.0
+average reward score: 4.5
+-------------------------------------------------------------------------------------
+|E2E latency=2.42s |Gather latency=0.00s (0.00%) |Generate time=1.60s (66.03%) |Training time=0.49s (20.17%) |Others=0.33 (13.79%)|CurSamplesPerSec=13.24 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3151|ppo_ep: 1|act_loss: 0.063720703125|cri_loss: 0.034332275390625|unsuper_loss: 0.0
+average reward score: 5.2265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.53%) |Training time=0.43s (19.75%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3152|ppo_ep: 1|act_loss: 0.0151519775390625|cri_loss: 0.00836944580078125|unsuper_loss: 0.0
+average reward score: 4.9765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.40%) |Training time=0.44s (20.03%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3153|ppo_ep: 1|act_loss: 0.010284423828125|cri_loss: 0.0057373046875|unsuper_loss: 0.0
+average reward score: 6.6640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.87%) |Training time=0.45s (20.58%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3154|ppo_ep: 1|act_loss: 0.01165771484375|cri_loss: 0.006153106689453125|unsuper_loss: 0.0
+average reward score: 5.12109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.37%) |Training time=0.46s (21.10%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3155|ppo_ep: 1|act_loss: -0.015777587890625|cri_loss: -0.00711822509765625|unsuper_loss: 0.0
+average reward score: 4.53515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.52s |Gather latency=0.00s (0.00%) |Generate time=1.61s (63.95%) |Training time=0.45s (18.02%) |Others=0.45 (18.03%)|CurSamplesPerSec=12.71 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3156|ppo_ep: 1|act_loss: -0.019256591796875|cri_loss: -0.00878143310546875|unsuper_loss: 0.0
+average reward score: 4.68359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.63s (77.07%) |Training time=0.39s (18.27%) |Others=0.10 (4.66%)|CurSamplesPerSec=15.10 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3157|ppo_ep: 1|act_loss: -0.0109405517578125|cri_loss: -0.004871368408203125|unsuper_loss: 0.0
+average reward score: 4.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.08%) |Training time=0.44s (20.40%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3158|ppo_ep: 1|act_loss: -0.0211181640625|cri_loss: -0.01001739501953125|unsuper_loss: 0.0
+average reward score: 5.1953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.34%) |Training time=0.46s (21.08%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+[2023-04-14 10:44:02,182] [INFO] [logging.py:96:log_dist] [Rank 0] step=3160, skipped=41, lr=[6.741579534621794e-06, 6.741579534621794e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:44:02,201] [INFO] [timer.py:199:stop] epoch=0/micro_step=3160/global_step=3160, RunningAvgSamplesPerSec=106.08499964575394, CurrSamplesPerSec=108.97365410549887, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:44:02,293] [INFO] [logging.py:96:log_dist] [Rank 0] step=3160, skipped=47, lr=[3.4983435807514484e-06, 3.4983435807514484e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3159|ppo_ep: 1|act_loss: -0.02764892578125|cri_loss: -0.0126495361328125|unsuper_loss: 0.0
+average reward score: 4.7265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.34%) |Training time=0.46s (21.14%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3160|ppo_ep: 1|act_loss: -0.0206298828125|cri_loss: -0.009796142578125|unsuper_loss: 0.0
+average reward score: 4.6328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.77%) |Training time=0.47s (21.12%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.51 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3161|ppo_ep: 1|act_loss: 0.0300445556640625|cri_loss: 0.015899658203125|unsuper_loss: 0.0
+average reward score: 4.94921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.71s (74.42%) |Training time=0.49s (21.31%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.91 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3162|ppo_ep: 1|act_loss: 0.069091796875|cri_loss: 0.036834716796875|unsuper_loss: 0.0
+average reward score: 5.2734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.44s |Gather latency=0.00s (0.00%) |Generate time=1.62s (66.42%) |Training time=0.45s (18.26%) |Others=0.37 (15.33%)|CurSamplesPerSec=13.09 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3163|ppo_ep: 1|act_loss: -0.01104736328125|cri_loss: -0.00527191162109375|unsuper_loss: 0.0
+average reward score: 5.25
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.37%) |Training time=0.46s (21.08%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3164|ppo_ep: 1|act_loss: -0.017486572265625|cri_loss: -0.0081787109375|unsuper_loss: 0.0
+average reward score: 4.87890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.48%) |Training time=0.46s (21.01%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3165|ppo_ep: 1|act_loss: -0.029510498046875|cri_loss: -0.0138702392578125|unsuper_loss: 0.0
+average reward score: 5.1640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.82%) |Training time=0.47s (21.66%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3166|ppo_ep: 1|act_loss: -0.00011587142944335938|cri_loss: 0.00028586387634277344|unsuper_loss: 0.0
+average reward score: 4.5859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.62s (71.38%) |Training time=0.55s (24.28%) |Others=0.10 (4.34%)|CurSamplesPerSec=14.08 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3167|ppo_ep: 1|act_loss: 0.0069122314453125|cri_loss: 0.0042724609375|unsuper_loss: 0.0
+average reward score: 4.91015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.39%) |Training time=0.48s (22.07%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3168|ppo_ep: 1|act_loss: 0.004741668701171875|cri_loss: 0.0040435791015625|unsuper_loss: 0.0
+average reward score: 5.3828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.85%) |Training time=0.47s (21.62%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+[2023-04-14 10:44:24,441] [INFO] [logging.py:96:log_dist] [Rank 0] step=3170, skipped=41, lr=[6.724517541188731e-06, 6.724517541188731e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:44:24,509] [INFO] [timer.py:199:stop] epoch=0/micro_step=3170/global_step=3170, RunningAvgSamplesPerSec=106.07379913337502, CurrSamplesPerSec=87.51397169668613, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:44:24,602] [INFO] [logging.py:96:log_dist] [Rank 0] step=3170, skipped=47, lr=[3.4895119855416737e-06, 3.4895119855416737e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3169|ppo_ep: 1|act_loss: 0.00868988037109375|cri_loss: 0.00487518310546875|unsuper_loss: 0.0
+average reward score: 5.484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.60s (71.85%) |Training time=0.53s (23.74%) |Others=0.10 (4.41%)|CurSamplesPerSec=14.39 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3170|ppo_ep: 1|act_loss: 0.0268707275390625|cri_loss: 0.014068603515625|unsuper_loss: 0.0
+average reward score: 4.93359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.05%) |Training time=0.49s (22.44%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3171|ppo_ep: 1|act_loss: 0.0170440673828125|cri_loss: 0.009185791015625|unsuper_loss: 0.0
+average reward score: 5.8515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.46%) |Training time=0.50s (23.05%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3172|ppo_ep: 1|act_loss: 0.01983642578125|cri_loss: 0.0110321044921875|unsuper_loss: 0.0
+average reward score: 4.79296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.99%) |Training time=0.49s (22.50%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3173|ppo_ep: 1|act_loss: 0.046783447265625|cri_loss: 0.02850341796875|unsuper_loss: 0.0
+average reward score: 4.71484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.29%) |Training time=0.48s (22.17%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3174|ppo_ep: 1|act_loss: 0.0023670196533203125|cri_loss: 0.001544952392578125|unsuper_loss: 0.0
+average reward score: 5.47265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.32%) |Training time=0.48s (22.14%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3175|ppo_ep: 1|act_loss: -0.0003724098205566406|cri_loss: 0.00018739700317382812|unsuper_loss: 0.0
+average reward score: 5.140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.46%) |Training time=0.48s (22.04%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3176|ppo_ep: 1|act_loss: -0.0162811279296875|cri_loss: -0.0079498291015625|unsuper_loss: 0.0
+average reward score: 5.13671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.35%) |Training time=0.48s (22.11%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3177|ppo_ep: 1|act_loss: 0.011505126953125|cri_loss: 0.00592803955078125|unsuper_loss: 0.0
+average reward score: 4.97265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.37%) |Training time=0.48s (22.10%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3178|ppo_ep: 1|act_loss: 0.051971435546875|cri_loss: 0.02734375|unsuper_loss: 0.0
+average reward score: 4.78515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.25%) |Training time=0.48s (22.14%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+[2023-04-14 10:44:46,203] [INFO] [logging.py:96:log_dist] [Rank 0] step=3180, skipped=41, lr=[6.707427392323372e-06, 6.707427392323372e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:44:46,221] [INFO] [timer.py:199:stop] epoch=0/micro_step=3180/global_step=3180, RunningAvgSamplesPerSec=106.05239888050805, CurrSamplesPerSec=101.55715126687541, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:44:46,314] [INFO] [logging.py:96:log_dist] [Rank 0] step=3180, skipped=47, lr=[3.480665723377205e-06, 3.480665723377205e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3179|ppo_ep: 1|act_loss: 0.07061767578125|cri_loss: 0.0386962890625|unsuper_loss: 0.0
+average reward score: 4.91796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.49%) |Training time=0.48s (21.99%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3180|ppo_ep: 1|act_loss: 0.0008573532104492188|cri_loss: 0.0007100105285644531|unsuper_loss: 0.0
+average reward score: 5.58984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.27%) |Training time=0.45s (21.13%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3181|ppo_ep: 1|act_loss: -0.035064697265625|cri_loss: -0.01690673828125|unsuper_loss: 0.0
+average reward score: 5.3203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.40%) |Training time=0.48s (20.39%) |Others=0.10 (4.21%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3182|ppo_ep: 1|act_loss: 0.032012939453125|cri_loss: 0.0175628662109375|unsuper_loss: 0.0
+average reward score: 5.26953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.42%) |Training time=0.45s (20.98%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3183|ppo_ep: 1|act_loss: 0.0159912109375|cri_loss: 0.01290130615234375|unsuper_loss: 0.0
+average reward score: 5.2421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.66%) |Training time=0.44s (20.74%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3184|ppo_ep: 1|act_loss: -0.00478363037109375|cri_loss: -0.0021953582763671875|unsuper_loss: 0.0
+average reward score: 5.68359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.60s (69.27%) |Training time=0.45s (19.35%) |Others=0.26 (11.38%)|CurSamplesPerSec=13.89 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3185|ppo_ep: 1|act_loss: -0.005828857421875|cri_loss: -0.0022563934326171875|unsuper_loss: 0.0
+average reward score: 5.35546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.65%) |Training time=0.44s (20.76%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3186|ppo_ep: 1|act_loss: 0.0272064208984375|cri_loss: 0.0141143798828125|unsuper_loss: 0.0
+average reward score: 6.0390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.71%) |Training time=0.44s (20.61%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3187|ppo_ep: 1|act_loss: 0.0460205078125|cri_loss: 0.0245819091796875|unsuper_loss: 0.0
+average reward score: 5.48828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.37%) |Training time=0.45s (21.04%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3188|ppo_ep: 1|act_loss: 0.00832366943359375|cri_loss: 0.0048675537109375|unsuper_loss: 0.0
+average reward score: 5.15625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.29%) |Training time=0.45s (21.13%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
+[2023-04-14 10:45:07,978] [INFO] [logging.py:96:log_dist] [Rank 0] step=3190, skipped=41, lr=[6.690309341342949e-06, 6.690309341342949e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:45:07,996] [INFO] [timer.py:199:stop] epoch=0/micro_step=3190/global_step=3190, RunningAvgSamplesPerSec=106.06791101364033, CurrSamplesPerSec=112.8028891206655, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:45:08,088] [INFO] [logging.py:96:log_dist] [Rank 0] step=3190, skipped=47, lr=[3.4718049253809894e-06, 3.4718049253809894e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3189|ppo_ep: 1|act_loss: 0.0041351318359375|cri_loss: 0.0023345947265625|unsuper_loss: 0.0
+average reward score: 5.49609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.76%) |Training time=0.45s (20.69%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3190|ppo_ep: 1|act_loss: 0.01507568359375|cri_loss: 0.0077056884765625|unsuper_loss: 0.0
+average reward score: 6.13671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.63s (72.68%) |Training time=0.46s (20.58%) |Others=0.15 (6.74%)|CurSamplesPerSec=14.25 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3191|ppo_ep: 1|act_loss: -0.01251220703125|cri_loss: -0.004730224609375|unsuper_loss: 0.0
+average reward score: 4.625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.53%) |Training time=0.48s (21.94%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3192|ppo_ep: 1|act_loss: 0.019500732421875|cri_loss: 0.01045989990234375|unsuper_loss: 0.0
+average reward score: 4.90234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.90%) |Training time=0.47s (21.55%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3193|ppo_ep: 1|act_loss: -0.024658203125|cri_loss: -0.0119476318359375|unsuper_loss: 0.0
+average reward score: 5.515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.71%) |Training time=0.47s (21.73%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3194|ppo_ep: 1|act_loss: -0.0204010009765625|cri_loss: -0.00936126708984375|unsuper_loss: 0.0
+average reward score: 4.765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.45%) |Training time=0.45s (20.91%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3195|ppo_ep: 1|act_loss: -0.005889892578125|cri_loss: -0.0024585723876953125|unsuper_loss: 0.0
+average reward score: 4.60546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.41%) |Training time=0.45s (21.04%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3196|ppo_ep: 1|act_loss: 0.000583648681640625|cri_loss: 0.0017080307006835938|unsuper_loss: 0.0
+average reward score: 5.15234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.04%) |Training time=0.46s (19.73%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3197|ppo_ep: 1|act_loss: -0.024444580078125|cri_loss: -0.01142120361328125|unsuper_loss: 0.0
+average reward score: 4.7265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.28%) |Training time=0.46s (21.16%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3198|ppo_ep: 1|act_loss: -0.00677490234375|cri_loss: -0.0029048919677734375|unsuper_loss: 0.0
+average reward score: 4.5390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.20%) |Training time=0.46s (21.24%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
+[2023-04-14 10:45:29,972] [INFO] [logging.py:96:log_dist] [Rank 0] step=3200, skipped=41, lr=[6.673163641978276e-06, 6.673163641978276e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:45:30,562] [INFO] [timer.py:199:stop] epoch=0/micro_step=3200/global_step=3200, RunningAvgSamplesPerSec=105.9945701207933, CurrSamplesPerSec=32.19384756571583, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:45:30,656] [INFO] [logging.py:96:log_dist] [Rank 0] step=3200, skipped=47, lr=[3.462929722891427e-06, 3.462929722891427e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3199|ppo_ep: 1|act_loss: -0.01418304443359375|cri_loss: -0.00630950927734375|unsuper_loss: 0.0
+average reward score: 5.1328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.87s |Gather latency=0.00s (0.00%) |Generate time=1.60s (55.75%) |Training time=1.16s (40.33%) |Others=0.11 (3.92%)|CurSamplesPerSec=11.16 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3200|ppo_ep: 1|act_loss: 0.036529541015625|cri_loss: 0.0186614990234375|unsuper_loss: 0.0
+average reward score: 5.20703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.45%) |Training time=0.45s (20.99%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3201|ppo_ep: 1|act_loss: 0.0728759765625|cri_loss: 0.04498291015625|unsuper_loss: 0.0
+average reward score: 5.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.39%) |Training time=0.48s (22.06%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3202|ppo_ep: 1|act_loss: 0.003406524658203125|cri_loss: 0.00264739990234375|unsuper_loss: 0.0
+average reward score: 5.0546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.92%) |Training time=0.47s (21.54%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3203|ppo_ep: 1|act_loss: 0.021942138671875|cri_loss: 0.01192474365234375|unsuper_loss: 0.0
+average reward score: 5.671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.40%) |Training time=0.48s (22.06%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3204|ppo_ep: 1|act_loss: -0.02435302734375|cri_loss: -0.01165771484375|unsuper_loss: 0.0
+average reward score: 5.75
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.55%) |Training time=0.47s (21.88%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3205|ppo_ep: 1|act_loss: 0.027679443359375|cri_loss: 0.0162811279296875|unsuper_loss: 0.0
+average reward score: 5.1953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.76s |Gather latency=0.00s (0.00%) |Generate time=1.58s (57.30%) |Training time=0.47s (16.98%) |Others=0.71 (25.72%)|CurSamplesPerSec=11.58 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3206|ppo_ep: 1|act_loss: -0.04412841796875|cri_loss: -0.0216217041015625|unsuper_loss: 0.0
+average reward score: 5.85546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.46%) |Training time=0.47s (21.98%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3207|ppo_ep: 1|act_loss: -0.0545654296875|cri_loss: -0.0265045166015625|unsuper_loss: 0.0
+average reward score: 4.78515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.45%) |Training time=0.47s (21.99%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+[2023-04-14 10:45:50,602] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+[2023-04-14 10:45:50,686] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 3208|ppo_ep: 1|act_loss: -0.046234130859375|cri_loss: -0.02215576171875|unsuper_loss: 0.0
+average reward score: 5.7421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.13%) |Training time=0.47s (21.79%) |Others=0.09 (4.08%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
+[2023-04-14 10:45:52,725] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+[2023-04-14 10:45:52,725] [INFO] [logging.py:96:log_dist] [Rank 0] step=3210, skipped=43, lr=[6.659427346418702e-06, 6.659427346418702e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:45:52,725] [INFO] [timer.py:199:stop] epoch=0/micro_step=3210/global_step=3210, RunningAvgSamplesPerSec=105.99191837382371, CurrSamplesPerSec=116.19224419656577, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:45:52,808] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+[2023-04-14 10:45:52,809] [INFO] [logging.py:96:log_dist] [Rank 0] step=3210, skipped=49, lr=[3.4558192780603294e-06, 3.4558192780603294e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3209|ppo_ep: 1|act_loss: -0.0201263427734375|cri_loss: -0.008544921875|unsuper_loss: 0.0
+average reward score: 5.08203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.27%) |Training time=0.44s (20.60%) |Others=0.09 (4.12%)|CurSamplesPerSec=15.08 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3210|ppo_ep: 1|act_loss: -0.02313232421875|cri_loss: -0.01012420654296875|unsuper_loss: 0.0
+average reward score: 5.01953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.70%) |Training time=0.47s (21.73%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3211|ppo_ep: 1|act_loss: -0.0171661376953125|cri_loss: -0.008209228515625|unsuper_loss: 0.0
+average reward score: 5.80078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.25%) |Training time=0.51s (21.60%) |Others=0.10 (4.15%)|CurSamplesPerSec=13.51 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3212|ppo_ep: 1|act_loss: 0.002307891845703125|cri_loss: 0.0018768310546875|unsuper_loss: 0.0
+average reward score: 5.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.93%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3213|ppo_ep: 1|act_loss: 0.046966552734375|cri_loss: 0.024261474609375|unsuper_loss: 0.0
+average reward score: 5.5625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.58%) |Training time=0.47s (21.90%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3214|ppo_ep: 1|act_loss: 0.0421142578125|cri_loss: 0.02203369140625|unsuper_loss: 0.0
+average reward score: 6.125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.39%) |Training time=0.48s (21.96%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3215|ppo_ep: 1|act_loss: 0.04229736328125|cri_loss: 0.02191162109375|unsuper_loss: 0.0
+average reward score: 5.578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.48s (22.06%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3216|ppo_ep: 1|act_loss: 0.041778564453125|cri_loss: 0.0214996337890625|unsuper_loss: 0.0
+average reward score: 5.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.26%) |Training time=0.48s (22.12%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3217|ppo_ep: 1|act_loss: -0.0154876708984375|cri_loss: -0.00630950927734375|unsuper_loss: 0.0
+average reward score: 5.609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.42%) |Training time=0.48s (22.05%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3218|ppo_ep: 1|act_loss: -0.0030689239501953125|cri_loss: -0.0011997222900390625|unsuper_loss: 0.0
+average reward score: 5.98046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.57%) |Training time=0.48s (21.88%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
+[2023-04-14 10:46:14,630] [INFO] [logging.py:96:log_dist] [Rank 0] step=3220, skipped=43, lr=[6.642232520669742e-06, 6.642232520669742e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:46:14,648] [INFO] [timer.py:199:stop] epoch=0/micro_step=3220/global_step=3220, RunningAvgSamplesPerSec=105.97259385289864, CurrSamplesPerSec=94.68141865104559, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:46:14,741] [INFO] [logging.py:96:log_dist] [Rank 0] step=3220, skipped=49, lr=[3.4469184791357944e-06, 3.4469184791357944e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3219|ppo_ep: 1|act_loss: -0.0013322830200195312|cri_loss: 6.29425048828125e-05|unsuper_loss: 0.0
+average reward score: 5.74609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.12%) |Training time=0.50s (22.47%) |Others=0.10 (4.41%)|CurSamplesPerSec=14.37 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3220|ppo_ep: 1|act_loss: -0.049072265625|cri_loss: -0.0238494873046875|unsuper_loss: 0.0
+average reward score: 5.3828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.27%) |Training time=0.48s (22.17%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3221|ppo_ep: 1|act_loss: -0.05157470703125|cri_loss: -0.022247314453125|unsuper_loss: 0.0
+average reward score: 4.953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.22%) |Training time=0.48s (22.11%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3222|ppo_ep: 1|act_loss: 0.00411224365234375|cri_loss: 0.003429412841796875|unsuper_loss: 0.0
+average reward score: 5.85546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.92%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3223|ppo_ep: 1|act_loss: -0.045166015625|cri_loss: -0.02154541015625|unsuper_loss: 0.0
+average reward score: 5.2421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.64%) |Training time=0.50s (22.80%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3224|ppo_ep: 1|act_loss: -0.01055908203125|cri_loss: -0.0046844482421875|unsuper_loss: 0.0
+average reward score: 5.9140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.27%) |Training time=0.48s (22.21%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3225|ppo_ep: 1|act_loss: -0.03643798828125|cri_loss: -0.017333984375|unsuper_loss: 0.0
+average reward score: 4.9765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.59s (67.61%) |Training time=0.54s (22.98%) |Others=0.22 (9.41%)|CurSamplesPerSec=13.59 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3226|ppo_ep: 1|act_loss: 0.02813720703125|cri_loss: 0.014892578125|unsuper_loss: 0.0
+average reward score: 5.140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.33%) |Training time=0.48s (22.14%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3227|ppo_ep: 1|act_loss: 0.068115234375|cri_loss: 0.035675048828125|unsuper_loss: 0.0
+average reward score: 4.8984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.53%) |Training time=0.47s (21.92%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3228|ppo_ep: 1|act_loss: 0.10015869140625|cri_loss: 0.0543212890625|unsuper_loss: 0.0
+average reward score: 5.58203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.23%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+[2023-04-14 10:46:36,491] [INFO] [logging.py:96:log_dist] [Rank 0] step=3230, skipped=43, lr=[6.625010759150993e-06, 6.625010759150993e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:46:36,510] [INFO] [timer.py:199:stop] epoch=0/micro_step=3230/global_step=3230, RunningAvgSamplesPerSec=105.94732779611219, CurrSamplesPerSec=99.21879437735863, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:46:36,602] [INFO] [logging.py:96:log_dist] [Rank 0] step=3230, skipped=49, lr=[3.4380036445950826e-06, 3.4380036445950826e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3229|ppo_ep: 1|act_loss: 0.0136871337890625|cri_loss: 0.00812530517578125|unsuper_loss: 0.0
+average reward score: 5.30078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.10%) |Training time=0.48s (22.40%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3230|ppo_ep: 1|act_loss: 0.028350830078125|cri_loss: 0.01450347900390625|unsuper_loss: 0.0
+average reward score: 5.3515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.09%) |Training time=0.48s (22.37%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3231|ppo_ep: 1|act_loss: -0.0118408203125|cri_loss: -0.00531768798828125|unsuper_loss: 0.0
+average reward score: 4.86328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.16%) |Training time=0.48s (22.31%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3232|ppo_ep: 1|act_loss: -0.00424957275390625|cri_loss: -0.0014390945434570312|unsuper_loss: 0.0
+average reward score: 4.9375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.03%) |Training time=0.49s (22.45%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3233|ppo_ep: 1|act_loss: -0.02142333984375|cri_loss: -0.01001739501953125|unsuper_loss: 0.0
+average reward score: 5.3203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.01%) |Training time=0.49s (22.46%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3234|ppo_ep: 1|act_loss: -0.0184478759765625|cri_loss: -0.00812530517578125|unsuper_loss: 0.0
+average reward score: 4.515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.22%) |Training time=0.48s (22.24%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3235|ppo_ep: 1|act_loss: -0.0230255126953125|cri_loss: -0.01080322265625|unsuper_loss: 0.0
+average reward score: 4.6328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.05%) |Training time=0.48s (22.31%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3236|ppo_ep: 1|act_loss: 0.011444091796875|cri_loss: 0.0060272216796875|unsuper_loss: 0.0
+average reward score: 4.8671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.13%) |Training time=0.48s (22.34%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3237|ppo_ep: 1|act_loss: -0.037109375|cri_loss: -0.01812744140625|unsuper_loss: 0.0
+average reward score: 5.5859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.48s (22.09%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3238|ppo_ep: 1|act_loss: 0.016021728515625|cri_loss: 0.00850677490234375|unsuper_loss: 0.0
+average reward score: 5.20703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.76%) |Training time=0.49s (22.68%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
+[2023-04-14 10:46:58,161] [INFO] [logging.py:96:log_dist] [Rank 0] step=3240, skipped=43, lr=[6.6077623171305024e-06, 6.6077623171305024e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:46:58,179] [INFO] [timer.py:199:stop] epoch=0/micro_step=3240/global_step=3240, RunningAvgSamplesPerSec=105.92459917021735, CurrSamplesPerSec=96.84810997952171, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:46:58,274] [INFO] [logging.py:96:log_dist] [Rank 0] step=3240, skipped=49, lr=[3.4290749065775475e-06, 3.4290749065775475e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3239|ppo_ep: 1|act_loss: -0.0272216796875|cri_loss: -0.0132293701171875|unsuper_loss: 0.0
+average reward score: 5.76953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.79%) |Training time=0.49s (22.62%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3240|ppo_ep: 1|act_loss: 0.01654052734375|cri_loss: 0.00868988037109375|unsuper_loss: 0.0
+average reward score: 5.5859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.81%) |Training time=0.47s (21.39%) |Others=0.15 (6.80%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3241|ppo_ep: 1|act_loss: 0.05535888671875|cri_loss: 0.02935791015625|unsuper_loss: 0.0
+average reward score: 5.2421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.58%) |Training time=0.48s (21.08%) |Others=0.10 (4.34%)|CurSamplesPerSec=14.10 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3242|ppo_ep: 1|act_loss: -0.00484466552734375|cri_loss: -0.002094268798828125|unsuper_loss: 0.0
+average reward score: 5.78125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.37%) |Training time=0.48s (22.07%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3243|ppo_ep: 1|act_loss: 0.03314208984375|cri_loss: 0.0167999267578125|unsuper_loss: 0.0
+average reward score: 5.23046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.94%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3244|ppo_ep: 1|act_loss: 0.009765625|cri_loss: 0.005191802978515625|unsuper_loss: 0.0
+average reward score: 5.6171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.65s |Gather latency=0.00s (0.00%) |Generate time=1.58s (59.59%) |Training time=0.48s (18.08%) |Others=0.59 (22.32%)|CurSamplesPerSec=12.06 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3245|ppo_ep: 1|act_loss: -0.00560760498046875|cri_loss: -0.002361297607421875|unsuper_loss: 0.0
+average reward score: 5.38671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.28%) |Training time=0.48s (22.14%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3246|ppo_ep: 1|act_loss: -0.0221405029296875|cri_loss: -0.01041412353515625|unsuper_loss: 0.0
+average reward score: 5.17578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.28%) |Training time=0.48s (22.18%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3247|ppo_ep: 1|act_loss: -0.023529052734375|cri_loss: -0.010528564453125|unsuper_loss: 0.0
+average reward score: 4.90625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.70%) |Training time=0.47s (21.74%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3248|ppo_ep: 1|act_loss: -0.033416748046875|cri_loss: -0.0162506103515625|unsuper_loss: 0.0
+average reward score: 4.53125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.77%) |Training time=0.48s (21.75%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.46
+[2023-04-14 10:47:20,543] [INFO] [logging.py:96:log_dist] [Rank 0] step=3250, skipped=43, lr=[6.590487450271792e-06, 6.590487450271792e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:47:20,561] [INFO] [timer.py:199:stop] epoch=0/micro_step=3250/global_step=3250, RunningAvgSamplesPerSec=105.9117506771129, CurrSamplesPerSec=100.8401444032472, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:47:20,654] [INFO] [logging.py:96:log_dist] [Rank 0] step=3250, skipped=49, lr=[3.420132397428625e-06, 3.420132397428625e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3249|ppo_ep: 1|act_loss: -0.01910400390625|cri_loss: -0.00933837890625|unsuper_loss: 0.0
+average reward score: 5.56640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.51%) |Training time=0.48s (21.18%) |Others=0.10 (4.32%)|CurSamplesPerSec=14.13 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3250|ppo_ep: 1|act_loss: 0.024017333984375|cri_loss: 0.01232147216796875|unsuper_loss: 0.0
+average reward score: 5.7265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.58s (68.45%) |Training time=0.48s (20.75%) |Others=0.25 (10.80%)|CurSamplesPerSec=13.87 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3251|ppo_ep: 1|act_loss: 0.0186614990234375|cri_loss: 0.00963592529296875|unsuper_loss: 0.0
+average reward score: 5.00390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.31%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3252|ppo_ep: 1|act_loss: -0.0013141632080078125|cri_loss: -0.0002598762512207031|unsuper_loss: 0.0
+average reward score: 4.7265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.42%) |Training time=0.48s (22.03%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3253|ppo_ep: 1|act_loss: 0.00670623779296875|cri_loss: 0.0039520263671875|unsuper_loss: 0.0
+average reward score: 5.0234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.81%) |Training time=0.49s (22.64%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3254|ppo_ep: 1|act_loss: 0.005558013916015625|cri_loss: 0.00290679931640625|unsuper_loss: 0.0
+average reward score: 5.546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.01%) |Training time=0.49s (22.46%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3255|ppo_ep: 1|act_loss: 0.01549530029296875|cri_loss: 0.0082550048828125|unsuper_loss: 0.0
+average reward score: 4.7421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.89%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3256|ppo_ep: 1|act_loss: -0.0132904052734375|cri_loss: -0.00640869140625|unsuper_loss: 0.0
+average reward score: 5.2109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.40s |Gather latency=0.00s (0.00%) |Generate time=1.58s (65.96%) |Training time=0.49s (20.36%) |Others=0.33 (13.68%)|CurSamplesPerSec=13.36 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3257|ppo_ep: 1|act_loss: 0.006717681884765625|cri_loss: 0.00504302978515625|unsuper_loss: 0.0
+average reward score: 5.13671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.33%) |Training time=0.51s (21.53%) |Others=0.10 (4.15%)|CurSamplesPerSec=13.48 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3258|ppo_ep: 1|act_loss: -0.035125732421875|cri_loss: -0.01690673828125|unsuper_loss: 0.0
+average reward score: 5.609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.31%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+[2023-04-14 10:47:42,752] [INFO] [logging.py:96:log_dist] [Rank 0] step=3260, skipped=43, lr=[6.573186414630062e-06, 6.573186414630062e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:47:42,770] [INFO] [timer.py:199:stop] epoch=0/micro_step=3260/global_step=3260, RunningAvgSamplesPerSec=105.8897419676038, CurrSamplesPerSec=100.19568274861223, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:47:42,863] [INFO] [logging.py:96:log_dist] [Rank 0] step=3260, skipped=49, lr=[3.4111762496978753e-06, 3.4111762496978753e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3259|ppo_ep: 1|act_loss: -0.01148223876953125|cri_loss: -0.005428314208984375|unsuper_loss: 0.0
+average reward score: 5.5234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.21%) |Training time=0.48s (22.27%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3260|ppo_ep: 1|act_loss: 0.01776123046875|cri_loss: 0.00902557373046875|unsuper_loss: 0.0
+average reward score: 5.578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.31%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3261|ppo_ep: 1|act_loss: -0.0051727294921875|cri_loss: -0.0015697479248046875|unsuper_loss: 0.0
+average reward score: 4.79296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.09%) |Training time=0.48s (22.38%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3262|ppo_ep: 1|act_loss: 0.0318603515625|cri_loss: 0.017333984375|unsuper_loss: 0.0
+average reward score: 4.53125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.25%) |Training time=0.48s (22.21%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3263|ppo_ep: 1|act_loss: 0.01312255859375|cri_loss: 0.006908416748046875|unsuper_loss: 0.0
+average reward score: 4.88671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.91%) |Training time=0.49s (22.57%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3264|ppo_ep: 1|act_loss: 0.036163330078125|cri_loss: 0.0189971923828125|unsuper_loss: 0.0
+average reward score: 5.578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.80%) |Training time=0.49s (22.68%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3265|ppo_ep: 1|act_loss: 0.01306915283203125|cri_loss: 0.006793975830078125|unsuper_loss: 0.0
+average reward score: 5.68359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.34%) |Training time=0.48s (22.13%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3266|ppo_ep: 1|act_loss: -0.026123046875|cri_loss: -0.01198577880859375|unsuper_loss: 0.0
+average reward score: 5.3515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.58s (66.72%) |Training time=0.48s (20.25%) |Others=0.31 (13.03%)|CurSamplesPerSec=13.51 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3267|ppo_ep: 1|act_loss: -0.0018949508666992188|cri_loss: -0.0007877349853515625|unsuper_loss: 0.0
+average reward score: 5.26953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.26%) |Training time=0.48s (22.16%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3268|ppo_ep: 1|act_loss: -0.002704620361328125|cri_loss: 0.0006694793701171875|unsuper_loss: 0.0
+average reward score: 5.3046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.16%) |Training time=0.48s (22.24%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
+[2023-04-14 10:48:04,610] [INFO] [logging.py:96:log_dist] [Rank 0] step=3270, skipped=43, lr=[6.555859466648397e-06, 6.555859466648397e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:48:04,628] [INFO] [timer.py:199:stop] epoch=0/micro_step=3270/global_step=3270, RunningAvgSamplesPerSec=105.86868857095156, CurrSamplesPerSec=97.09289883127744, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:48:04,721] [INFO] [logging.py:96:log_dist] [Rank 0] step=3270, skipped=49, lr=[3.4022065961370106e-06, 3.4022065961370106e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3269|ppo_ep: 1|act_loss: -0.00811004638671875|cri_loss: -0.003635406494140625|unsuper_loss: 0.0
+average reward score: 5.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.86%) |Training time=0.49s (22.63%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3270|ppo_ep: 1|act_loss: 0.01363372802734375|cri_loss: 0.007175445556640625|unsuper_loss: 0.0
+average reward score: 5.62890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.95%) |Training time=0.49s (22.52%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3271|ppo_ep: 1|act_loss: 0.05487060546875|cri_loss: 0.0282440185546875|unsuper_loss: 0.0
+average reward score: 5.11328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.09%) |Training time=0.49s (22.40%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3272|ppo_ep: 1|act_loss: -0.015411376953125|cri_loss: -0.007541656494140625|unsuper_loss: 0.0
+average reward score: 5.40234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.58s (70.69%) |Training time=0.50s (22.16%) |Others=0.16 (7.16%)|CurSamplesPerSec=14.30 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3273|ppo_ep: 1|act_loss: 0.034271240234375|cri_loss: 0.0180206298828125|unsuper_loss: 0.0
+average reward score: 5.1171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.90%) |Training time=0.50s (22.62%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3274|ppo_ep: 1|act_loss: 0.0107421875|cri_loss: 0.006000518798828125|unsuper_loss: 0.0
+average reward score: 5.23046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.31%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3275|ppo_ep: 1|act_loss: 0.02215576171875|cri_loss: 0.011566162109375|unsuper_loss: 0.0
+average reward score: 5.30859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.59%) |Training time=0.50s (22.81%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3276|ppo_ep: 1|act_loss: -0.01959228515625|cri_loss: -0.009613037109375|unsuper_loss: 0.0
+average reward score: 5.484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.90%) |Training time=0.49s (22.55%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3277|ppo_ep: 1|act_loss: 0.0102691650390625|cri_loss: 0.00539398193359375|unsuper_loss: 0.0
+average reward score: 5.3984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.72%) |Training time=0.50s (22.81%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3278|ppo_ep: 1|act_loss: 0.01190185546875|cri_loss: 0.0086517333984375|unsuper_loss: 0.0
+average reward score: 4.77734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.68s (73.95%) |Training time=0.49s (21.68%) |Others=0.10 (4.38%)|CurSamplesPerSec=14.05 |AvgSamplesPerSec=14.46
+[2023-04-14 10:48:26,538] [INFO] [logging.py:96:log_dist] [Rank 0] step=3280, skipped=43, lr=[6.538506863153967e-06, 6.538506863153967e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:48:26,556] [INFO] [timer.py:199:stop] epoch=0/micro_step=3280/global_step=3280, RunningAvgSamplesPerSec=105.8395083408653, CurrSamplesPerSec=98.7977475322228, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:48:26,649] [INFO] [logging.py:96:log_dist] [Rank 0] step=3280, skipped=49, lr=[3.3932235696979367e-06, 3.3932235696979367e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3279|ppo_ep: 1|act_loss: 0.033966064453125|cri_loss: 0.01898193359375|unsuper_loss: 0.0
+average reward score: 5.12890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.02%) |Training time=0.49s (22.47%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3280|ppo_ep: 1|act_loss: -0.03369140625|cri_loss: -0.0164642333984375|unsuper_loss: 0.0
+average reward score: 5.21484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.85%) |Training time=0.49s (22.62%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3281|ppo_ep: 1|act_loss: -0.0006170272827148438|cri_loss: -0.00013208389282226562|unsuper_loss: 0.0
+average reward score: 5.1484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.95%) |Training time=0.49s (22.39%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3282|ppo_ep: 1|act_loss: 3.743171691894531e-05|cri_loss: 0.00023567676544189453|unsuper_loss: 0.0
+average reward score: 5.03515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.05%) |Training time=0.49s (22.42%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3283|ppo_ep: 1|act_loss: 0.01375579833984375|cri_loss: 0.007175445556640625|unsuper_loss: 0.0
+average reward score: 5.859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.83%) |Training time=0.49s (22.68%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3284|ppo_ep: 1|act_loss: 0.027130126953125|cri_loss: 0.01393890380859375|unsuper_loss: 0.0
+average reward score: 5.32421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.32%) |Training time=0.49s (22.14%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3285|ppo_ep: 1|act_loss: 0.01788330078125|cri_loss: 0.0091400146484375|unsuper_loss: 0.0
+average reward score: 6.1328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.21%) |Training time=0.49s (22.29%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3286|ppo_ep: 1|act_loss: 0.0101470947265625|cri_loss: 0.005603790283203125|unsuper_loss: 0.0
+average reward score: 5.80078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.10%) |Training time=0.46s (21.16%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3287|ppo_ep: 1|act_loss: -0.020721435546875|cri_loss: -0.008544921875|unsuper_loss: 0.0
+average reward score: 4.734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.43s |Gather latency=0.00s (0.00%) |Generate time=1.77s (72.74%) |Training time=0.50s (20.42%) |Others=0.17 (6.83%)|CurSamplesPerSec=13.15 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3288|ppo_ep: 1|act_loss: -0.0085296630859375|cri_loss: -0.00321197509765625|unsuper_loss: 0.0
+average reward score: 5.203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.99%) |Training time=0.47s (21.47%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+[2023-04-14 10:48:48,531] [INFO] [logging.py:96:log_dist] [Rank 0] step=3290, skipped=43, lr=[6.521128861354213e-06, 6.521128861354213e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:48:48,549] [INFO] [timer.py:199:stop] epoch=0/micro_step=3290/global_step=3290, RunningAvgSamplesPerSec=105.82224074120158, CurrSamplesPerSec=108.69055545522937, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:48:48,641] [INFO] [logging.py:96:log_dist] [Rank 0] step=3290, skipped=49, lr=[3.384227303530776e-06, 3.384227303530776e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3289|ppo_ep: 1|act_loss: -0.001251220703125|cri_loss: -0.00023698806762695312|unsuper_loss: 0.0
+average reward score: 4.7421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.28%) |Training time=0.46s (21.19%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3290|ppo_ep: 1|act_loss: -0.016754150390625|cri_loss: -0.007965087890625|unsuper_loss: 0.0
+average reward score: 4.640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.14%) |Training time=0.46s (21.31%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3291|ppo_ep: 1|act_loss: 0.060546875|cri_loss: 0.032806396484375|unsuper_loss: 0.0
+average reward score: 5.28515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.13%) |Training time=0.46s (21.25%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3292|ppo_ep: 1|act_loss: 0.0125274658203125|cri_loss: 0.00705718994140625|unsuper_loss: 0.0
+average reward score: 5.5546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.91%) |Training time=0.47s (21.55%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3293|ppo_ep: 1|act_loss: 0.06329345703125|cri_loss: 0.032257080078125|unsuper_loss: 0.0
+average reward score: 5.08203125
+-------------------------------------------------------------------------------------
+|E2E latency=3.53s |Gather latency=0.00s (0.00%) |Generate time=1.60s (45.37%) |Training time=0.46s (13.04%) |Others=1.47 (41.59%)|CurSamplesPerSec=9.07 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3294|ppo_ep: 1|act_loss: 0.00348663330078125|cri_loss: 0.00228118896484375|unsuper_loss: 0.0
+average reward score: 4.83984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.46s (21.46%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3295|ppo_ep: 1|act_loss: 0.033905029296875|cri_loss: 0.0184326171875|unsuper_loss: 0.0
+average reward score: 5.15234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.28%) |Training time=0.46s (21.15%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3296|ppo_ep: 1|act_loss: -0.00469970703125|cri_loss: -0.0013952255249023438|unsuper_loss: 0.0
+average reward score: 5.265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.18%) |Training time=0.46s (21.29%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3297|ppo_ep: 1|act_loss: 0.0008997917175292969|cri_loss: 0.0009350776672363281|unsuper_loss: 0.0
+average reward score: 4.5390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.13%) |Training time=0.46s (21.32%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3298|ppo_ep: 1|act_loss: -0.03765869140625|cri_loss: -0.0171966552734375|unsuper_loss: 0.0
+average reward score: 4.66015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.64%) |Training time=0.48s (21.85%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.46
+[2023-04-14 10:49:11,545] [INFO] [logging.py:96:log_dist] [Rank 0] step=3300, skipped=43, lr=[6.503725718833046e-06, 6.503725718833046e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:49:11,564] [INFO] [timer.py:199:stop] epoch=0/micro_step=3300/global_step=3300, RunningAvgSamplesPerSec=105.82462227179246, CurrSamplesPerSec=108.39885509746532, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:49:11,656] [INFO] [logging.py:96:log_dist] [Rank 0] step=3300, skipped=49, lr=[3.375217930981894e-06, 3.375217930981894e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3299|ppo_ep: 1|act_loss: -0.0273895263671875|cri_loss: -0.013458251953125|unsuper_loss: 0.0
+average reward score: 5.53125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.31%) |Training time=0.46s (21.15%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3300|ppo_ep: 1|act_loss: 0.088134765625|cri_loss: 0.050750732421875|unsuper_loss: 0.0
+average reward score: 5.4609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.04%) |Training time=0.46s (21.41%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3301|ppo_ep: 1|act_loss: 0.0094757080078125|cri_loss: 0.00507354736328125|unsuper_loss: 0.0
+average reward score: 5.82421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.07%) |Training time=0.46s (21.40%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3302|ppo_ep: 1|act_loss: 0.05609130859375|cri_loss: 0.029510498046875|unsuper_loss: 0.0
+average reward score: 5.265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.17%) |Training time=0.46s (19.63%) |Others=0.10 (4.20%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3303|ppo_ep: 1|act_loss: 0.0234527587890625|cri_loss: 0.0122833251953125|unsuper_loss: 0.0
+average reward score: 4.52734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.40%) |Training time=0.46s (21.08%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3304|ppo_ep: 1|act_loss: 0.040771484375|cri_loss: 0.0233306884765625|unsuper_loss: 0.0
+average reward score: 4.78515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.89%) |Training time=0.47s (21.57%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3305|ppo_ep: 1|act_loss: -0.0093994140625|cri_loss: -0.003631591796875|unsuper_loss: 0.0
+average reward score: 4.43359375
+-------------------------------------------------------------------------------------
+|E2E latency=3.12s |Gather latency=0.00s (0.00%) |Generate time=1.61s (51.71%) |Training time=0.46s (14.70%) |Others=1.05 (33.59%)|CurSamplesPerSec=10.25 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3306|ppo_ep: 1|act_loss: -0.03729248046875|cri_loss: -0.0177459716796875|unsuper_loss: 0.0
+average reward score: 4.79296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.68s (73.49%) |Training time=0.51s (22.17%) |Others=0.10 (4.34%)|CurSamplesPerSec=14.01 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3307|ppo_ep: 1|act_loss: -0.025238037109375|cri_loss: -0.0121307373046875|unsuper_loss: 0.0
+average reward score: 4.80078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.03%) |Training time=0.46s (21.43%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3308|ppo_ep: 1|act_loss: -0.038482666015625|cri_loss: -0.018707275390625|unsuper_loss: 0.0
+average reward score: 4.92578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.08%) |Training time=0.46s (21.37%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+[2023-04-14 10:49:34,441] [INFO] [logging.py:96:log_dist] [Rank 0] step=3310, skipped=43, lr=[6.4862976935470215e-06, 6.4862976935470215e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:49:34,459] [INFO] [timer.py:199:stop] epoch=0/micro_step=3310/global_step=3310, RunningAvgSamplesPerSec=105.82343541265251, CurrSamplesPerSec=106.25190725481177, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:49:34,552] [INFO] [logging.py:96:log_dist] [Rank 0] step=3310, skipped=49, lr=[3.366195585591927e-06, 3.366195585591927e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3309|ppo_ep: 1|act_loss: -0.03485107421875|cri_loss: -0.0152740478515625|unsuper_loss: 0.0
+average reward score: 4.28125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.42%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+[2023-04-14 10:49:36,594] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+[2023-04-14 10:49:36,677] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 3310|ppo_ep: 1|act_loss: 0.00023245811462402344|cri_loss: 0.0003600120544433594|unsuper_loss: 0.0
+average reward score: 5.21875
+-------------------------------------------------------------------------------------
+|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.50%) |Training time=0.43s (20.39%) |Others=0.09 (4.11%)|CurSamplesPerSec=15.07 |AvgSamplesPerSec=14.46
+[2023-04-14 10:49:38,737] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+[2023-04-14 10:49:38,820] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 3311|ppo_ep: 1|act_loss: -0.0281982421875|cri_loss: -0.01326751708984375|unsuper_loss: 0.0
+average reward score: 5.37890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.75%) |Training time=0.43s (20.17%) |Others=0.09 (4.09%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3312|ppo_ep: 1|act_loss: -0.026275634765625|cri_loss: -0.01253509521484375|unsuper_loss: 0.0
+average reward score: 5.015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.12%) |Training time=0.46s (21.25%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3313|ppo_ep: 1|act_loss: 0.033477783203125|cri_loss: 0.017578125|unsuper_loss: 0.0
+average reward score: 4.15625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.41%) |Training time=0.45s (21.00%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3314|ppo_ep: 1|act_loss: 0.049713134765625|cri_loss: 0.0262298583984375|unsuper_loss: 0.0
+average reward score: 4.9296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.64%) |Training time=0.47s (21.81%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3315|ppo_ep: 1|act_loss: -0.009429931640625|cri_loss: -0.004192352294921875|unsuper_loss: 0.0
+average reward score: 6.03515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.95%) |Training time=0.46s (21.41%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3316|ppo_ep: 1|act_loss: 0.0149383544921875|cri_loss: 0.007770538330078125|unsuper_loss: 0.0
+average reward score: 4.4375
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.60s (70.84%) |Training time=0.48s (21.38%) |Others=0.18 (7.78%)|CurSamplesPerSec=14.16 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3317|ppo_ep: 1|act_loss: -0.0093841552734375|cri_loss: -0.004528045654296875|unsuper_loss: 0.0
+average reward score: 5.19921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.95%) |Training time=0.47s (21.42%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3318|ppo_ep: 1|act_loss: 0.027679443359375|cri_loss: 0.01447296142578125|unsuper_loss: 0.0
+average reward score: 4.43359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.55%) |Training time=0.48s (21.90%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+[2023-04-14 10:49:56,165] [INFO] [logging.py:96:log_dist] [Rank 0] step=3320, skipped=45, lr=[6.472337531309861e-06, 6.472337531309861e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:49:56,578] [INFO] [timer.py:199:stop] epoch=0/micro_step=3320/global_step=3320, RunningAvgSamplesPerSec=105.78531663292775, CurrSamplesPerSec=45.46805004077017, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:49:56,671] [INFO] [logging.py:96:log_dist] [Rank 0] step=3320, skipped=51, lr=[3.358968458696486e-06, 3.358968458696486e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3319|ppo_ep: 1|act_loss: -0.0241241455078125|cri_loss: -0.00957489013671875|unsuper_loss: 0.0
+average reward score: 4.80078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.57s |Gather latency=0.00s (0.00%) |Generate time=1.60s (62.44%) |Training time=0.87s (33.74%) |Others=0.10 (3.82%)|CurSamplesPerSec=12.46 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3320|ppo_ep: 1|act_loss: -0.02484130859375|cri_loss: -0.01227569580078125|unsuper_loss: 0.0
+average reward score: 5.1328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.58%) |Training time=0.47s (21.87%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3321|ppo_ep: 1|act_loss: -0.024169921875|cri_loss: -0.01117706298828125|unsuper_loss: 0.0
+average reward score: 4.96484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.60%) |Training time=0.47s (21.83%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3322|ppo_ep: 1|act_loss: -0.0293121337890625|cri_loss: -0.0143890380859375|unsuper_loss: 0.0
+average reward score: 5.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.53%) |Training time=0.48s (21.91%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3323|ppo_ep: 1|act_loss: 0.0419921875|cri_loss: 0.022247314453125|unsuper_loss: 0.0
+average reward score: 5.13671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.62%) |Training time=0.47s (21.85%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3324|ppo_ep: 1|act_loss: 0.067626953125|cri_loss: 0.0350341796875|unsuper_loss: 0.0
+average reward score: 5.4296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.47s (21.67%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3325|ppo_ep: 1|act_loss: -0.007579803466796875|cri_loss: -0.003299713134765625|unsuper_loss: 0.0
+average reward score: 6.6953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.94%) |Training time=0.47s (21.49%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3326|ppo_ep: 1|act_loss: 0.01213836669921875|cri_loss: 0.006786346435546875|unsuper_loss: 0.0
+average reward score: 5.61328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.00%) |Training time=0.47s (21.45%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3327|ppo_ep: 1|act_loss: -0.00235748291015625|cri_loss: -0.0010938644409179688|unsuper_loss: 0.0
+average reward score: 5.87109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.05%) |Training time=0.46s (21.27%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3328|ppo_ep: 1|act_loss: -0.007213592529296875|cri_loss: -0.003448486328125|unsuper_loss: 0.0
+average reward score: 5.1953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.16%) |Training time=0.44s (20.27%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
+[2023-04-14 10:50:18,251] [INFO] [logging.py:96:log_dist] [Rank 0] step=3330, skipped=45, lr=[6.454865368272448e-06, 6.454865368272448e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:50:18,270] [INFO] [timer.py:199:stop] epoch=0/micro_step=3330/global_step=3330, RunningAvgSamplesPerSec=105.78267192941124, CurrSamplesPerSec=102.44838222396594, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:50:18,363] [INFO] [logging.py:96:log_dist] [Rank 0] step=3330, skipped=51, lr=[3.3499230993274857e-06, 3.3499230993274857e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3329|ppo_ep: 1|act_loss: -0.0174560546875|cri_loss: -0.00833892822265625|unsuper_loss: 0.0
+average reward score: 4.8203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.59%) |Training time=0.48s (21.86%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3330|ppo_ep: 1|act_loss: 0.0152130126953125|cri_loss: 0.00783538818359375|unsuper_loss: 0.0
+average reward score: 5.546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.17%) |Training time=0.47s (21.28%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3331|ppo_ep: 1|act_loss: -0.003993988037109375|cri_loss: -0.0018301010131835938|unsuper_loss: 0.0
+average reward score: 5.2109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.87%) |Training time=0.47s (21.59%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3332|ppo_ep: 1|act_loss: 0.0043182373046875|cri_loss: 0.002300262451171875|unsuper_loss: 0.0
+average reward score: 5.41015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.66%) |Training time=0.47s (20.11%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.70 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3333|ppo_ep: 1|act_loss: 0.006855010986328125|cri_loss: 0.00409698486328125|unsuper_loss: 0.0
+average reward score: 5.30859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.62%) |Training time=0.47s (21.79%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3334|ppo_ep: 1|act_loss: -0.03057861328125|cri_loss: -0.014862060546875|unsuper_loss: 0.0
+average reward score: 5.078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.82%) |Training time=0.47s (21.64%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3335|ppo_ep: 1|act_loss: -0.01727294921875|cri_loss: -0.007293701171875|unsuper_loss: 0.0
+average reward score: 5.046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.50s |Gather latency=0.00s (0.00%) |Generate time=1.64s (65.62%) |Training time=0.47s (18.62%) |Others=0.39 (15.76%)|CurSamplesPerSec=12.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3336|ppo_ep: 1|act_loss: -0.0207061767578125|cri_loss: -0.0089874267578125|unsuper_loss: 0.0
+average reward score: 5.515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.39%) |Training time=0.48s (22.03%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3337|ppo_ep: 1|act_loss: -0.00792694091796875|cri_loss: -0.0018768310546875|unsuper_loss: 0.0
+average reward score: 4.9453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.46%) |Training time=0.48s (21.97%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3338|ppo_ep: 1|act_loss: -0.0087890625|cri_loss: -0.004047393798828125|unsuper_loss: 0.0
+average reward score: 5.15625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.09%) |Training time=0.47s (21.38%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.46
+[2023-04-14 10:50:40,513] [INFO] [logging.py:96:log_dist] [Rank 0] step=3340, skipped=45, lr=[6.437369046698439e-06, 6.437369046698439e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:50:40,532] [INFO] [timer.py:199:stop] epoch=0/micro_step=3340/global_step=3340, RunningAvgSamplesPerSec=105.77579222271166, CurrSamplesPerSec=102.9543915524116, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:50:40,626] [INFO] [logging.py:96:log_dist] [Rank 0] step=3340, skipped=51, lr=[3.3408651420478265e-06, 3.3408651420478265e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3339|ppo_ep: 1|act_loss: 0.002201080322265625|cri_loss: 0.0015249252319335938|unsuper_loss: 0.0
+average reward score: 5.5
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.63%) |Training time=0.47s (21.78%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3340|ppo_ep: 1|act_loss: 0.00557708740234375|cri_loss: 0.004730224609375|unsuper_loss: 0.0
+average reward score: 5.35546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.32%) |Training time=0.46s (21.10%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3341|ppo_ep: 1|act_loss: 0.05126953125|cri_loss: 0.0268707275390625|unsuper_loss: 0.0
+average reward score: 5.6640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.62%) |Training time=0.47s (21.85%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3342|ppo_ep: 1|act_loss: 0.00695037841796875|cri_loss: 0.004302978515625|unsuper_loss: 0.0
+average reward score: 4.93359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.15%) |Training time=0.46s (21.22%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3343|ppo_ep: 1|act_loss: -0.030364990234375|cri_loss: -0.01422882080078125|unsuper_loss: 0.0
+average reward score: 5.19140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.30%) |Training time=0.46s (21.11%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3344|ppo_ep: 1|act_loss: -0.0138092041015625|cri_loss: -0.006580352783203125|unsuper_loss: 0.0
+average reward score: 5.484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.90%) |Training time=0.49s (22.55%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3345|ppo_ep: 1|act_loss: -0.01995849609375|cri_loss: -0.0095062255859375|unsuper_loss: 0.0
+average reward score: 5.5703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.00%) |Training time=0.49s (22.43%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3346|ppo_ep: 1|act_loss: -0.03643798828125|cri_loss: -0.017333984375|unsuper_loss: 0.0
+average reward score: 5.4375
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.88%) |Training time=0.49s (20.90%) |Others=0.10 (4.22%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3347|ppo_ep: 1|act_loss: -0.01500701904296875|cri_loss: -0.00693511962890625|unsuper_loss: 0.0
+average reward score: 6.03125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.48s (22.25%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3348|ppo_ep: 1|act_loss: -0.028533935546875|cri_loss: -0.0125885009765625|unsuper_loss: 0.0
+average reward score: 6.01171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.93%) |Training time=0.49s (22.50%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+[2023-04-14 10:51:02,346] [INFO] [logging.py:96:log_dist] [Rank 0] step=3350, skipped=45, lr=[6.41984882592553e-06, 6.41984882592553e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:51:02,364] [INFO] [timer.py:199:stop] epoch=0/micro_step=3350/global_step=3350, RunningAvgSamplesPerSec=105.76279625643068, CurrSamplesPerSec=96.08421076745537, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:51:02,457] [INFO] [logging.py:96:log_dist] [Rank 0] step=3350, skipped=51, lr=[3.3317947211182846e-06, 3.3317947211182846e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3349|ppo_ep: 1|act_loss: 0.027587890625|cri_loss: 0.0145263671875|unsuper_loss: 0.0
+average reward score: 5.62890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.73%) |Training time=0.50s (22.78%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3350|ppo_ep: 1|act_loss: 0.043243408203125|cri_loss: 0.0227203369140625|unsuper_loss: 0.0
+average reward score: 5.2578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.06%) |Training time=0.48s (22.39%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3351|ppo_ep: 1|act_loss: 0.0242156982421875|cri_loss: 0.0243988037109375|unsuper_loss: 0.0
+average reward score: 6.07421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.77%) |Training time=0.49s (22.72%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3352|ppo_ep: 1|act_loss: -0.011138916015625|cri_loss: -0.00481414794921875|unsuper_loss: 0.0
+average reward score: 4.90625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.36%) |Training time=0.51s (23.15%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3353|ppo_ep: 1|act_loss: -0.0261077880859375|cri_loss: -0.01076507568359375|unsuper_loss: 0.0
+average reward score: 5.7578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.93%) |Training time=0.49s (22.52%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3354|ppo_ep: 1|act_loss: 0.0078125|cri_loss: 0.004383087158203125|unsuper_loss: 0.0
+average reward score: 4.98046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.92%) |Training time=0.47s (21.52%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3355|ppo_ep: 1|act_loss: -0.020904541015625|cri_loss: -0.0100555419921875|unsuper_loss: 0.0
+average reward score: 5.5625
+-------------------------------------------------------------------------------------
+|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.61s (68.06%) |Training time=0.46s (19.38%) |Others=0.30 (12.56%)|CurSamplesPerSec=13.51 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3356|ppo_ep: 1|act_loss: 0.001373291015625|cri_loss: 0.00147247314453125|unsuper_loss: 0.0
+average reward score: 5.21875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.67s (76.69%) |Training time=0.41s (18.72%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3357|ppo_ep: 1|act_loss: 0.03924560546875|cri_loss: 0.02197265625|unsuper_loss: 0.0
+average reward score: 3.931640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.27%) |Training time=0.46s (21.20%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3358|ppo_ep: 1|act_loss: 0.00650787353515625|cri_loss: 0.0037593841552734375|unsuper_loss: 0.0
+average reward score: 5.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.35%) |Training time=0.48s (22.11%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.46
+[2023-04-14 10:51:24,302] [INFO] [logging.py:96:log_dist] [Rank 0] step=3360, skipped=45, lr=[6.402304965645658e-06, 6.402304965645658e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:51:24,320] [INFO] [timer.py:199:stop] epoch=0/micro_step=3360/global_step=3360, RunningAvgSamplesPerSec=105.75462987127783, CurrSamplesPerSec=103.27904435641491, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:51:24,413] [INFO] [logging.py:96:log_dist] [Rank 0] step=3360, skipped=51, lr=[3.32271197098438e-06, 3.32271197098438e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3359|ppo_ep: 1|act_loss: -0.0139617919921875|cri_loss: -0.00681304931640625|unsuper_loss: 0.0
+average reward score: 5.40625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.69%) |Training time=0.47s (21.79%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3360|ppo_ep: 1|act_loss: 0.014404296875|cri_loss: 0.00774383544921875|unsuper_loss: 0.0
+average reward score: 4.80078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.28%) |Training time=0.46s (21.18%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3361|ppo_ep: 1|act_loss: -0.02081298828125|cri_loss: -0.0099029541015625|unsuper_loss: 0.0
+average reward score: 4.734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.79s (76.38%) |Training time=0.46s (19.43%) |Others=0.10 (4.19%)|CurSamplesPerSec=13.65 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3362|ppo_ep: 1|act_loss: 0.01715087890625|cri_loss: 0.0088348388671875|unsuper_loss: 0.0
+average reward score: 6.21875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.65%) |Training time=0.47s (21.82%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3363|ppo_ep: 1|act_loss: 0.0213470458984375|cri_loss: 0.01126861572265625|unsuper_loss: 0.0
+average reward score: 4.8359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.58%) |Training time=0.48s (21.91%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3364|ppo_ep: 1|act_loss: -0.0362548828125|cri_loss: -0.0171661376953125|unsuper_loss: 0.0
+average reward score: 4.921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.91%) |Training time=0.48s (21.63%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3365|ppo_ep: 1|act_loss: -0.009552001953125|cri_loss: -0.00391387939453125|unsuper_loss: 0.0
+average reward score: 5.21875
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.21%) |Training time=0.49s (21.48%) |Others=0.10 (4.31%)|CurSamplesPerSec=14.01 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3366|ppo_ep: 1|act_loss: 0.05206298828125|cri_loss: 0.02874755859375|unsuper_loss: 0.0
+average reward score: 5.08984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.27%) |Training time=0.48s (22.21%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3367|ppo_ep: 1|act_loss: 0.013092041015625|cri_loss: 0.007740020751953125|unsuper_loss: 0.0
+average reward score: 5.4375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.61%) |Training time=0.47s (21.85%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3368|ppo_ep: 1|act_loss: 0.0022411346435546875|cri_loss: 0.0022373199462890625|unsuper_loss: 0.0
+average reward score: 5.6640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.51%) |Training time=0.48s (21.94%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+[2023-04-14 10:51:46,316] [INFO] [logging.py:96:log_dist] [Rank 0] step=3370, skipped=45, lr=[6.384737725901154e-06, 6.384737725901154e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:51:46,334] [INFO] [timer.py:199:stop] epoch=0/micro_step=3370/global_step=3370, RunningAvgSamplesPerSec=105.74734784413464, CurrSamplesPerSec=107.7754519627477, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:51:46,426] [INFO] [logging.py:96:log_dist] [Rank 0] step=3370, skipped=51, lr=[3.3136170262743816e-06, 3.3136170262743816e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3369|ppo_ep: 1|act_loss: -0.03704833984375|cri_loss: -0.016998291015625|unsuper_loss: 0.0
+average reward score: 4.9765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.20%) |Training time=0.46s (21.27%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3370|ppo_ep: 1|act_loss: 0.003391265869140625|cri_loss: 0.0019388198852539062|unsuper_loss: 0.0
+average reward score: 4.89453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.47s |Gather latency=0.00s (0.00%) |Generate time=1.60s (64.53%) |Training time=0.47s (18.98%) |Others=0.41 (16.49%)|CurSamplesPerSec=12.93 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3371|ppo_ep: 1|act_loss: 0.030426025390625|cri_loss: 0.0155181884765625|unsuper_loss: 0.0
+average reward score: 5.2109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.47s (21.48%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3372|ppo_ep: 1|act_loss: -0.01071929931640625|cri_loss: -0.004619598388671875|unsuper_loss: 0.0
+average reward score: 5.71875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.98%) |Training time=0.46s (21.37%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3373|ppo_ep: 1|act_loss: -0.0019855499267578125|cri_loss: -0.00032901763916015625|unsuper_loss: 0.0
+average reward score: 4.890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.84%) |Training time=0.50s (22.65%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3374|ppo_ep: 1|act_loss: 0.030792236328125|cri_loss: 0.018341064453125|unsuper_loss: 0.0
+average reward score: 5.0078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.01%) |Training time=0.49s (22.41%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3375|ppo_ep: 1|act_loss: -0.026336669921875|cri_loss: -0.01236724853515625|unsuper_loss: 0.0
+average reward score: 4.53125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.59%) |Training time=0.45s (20.88%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3376|ppo_ep: 1|act_loss: 0.0124359130859375|cri_loss: 0.00743865966796875|unsuper_loss: 0.0
+average reward score: 5.7890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.66s |Gather latency=0.00s (0.00%) |Generate time=1.79s (67.30%) |Training time=0.47s (17.60%) |Others=0.40 (15.10%)|CurSamplesPerSec=12.01 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3377|ppo_ep: 1|act_loss: 0.01904296875|cri_loss: 0.010223388671875|unsuper_loss: 0.0
+average reward score: 5.49609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.68s (77.47%) |Training time=0.39s (17.97%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3378|ppo_ep: 1|act_loss: 0.022216796875|cri_loss: 0.01214599609375|unsuper_loss: 0.0
+average reward score: 4.265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.22%) |Training time=0.46s (21.19%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+[2023-04-14 10:52:08,821] [INFO] [logging.py:96:log_dist] [Rank 0] step=3380, skipped=45, lr=[6.367147367080889e-06, 6.367147367080889e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:52:08,839] [INFO] [timer.py:199:stop] epoch=0/micro_step=3380/global_step=3380, RunningAvgSamplesPerSec=105.75309889489947, CurrSamplesPerSec=113.50777958381468, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:52:08,932] [INFO] [logging.py:96:log_dist] [Rank 0] step=3380, skipped=51, lr=[3.3045100217973093e-06, 3.3045100217973093e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3379|ppo_ep: 1|act_loss: -0.0112152099609375|cri_loss: -0.005474090576171875|unsuper_loss: 0.0
+average reward score: 4.94140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.84%) |Training time=0.45s (20.59%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3380|ppo_ep: 1|act_loss: 0.055023193359375|cri_loss: 0.028564453125|unsuper_loss: 0.0
+average reward score: 4.8984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.38%) |Training time=0.43s (20.03%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3381|ppo_ep: 1|act_loss: 0.0017099380493164062|cri_loss: 0.00150299072265625|unsuper_loss: 0.0
+average reward score: 5.7109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.80%) |Training time=0.45s (20.65%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3382|ppo_ep: 1|act_loss: 0.032318115234375|cri_loss: 0.016998291015625|unsuper_loss: 0.0
+average reward score: 4.984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.63s |Gather latency=0.00s (0.00%) |Generate time=1.66s (63.03%) |Training time=0.41s (15.45%) |Others=0.57 (21.51%)|CurSamplesPerSec=12.17 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3383|ppo_ep: 1|act_loss: -0.01104736328125|cri_loss: -0.005268096923828125|unsuper_loss: 0.0
+average reward score: 5.41015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.86%) |Training time=0.42s (19.56%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3384|ppo_ep: 1|act_loss: -0.00569915771484375|cri_loss: -0.00243377685546875|unsuper_loss: 0.0
+average reward score: 5.8046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.71%) |Training time=0.47s (21.59%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3385|ppo_ep: 1|act_loss: -7.152557373046875e-05|cri_loss: 0.0007443428039550781|unsuper_loss: 0.0
+average reward score: 5.109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.72%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3386|ppo_ep: 1|act_loss: -0.0491943359375|cri_loss: -0.0230255126953125|unsuper_loss: 0.0
+average reward score: 5.734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.90%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3387|ppo_ep: 1|act_loss: -0.036865234375|cri_loss: -0.0172271728515625|unsuper_loss: 0.0
+average reward score: 5.65234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.95%) |Training time=0.49s (22.51%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3388|ppo_ep: 1|act_loss: -0.03228759765625|cri_loss: -0.01568603515625|unsuper_loss: 0.0
+average reward score: 5.3828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.44s |Gather latency=0.00s (0.00%) |Generate time=1.59s (65.28%) |Training time=0.46s (18.96%) |Others=0.38 (15.76%)|CurSamplesPerSec=13.11 |AvgSamplesPerSec=14.46
+[2023-04-14 10:52:31,218] [INFO] [logging.py:96:log_dist] [Rank 0] step=3390, skipped=45, lr=[6.349534149916417e-06, 6.349534149916417e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:52:31,236] [INFO] [timer.py:199:stop] epoch=0/micro_step=3390/global_step=3390, RunningAvgSamplesPerSec=105.76502510812344, CurrSamplesPerSec=106.6209268758078, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:52:31,328] [INFO] [logging.py:96:log_dist] [Rank 0] step=3390, skipped=51, lr=[3.2953910925409387e-06, 3.2953910925409387e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3389|ppo_ep: 1|act_loss: -0.0055389404296875|cri_loss: -0.002254486083984375|unsuper_loss: 0.0
+average reward score: 5.296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.52%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3390|ppo_ep: 1|act_loss: -0.0277252197265625|cri_loss: -0.01322174072265625|unsuper_loss: 0.0
+average reward score: 5.171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.37%) |Training time=0.45s (21.01%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3391|ppo_ep: 1|act_loss: -0.03997802734375|cri_loss: -0.018768310546875|unsuper_loss: 0.0
+average reward score: 4.984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.69%) |Training time=0.47s (20.06%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3392|ppo_ep: 1|act_loss: 0.0065765380859375|cri_loss: 0.0040283203125|unsuper_loss: 0.0
+average reward score: 5.6171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.41%) |Training time=0.45s (21.01%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3393|ppo_ep: 1|act_loss: 0.013397216796875|cri_loss: 0.007022857666015625|unsuper_loss: 0.0
+average reward score: 5.46875
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.65s (74.80%) |Training time=0.46s (20.74%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.54 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3394|ppo_ep: 1|act_loss: 0.007198333740234375|cri_loss: 0.00415802001953125|unsuper_loss: 0.0
+average reward score: 5.375
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.72%) |Training time=0.48s (20.94%) |Others=0.10 (4.34%)|CurSamplesPerSec=14.09 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3395|ppo_ep: 1|act_loss: 0.0068206787109375|cri_loss: 0.004573822021484375|unsuper_loss: 0.0
+average reward score: 5.94921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.83%) |Training time=0.46s (21.56%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3396|ppo_ep: 1|act_loss: 0.0765380859375|cri_loss: 0.0452880859375|unsuper_loss: 0.0
+average reward score: 5.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.73%) |Training time=0.47s (21.68%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3397|ppo_ep: 1|act_loss: 0.0015411376953125|cri_loss: 0.001132965087890625|unsuper_loss: 0.0
+average reward score: 5.71875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.75%) |Training time=0.46s (21.66%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3398|ppo_ep: 1|act_loss: -0.00408935546875|cri_loss: -0.0012340545654296875|unsuper_loss: 0.0
+average reward score: 5.08984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.88%) |Training time=0.46s (21.54%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
+[2023-04-14 10:52:53,057] [INFO] [logging.py:96:log_dist] [Rank 0] step=3400, skipped=45, lr=[6.331898335478102e-06, 6.331898335478102e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:52:53,075] [INFO] [timer.py:199:stop] epoch=0/micro_step=3400/global_step=3400, RunningAvgSamplesPerSec=105.76780238263315, CurrSamplesPerSec=106.7389575160306, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:52:53,167] [INFO] [logging.py:96:log_dist] [Rank 0] step=3400, skipped=51, lr=[3.2862603736698014e-06, 3.2862603736698014e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3399|ppo_ep: 1|act_loss: -0.0025119781494140625|cri_loss: 0.00067901611328125|unsuper_loss: 0.0
+average reward score: 5.29296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.89%) |Training time=0.46s (21.55%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3400|ppo_ep: 1|act_loss: 0.01654052734375|cri_loss: 0.0099945068359375|unsuper_loss: 0.0
+average reward score: 4.66015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.50s |Gather latency=0.00s (0.00%) |Generate time=1.59s (63.56%) |Training time=0.46s (18.35%) |Others=0.45 (18.09%)|CurSamplesPerSec=12.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3401|ppo_ep: 1|act_loss: 0.03460693359375|cri_loss: 0.0185394287109375|unsuper_loss: 0.0
+average reward score: 4.87109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.83%) |Training time=0.46s (21.59%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3402|ppo_ep: 1|act_loss: 0.0011186599731445312|cri_loss: 0.0008335113525390625|unsuper_loss: 0.0
+average reward score: 5.6796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.56%) |Training time=0.48s (21.89%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3403|ppo_ep: 1|act_loss: -0.007110595703125|cri_loss: -0.003444671630859375|unsuper_loss: 0.0
+average reward score: 5.25
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.73%) |Training time=0.47s (21.71%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3404|ppo_ep: 1|act_loss: 0.01158905029296875|cri_loss: 0.006256103515625|unsuper_loss: 0.0
+average reward score: 5.59375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.74%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3405|ppo_ep: 1|act_loss: -0.0084381103515625|cri_loss: -0.0037441253662109375|unsuper_loss: 0.0
+average reward score: 5.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.62s |Gather latency=0.00s (0.00%) |Generate time=1.59s (60.72%) |Training time=0.46s (17.66%) |Others=0.57 (21.62%)|CurSamplesPerSec=12.22 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3406|ppo_ep: 1|act_loss: 0.0013408660888671875|cri_loss: 0.0010423660278320312|unsuper_loss: 0.0
+average reward score: 5.1953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.48s (21.99%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3407|ppo_ep: 1|act_loss: 0.00499725341796875|cri_loss: 0.0028533935546875|unsuper_loss: 0.0
+average reward score: 5.265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.46s (21.57%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3408|ppo_ep: 1|act_loss: -0.006114959716796875|cri_loss: -0.001445770263671875|unsuper_loss: 0.0
+average reward score: 5.77734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.68%) |Training time=0.47s (21.61%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+[2023-04-14 10:53:15,449] [INFO] [logging.py:96:log_dist] [Rank 0] step=3410, skipped=45, lr=[6.314240185171264e-06, 6.314240185171264e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:53:15,467] [INFO] [timer.py:199:stop] epoch=0/micro_step=3410/global_step=3410, RunningAvgSamplesPerSec=105.76521709293858, CurrSamplesPerSec=104.83900745727125, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:53:15,560] [INFO] [logging.py:96:log_dist] [Rank 0] step=3410, skipped=51, lr=[3.2771180005231746e-06, 3.2771180005231746e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3409|ppo_ep: 1|act_loss: -0.02142333984375|cri_loss: -0.0105133056640625|unsuper_loss: 0.0
+average reward score: 5.875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.66%) |Training time=0.47s (21.81%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3410|ppo_ep: 1|act_loss: -0.02252197265625|cri_loss: -0.00972747802734375|unsuper_loss: 0.0
+average reward score: 4.953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.10%) |Training time=0.46s (21.35%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3411|ppo_ep: 1|act_loss: -0.0213623046875|cri_loss: -0.0104827880859375|unsuper_loss: 0.0
+average reward score: 5.67578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.98%) |Training time=0.46s (21.46%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
+[2023-04-14 10:53:21,887] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+[2023-04-14 10:53:21,971] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 3412|ppo_ep: 1|act_loss: -0.006343841552734375|cri_loss: -0.00311279296875|unsuper_loss: 0.0
+average reward score: 6.52734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.10s |Gather latency=0.00s (0.00%) |Generate time=1.59s (75.58%) |Training time=0.43s (20.28%) |Others=0.09 (4.14%)|CurSamplesPerSec=15.20 |AvgSamplesPerSec=14.46
+[2023-04-14 10:53:23,996] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+[2023-04-14 10:53:24,080] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 3413|ppo_ep: 1|act_loss: 0.0185394287109375|cri_loss: 0.011444091796875|unsuper_loss: 0.0
+average reward score: 5.8984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.59s (75.31%) |Training time=0.43s (20.55%) |Others=0.09 (4.13%)|CurSamplesPerSec=15.17 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3414|ppo_ep: 1|act_loss: -0.0077972412109375|cri_loss: -0.0034694671630859375|unsuper_loss: 0.0
+average reward score: 5.640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.74%) |Training time=0.47s (21.70%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3415|ppo_ep: 1|act_loss: -0.0147552490234375|cri_loss: -0.0070343017578125|unsuper_loss: 0.0
+average reward score: 5.66796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.21%) |Training time=0.45s (21.19%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3416|ppo_ep: 1|act_loss: -0.017578125|cri_loss: -0.0079498291015625|unsuper_loss: 0.0
+average reward score: 5.12109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.50%) |Training time=0.47s (21.94%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3417|ppo_ep: 1|act_loss: 0.03759765625|cri_loss: 0.02191162109375|unsuper_loss: 0.0
+average reward score: 4.7890625
+-------------------------------------------------------------------------------------
+|E2E latency=3.73s |Gather latency=0.00s (0.00%) |Generate time=1.59s (42.64%) |Training time=0.46s (12.46%) |Others=1.68 (44.90%)|CurSamplesPerSec=8.58 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3418|ppo_ep: 1|act_loss: -0.01320648193359375|cri_loss: -0.005695343017578125|unsuper_loss: 0.0
+average reward score: 5.10546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.49%) |Training time=0.45s (20.94%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45
+[2023-04-14 10:53:38,444] [INFO] [logging.py:96:log_dist] [Rank 0] step=3420, skipped=47, lr=[6.300097758976281e-06, 6.300097758976281e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:53:38,462] [INFO] [timer.py:199:stop] epoch=0/micro_step=3420/global_step=3420, RunningAvgSamplesPerSec=105.77514620401836, CurrSamplesPerSec=106.69610729889406, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:53:38,555] [INFO] [logging.py:96:log_dist] [Rank 0] step=3420, skipped=53, lr=[3.2697958019858506e-06, 3.2697958019858506e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3419|ppo_ep: 1|act_loss: 0.02777099609375|cri_loss: 0.0148773193359375|unsuper_loss: 0.0
+average reward score: 4.8984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.87%) |Training time=0.46s (21.56%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3420|ppo_ep: 1|act_loss: 0.00020694732666015625|cri_loss: 0.0003859996795654297|unsuper_loss: 0.0
+average reward score: 5.90234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.21%) |Training time=0.46s (21.24%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3421|ppo_ep: 1|act_loss: -0.0121612548828125|cri_loss: -0.005523681640625|unsuper_loss: 0.0
+average reward score: 5.30859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.95%) |Training time=0.46s (19.82%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3422|ppo_ep: 1|act_loss: -0.0455322265625|cri_loss: -0.02069091796875|unsuper_loss: 0.0
+average reward score: 5.1015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.61s (72.62%) |Training time=0.51s (22.96%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.42 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3423|ppo_ep: 1|act_loss: -0.022003173828125|cri_loss: -0.01036834716796875|unsuper_loss: 0.0
+average reward score: 4.7734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.91%) |Training time=0.46s (21.46%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3424|ppo_ep: 1|act_loss: 0.0006990432739257812|cri_loss: 0.000621795654296875|unsuper_loss: 0.0
+average reward score: 5.796875
+-------------------------------------------------------------------------------------
+|E2E latency=3.00s |Gather latency=0.00s (0.00%) |Generate time=1.59s (53.00%) |Training time=0.46s (15.49%) |Others=0.94 (31.50%)|CurSamplesPerSec=10.68 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3425|ppo_ep: 1|act_loss: -0.023040771484375|cri_loss: -0.010498046875|unsuper_loss: 0.0
+average reward score: 5.58203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.47s (21.62%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3426|ppo_ep: 1|act_loss: -0.00516510009765625|cri_loss: -0.0021610260009765625|unsuper_loss: 0.0
+average reward score: 5.96875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.91%) |Training time=0.46s (21.51%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3427|ppo_ep: 1|act_loss: 0.0189208984375|cri_loss: 0.01000213623046875|unsuper_loss: 0.0
+average reward score: 5.53515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.46s (21.61%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3428|ppo_ep: 1|act_loss: 0.0011386871337890625|cri_loss: 0.002056121826171875|unsuper_loss: 0.0
+average reward score: 4.28515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.08%) |Training time=0.46s (21.30%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.45
+[2023-04-14 10:54:01,026] [INFO] [logging.py:96:log_dist] [Rank 0] step=3430, skipped=47, lr=[6.2824000639017895e-06, 6.2824000639017895e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:54:01,044] [INFO] [timer.py:199:stop] epoch=0/micro_step=3430/global_step=3430, RunningAvgSamplesPerSec=105.77657682475913, CurrSamplesPerSec=108.1306645290452, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:54:01,137] [INFO] [logging.py:96:log_dist] [Rank 0] step=3430, skipped=53, lr=[3.2606327927484516e-06, 3.2606327927484516e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3429|ppo_ep: 1|act_loss: 0.0309295654296875|cri_loss: 0.0161285400390625|unsuper_loss: 0.0
+average reward score: 5.03125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.46s (21.39%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3430|ppo_ep: 1|act_loss: -0.03143310546875|cri_loss: -0.0149078369140625|unsuper_loss: 0.0
+average reward score: 5.26953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.95%) |Training time=0.46s (21.47%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3431|ppo_ep: 1|act_loss: 0.007232666015625|cri_loss: 0.003753662109375|unsuper_loss: 0.0
+average reward score: 4.51171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.21%) |Training time=0.46s (21.23%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3432|ppo_ep: 1|act_loss: 0.0101165771484375|cri_loss: 0.00543212890625|unsuper_loss: 0.0
+average reward score: 4.859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.99%) |Training time=0.46s (21.45%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3433|ppo_ep: 1|act_loss: -0.0355224609375|cri_loss: -0.01580810546875|unsuper_loss: 0.0
+average reward score: 4.22265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.28%) |Training time=0.45s (21.12%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3434|ppo_ep: 1|act_loss: -0.021392822265625|cri_loss: -0.01043701171875|unsuper_loss: 0.0
+average reward score: 4.171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.26%) |Training time=0.46s (21.16%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3435|ppo_ep: 1|act_loss: 0.015380859375|cri_loss: 0.0092010498046875|unsuper_loss: 0.0
+average reward score: 5.02734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.94s |Gather latency=0.00s (0.00%) |Generate time=1.76s (59.75%) |Training time=0.47s (16.12%) |Others=0.71 (24.13%)|CurSamplesPerSec=10.88 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3436|ppo_ep: 1|act_loss: -0.01514434814453125|cri_loss: -0.0072021484375|unsuper_loss: 0.0
+average reward score: 4.5234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.59%) |Training time=0.47s (21.83%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3437|ppo_ep: 1|act_loss: -0.004932403564453125|cri_loss: -0.0020294189453125|unsuper_loss: 0.0
+average reward score: 5.94921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.48s (21.92%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3438|ppo_ep: 1|act_loss: 0.00617218017578125|cri_loss: 0.0031795501708984375|unsuper_loss: 0.0
+average reward score: 5.0390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.01%) |Training time=0.46s (21.43%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
+[2023-04-14 10:54:23,363] [INFO] [logging.py:96:log_dist] [Rank 0] step=3440, skipped=47, lr=[6.264680766642565e-06, 6.264680766642565e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:54:23,375] [INFO] [timer.py:199:stop] epoch=0/micro_step=3440/global_step=3440, RunningAvgSamplesPerSec=105.77759385861118, CurrSamplesPerSec=104.42456438456774, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:54:23,467] [INFO] [logging.py:96:log_dist] [Rank 0] step=3440, skipped=53, lr=[3.251458509098137e-06, 3.251458509098137e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3439|ppo_ep: 1|act_loss: -0.005565643310546875|cri_loss: -0.002216339111328125|unsuper_loss: 0.0
+average reward score: 5.78125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.47s (21.68%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3440|ppo_ep: 1|act_loss: -0.02606201171875|cri_loss: -0.01192474365234375|unsuper_loss: 0.0
+average reward score: 5.6640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.95%) |Training time=0.47s (21.52%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3441|ppo_ep: 1|act_loss: 0.01318359375|cri_loss: 0.00732421875|unsuper_loss: 0.0
+average reward score: 4.41015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.23%) |Training time=0.46s (21.21%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3442|ppo_ep: 1|act_loss: 0.0325927734375|cri_loss: 0.017913818359375|unsuper_loss: 0.0
+average reward score: 5.30859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.94%) |Training time=0.46s (21.51%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3443|ppo_ep: 1|act_loss: -0.014251708984375|cri_loss: -0.006694793701171875|unsuper_loss: 0.0
+average reward score: 5.0625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.94%) |Training time=0.46s (21.50%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3444|ppo_ep: 1|act_loss: 0.0066375732421875|cri_loss: 0.0035572052001953125|unsuper_loss: 0.0
+average reward score: 5.30859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.26%) |Training time=0.46s (21.17%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3445|ppo_ep: 1|act_loss: 0.015838623046875|cri_loss: 0.00957489013671875|unsuper_loss: 0.0
+average reward score: 5.92578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.20%) |Training time=0.46s (21.24%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3446|ppo_ep: 1|act_loss: -0.013275146484375|cri_loss: -0.00611114501953125|unsuper_loss: 0.0
+average reward score: 5.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.16%) |Training time=0.46s (21.28%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3447|ppo_ep: 1|act_loss: -0.018157958984375|cri_loss: -0.0085296630859375|unsuper_loss: 0.0
+average reward score: 5.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.45%) |Training time=0.46s (21.04%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3448|ppo_ep: 1|act_loss: -0.00926971435546875|cri_loss: -0.00391387939453125|unsuper_loss: 0.0
+average reward score: 5.0390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.96%) |Training time=0.46s (21.49%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
+[2023-04-14 10:54:44,967] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
+[2023-04-14 10:54:44,967] [INFO] [logging.py:96:log_dist] [Rank 0] step=3450, skipped=48, lr=[6.248715146308981e-06, 6.248715146308981e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:54:44,967] [INFO] [timer.py:199:stop] epoch=0/micro_step=3450/global_step=3450, RunningAvgSamplesPerSec=105.783939143326, CurrSamplesPerSec=117.16853453920088, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:54:45,059] [INFO] [logging.py:96:log_dist] [Rank 0] step=3450, skipped=53, lr=[3.2422730870199237e-06, 3.2422730870199237e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3449|ppo_ep: 1|act_loss: -0.015716552734375|cri_loss: -0.00740814208984375|unsuper_loss: 0.0
+average reward score: 5.8125
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.01%) |Training time=0.43s (20.38%) |Others=0.10 (4.60%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3450|ppo_ep: 1|act_loss: 0.03143310546875|cri_loss: 0.0171966552734375|unsuper_loss: 0.0
+average reward score: 4.921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.56s |Gather latency=0.00s (0.00%) |Generate time=1.71s (66.84%) |Training time=0.50s (19.54%) |Others=0.35 (13.62%)|CurSamplesPerSec=12.48 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3451|ppo_ep: 1|act_loss: 0.02996826171875|cri_loss: 0.01555633544921875|unsuper_loss: 0.0
+average reward score: 4.359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.72s (75.75%) |Training time=0.45s (19.91%) |Others=0.10 (4.34%)|CurSamplesPerSec=14.11 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3452|ppo_ep: 1|act_loss: 0.035980224609375|cri_loss: 0.01849365234375|unsuper_loss: 0.0
+average reward score: 4.40625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.17%) |Training time=0.46s (21.26%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3453|ppo_ep: 1|act_loss: -0.0020885467529296875|cri_loss: -0.000255584716796875|unsuper_loss: 0.0
+average reward score: 6.40234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.25%) |Training time=0.46s (21.18%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3454|ppo_ep: 1|act_loss: -0.0262298583984375|cri_loss: -0.0119476318359375|unsuper_loss: 0.0
+average reward score: 4.9609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.90%) |Training time=0.47s (21.55%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3455|ppo_ep: 1|act_loss: -0.02520751953125|cri_loss: -0.01177215576171875|unsuper_loss: 0.0
+average reward score: 4.6328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.39%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3456|ppo_ep: 1|act_loss: -0.0152587890625|cri_loss: -0.0072784423828125|unsuper_loss: 0.0
+average reward score: 5.41015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.17%) |Training time=0.46s (21.27%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3457|ppo_ep: 1|act_loss: 0.0140533447265625|cri_loss: 0.00745391845703125|unsuper_loss: 0.0
+average reward score: 5.16015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.94%) |Training time=0.47s (21.52%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3458|ppo_ep: 1|act_loss: 0.0024566650390625|cri_loss: 0.0014514923095703125|unsuper_loss: 0.0
+average reward score: 5.3515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.66%) |Training time=0.47s (21.80%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.45
+[2023-04-14 10:55:07,050] [INFO] [logging.py:96:log_dist] [Rank 0] step=3460, skipped=48, lr=[6.230955528740993e-06, 6.230955528740993e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:55:07,068] [INFO] [timer.py:199:stop] epoch=0/micro_step=3460/global_step=3460, RunningAvgSamplesPerSec=105.7849063342375, CurrSamplesPerSec=107.60592251472772, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:55:07,161] [INFO] [logging.py:96:log_dist] [Rank 0] step=3460, skipped=53, lr=[3.2330766626639233e-06, 3.2330766626639233e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3459|ppo_ep: 1|act_loss: -0.0158843994140625|cri_loss: -0.006961822509765625|unsuper_loss: 0.0
+average reward score: 5.1796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.14%) |Training time=0.46s (21.31%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3460|ppo_ep: 1|act_loss: -0.018280029296875|cri_loss: -0.00885009765625|unsuper_loss: 0.0
+average reward score: 4.515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.95%) |Training time=0.46s (21.51%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3461|ppo_ep: 1|act_loss: 0.02325439453125|cri_loss: 0.0117645263671875|unsuper_loss: 0.0
+average reward score: 5.375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.22%) |Training time=0.46s (21.22%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3462|ppo_ep: 1|act_loss: -0.0025615692138671875|cri_loss: -0.0010423660278320312|unsuper_loss: 0.0
+average reward score: 4.71875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.72%) |Training time=0.47s (21.61%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3463|ppo_ep: 1|act_loss: 0.0088958740234375|cri_loss: 0.0046844482421875|unsuper_loss: 0.0
+average reward score: 5.21875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.46s (21.57%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3464|ppo_ep: 1|act_loss: -0.01629638671875|cri_loss: -0.00799560546875|unsuper_loss: 0.0
+average reward score: 5.26171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.26%) |Training time=0.48s (22.18%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3465|ppo_ep: 1|act_loss: 0.009490966796875|cri_loss: 0.0052642822265625|unsuper_loss: 0.0
+average reward score: 5.73828125
+-------------------------------------------------------------------------------------
+|E2E latency=3.12s |Gather latency=0.00s (0.00%) |Generate time=1.61s (51.60%) |Training time=0.51s (16.36%) |Others=1.00 (32.04%)|CurSamplesPerSec=10.24 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3466|ppo_ep: 1|act_loss: 0.0145111083984375|cri_loss: 0.0078887939453125|unsuper_loss: 0.0
+average reward score: 5.1015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.73%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3467|ppo_ep: 1|act_loss: -0.015533447265625|cri_loss: -0.007503509521484375|unsuper_loss: 0.0
+average reward score: 4.9296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.48s (22.07%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3468|ppo_ep: 1|act_loss: -0.014312744140625|cri_loss: -0.006694793701171875|unsuper_loss: 0.0
+average reward score: 4.9609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.48s (22.07%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
+[2023-04-14 10:55:29,606] [INFO] [logging.py:96:log_dist] [Rank 0] step=3470, skipped=48, lr=[6.213175071520385e-06, 6.213175071520385e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:55:29,625] [INFO] [timer.py:199:stop] epoch=0/micro_step=3470/global_step=3470, RunningAvgSamplesPerSec=105.77427848375432, CurrSamplesPerSec=98.66774093949864, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:55:29,717] [INFO] [logging.py:96:log_dist] [Rank 0] step=3470, skipped=53, lr=[3.2238693723433317e-06, 3.2238693723433317e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3469|ppo_ep: 1|act_loss: -0.01213836669921875|cri_loss: -0.005840301513671875|unsuper_loss: 0.0
+average reward score: 4.79296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.97%) |Training time=0.49s (22.50%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3470|ppo_ep: 1|act_loss: -0.0100555419921875|cri_loss: -0.004817962646484375|unsuper_loss: 0.0
+average reward score: 5.42578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.14%) |Training time=0.48s (22.30%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3471|ppo_ep: 1|act_loss: -0.0176544189453125|cri_loss: -0.007389068603515625|unsuper_loss: 0.0
+average reward score: 5.41015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.39%) |Training time=0.48s (22.05%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3472|ppo_ep: 1|act_loss: -0.0164794921875|cri_loss: -0.007843017578125|unsuper_loss: 0.0
+average reward score: 4.82421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.91%) |Training time=0.49s (22.55%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3473|ppo_ep: 1|act_loss: 0.03594970703125|cri_loss: 0.018707275390625|unsuper_loss: 0.0
+average reward score: 4.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.48s (22.05%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3474|ppo_ep: 1|act_loss: 0.01245880126953125|cri_loss: 0.0072021484375|unsuper_loss: 0.0
+average reward score: 5.10546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.38%) |Training time=0.48s (22.12%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3475|ppo_ep: 1|act_loss: 0.016845703125|cri_loss: 0.00959014892578125|unsuper_loss: 0.0
+average reward score: 5.5859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.09%) |Training time=0.48s (22.35%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3476|ppo_ep: 1|act_loss: 0.026947021484375|cri_loss: 0.013885498046875|unsuper_loss: 0.0
+average reward score: 5.765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.04%) |Training time=0.49s (22.42%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3477|ppo_ep: 1|act_loss: -0.006870269775390625|cri_loss: -0.00278472900390625|unsuper_loss: 0.0
+average reward score: 5.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.92%) |Training time=0.46s (21.52%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3478|ppo_ep: 1|act_loss: -0.011322021484375|cri_loss: -0.00521087646484375|unsuper_loss: 0.0
+average reward score: 5.41015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.55%) |Training time=0.47s (21.90%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45
+[2023-04-14 10:55:51,216] [INFO] [logging.py:96:log_dist] [Rank 0] step=3480, skipped=48, lr=[6.195374038196429e-06, 6.195374038196429e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:55:51,234] [INFO] [timer.py:199:stop] epoch=0/micro_step=3480/global_step=3480, RunningAvgSamplesPerSec=105.76188277060592, CurrSamplesPerSec=105.97196140666698, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:55:51,378] [INFO] [logging.py:96:log_dist] [Rank 0] step=3480, skipped=53, lr=[3.2146513525324026e-06, 3.2146513525324026e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3479|ppo_ep: 1|act_loss: -0.03985595703125|cri_loss: -0.0194244384765625|unsuper_loss: 0.0
+average reward score: 5.69921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.28%) |Training time=0.46s (21.00%) |Others=0.15 (6.72%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3480|ppo_ep: 1|act_loss: -0.005619049072265625|cri_loss: -0.0025501251220703125|unsuper_loss: 0.0
+average reward score: 4.4765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.74s (74.56%) |Training time=0.46s (19.97%) |Others=0.13 (5.47%)|CurSamplesPerSec=13.75 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3481|ppo_ep: 1|act_loss: -0.0032958984375|cri_loss: -0.0014524459838867188|unsuper_loss: 0.0
+average reward score: 5.3125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.86%) |Training time=0.47s (21.33%) |Others=0.11 (4.80%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3482|ppo_ep: 1|act_loss: -0.0078582763671875|cri_loss: -0.0034637451171875|unsuper_loss: 0.0
+average reward score: 5.0546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.52%) |Training time=0.47s (21.95%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45
+epoch: 0|step: 3483|ppo_ep: 1|act_loss: 0.001445770263671875|cri_loss: 0.0008373260498046875|unsuper_loss: 0.0
+average reward score: 5.4375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.73%) |Training time=0.47s (21.71%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3484|ppo_ep: 1|act_loss: -0.03155517578125|cri_loss: -0.0147247314453125|unsuper_loss: 0.0
+average reward score: 5.0546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.09%) |Training time=0.48s (22.37%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3485|ppo_ep: 1|act_loss: -0.00524139404296875|cri_loss: -0.002017974853515625|unsuper_loss: 0.0
+average reward score: 4.71875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.36%) |Training time=0.47s (22.08%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3486|ppo_ep: 1|act_loss: 0.011993408203125|cri_loss: 0.006175994873046875|unsuper_loss: 0.0
+average reward score: 5.54296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.14%) |Training time=0.48s (22.31%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3487|ppo_ep: 1|act_loss: 0.0201263427734375|cri_loss: 0.01052093505859375|unsuper_loss: 0.0
+average reward score: 5.1328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.58%) |Training time=0.47s (21.84%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3488|ppo_ep: 1|act_loss: 0.029449462890625|cri_loss: 0.01541900634765625|unsuper_loss: 0.0
+average reward score: 4.85546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.68%) |Training time=0.47s (21.74%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.46
+[2023-04-14 10:56:12,986] [INFO] [logging.py:96:log_dist] [Rank 0] step=3490, skipped=48, lr=[6.177552692623376e-06, 6.177552692623376e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:56:13,005] [INFO] [timer.py:199:stop] epoch=0/micro_step=3490/global_step=3490, RunningAvgSamplesPerSec=105.75557515461509, CurrSamplesPerSec=105.23719384686797, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:56:13,097] [INFO] [logging.py:96:log_dist] [Rank 0] step=3490, skipped=53, lr=[3.2054227398644253e-06, 3.2054227398644253e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3489|ppo_ep: 1|act_loss: -0.0174713134765625|cri_loss: -0.00785064697265625|unsuper_loss: 0.0
+average reward score: 5.80859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.66%) |Training time=0.47s (21.77%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3490|ppo_ep: 1|act_loss: -0.029205322265625|cri_loss: -0.0137939453125|unsuper_loss: 0.0
+average reward score: 5.65234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.54%) |Training time=0.47s (21.89%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3491|ppo_ep: 1|act_loss: 0.018524169921875|cri_loss: 0.01007843017578125|unsuper_loss: 0.0
+average reward score: 4.5546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.09%) |Training time=0.48s (22.33%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3492|ppo_ep: 1|act_loss: -0.0187835693359375|cri_loss: -0.00879669189453125|unsuper_loss: 0.0
+average reward score: 5.6171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.91%) |Training time=0.46s (21.26%) |Others=0.10 (4.83%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3493|ppo_ep: 1|act_loss: -0.01885986328125|cri_loss: -0.0089111328125|unsuper_loss: 0.0
+average reward score: 4.97265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.84%) |Training time=0.46s (21.59%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3494|ppo_ep: 1|act_loss: 0.0062103271484375|cri_loss: 0.0032501220703125|unsuper_loss: 0.0
+average reward score: 5.63671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.16%) |Training time=0.48s (22.31%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3495|ppo_ep: 1|act_loss: 0.005767822265625|cri_loss: 0.0030612945556640625|unsuper_loss: 0.0
+average reward score: 4.77734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.47s |Gather latency=0.00s (0.00%) |Generate time=1.65s (66.68%) |Training time=0.59s (23.83%) |Others=0.23 (9.48%)|CurSamplesPerSec=12.94 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3496|ppo_ep: 1|act_loss: 0.029144287109375|cri_loss: 0.01495361328125|unsuper_loss: 0.0
+average reward score: 5.16015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.88%) |Training time=0.47s (21.57%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3497|ppo_ep: 1|act_loss: -0.01514434814453125|cri_loss: -0.0066375732421875|unsuper_loss: 0.0
+average reward score: 4.75390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.70%) |Training time=0.47s (21.75%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3498|ppo_ep: 1|act_loss: -0.020843505859375|cri_loss: -0.00994110107421875|unsuper_loss: 0.0
+average reward score: 4.4765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.35%) |Training time=0.48s (22.09%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+[2023-04-14 10:56:34,865] [INFO] [logging.py:96:log_dist] [Rank 0] step=3500, skipped=48, lr=[6.159711298956566e-06, 6.159711298956566e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:56:34,883] [INFO] [timer.py:199:stop] epoch=0/micro_step=3500/global_step=3500, RunningAvgSamplesPerSec=105.74144100983754, CurrSamplesPerSec=101.71870384138222, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:56:34,976] [INFO] [logging.py:96:log_dist] [Rank 0] step=3500, skipped=53, lr=[3.1961836711297046e-06, 3.1961836711297046e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3499|ppo_ep: 1|act_loss: 0.0005273818969726562|cri_loss: 0.0006098747253417969|unsuper_loss: 0.0
+average reward score: 4.453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.40%) |Training time=0.48s (22.08%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3500|ppo_ep: 1|act_loss: 0.0246734619140625|cri_loss: 0.01354217529296875|unsuper_loss: 0.0
+average reward score: 5.3203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.30%) |Training time=0.48s (22.15%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3501|ppo_ep: 1|act_loss: -0.003116607666015625|cri_loss: -0.001251220703125|unsuper_loss: 0.0
+average reward score: 5.65625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.58%) |Training time=0.48s (22.14%) |Others=0.11 (5.28%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3502|ppo_ep: 1|act_loss: -0.0015268325805664062|cri_loss: -0.0006113052368164062|unsuper_loss: 0.0
+average reward score: 5.3046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.48s (22.25%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3503|ppo_ep: 1|act_loss: -0.009063720703125|cri_loss: -0.0035953521728515625|unsuper_loss: 0.0
+average reward score: 5.234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.45%) |Training time=0.47s (22.00%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3504|ppo_ep: 1|act_loss: -0.01104736328125|cri_loss: -0.005390167236328125|unsuper_loss: 0.0
+average reward score: 5.0546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.08%) |Training time=0.48s (22.37%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3505|ppo_ep: 1|act_loss: 0.01168060302734375|cri_loss: 0.00601959228515625|unsuper_loss: 0.0
+average reward score: 4.6953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.43%) |Training time=0.47s (22.02%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3506|ppo_ep: 1|act_loss: -0.01763916015625|cri_loss: -0.00856781005859375|unsuper_loss: 0.0
+average reward score: 6.0234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.94%) |Training time=0.49s (22.50%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3507|ppo_ep: 1|act_loss: -0.0230255126953125|cri_loss: -0.01116180419921875|unsuper_loss: 0.0
+average reward score: 4.8515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.44s |Gather latency=0.00s (0.00%) |Generate time=1.59s (65.11%) |Training time=0.47s (19.31%) |Others=0.38 (15.58%)|CurSamplesPerSec=13.13 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3508|ppo_ep: 1|act_loss: -0.006900787353515625|cri_loss: -0.002567291259765625|unsuper_loss: 0.0
+average reward score: 5.2734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.61%) |Others=0.10 (4.79%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
+[2023-04-14 10:56:56,750] [INFO] [logging.py:96:log_dist] [Rank 0] step=3510, skipped=48, lr=[6.141850121648488e-06, 6.141850121648488e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:56:56,769] [INFO] [timer.py:199:stop] epoch=0/micro_step=3510/global_step=3510, RunningAvgSamplesPerSec=105.72984477658514, CurrSamplesPerSec=104.4290330386313, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:56:56,861] [INFO] [logging.py:96:log_dist] [Rank 0] step=3510, skipped=53, lr=[3.186934283273525e-06, 3.186934283273525e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3509|ppo_ep: 1|act_loss: -0.028656005859375|cri_loss: -0.01386260986328125|unsuper_loss: 0.0
+average reward score: 5.9375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.47s (21.69%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3510|ppo_ep: 1|act_loss: 0.002899169921875|cri_loss: 0.0016813278198242188|unsuper_loss: 0.0
+average reward score: 5.0390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.68s (74.55%) |Training time=0.47s (21.08%) |Others=0.10 (4.37%)|CurSamplesPerSec=14.22 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3511|ppo_ep: 1|act_loss: 0.035186767578125|cri_loss: 0.018035888671875|unsuper_loss: 0.0
+average reward score: 4.7890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.30%) |Training time=0.47s (20.47%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3512|ppo_ep: 1|act_loss: 0.01568603515625|cri_loss: 0.00904083251953125|unsuper_loss: 0.0
+average reward score: 4.7109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.48%) |Training time=0.47s (21.95%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3513|ppo_ep: 1|act_loss: 0.00518035888671875|cri_loss: 0.003559112548828125|unsuper_loss: 0.0
+average reward score: 5.59765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.42%) |Training time=0.47s (22.01%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
+[2023-04-14 10:57:07,867] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 3514|ppo_ep: 1|act_loss: 0.0416259765625|cri_loss: 0.0216064453125|unsuper_loss: 0.0
+average reward score: 5.515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.59s |Gather latency=0.00s (0.00%) |Generate time=1.58s (60.83%) |Training time=0.48s (18.46%) |Others=0.54 (20.70%)|CurSamplesPerSec=12.34 |AvgSamplesPerSec=14.46
+[2023-04-14 10:57:10,455] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 3515|ppo_ep: 1|act_loss: 0.0184783935546875|cri_loss: 0.0100860595703125|unsuper_loss: 0.0
+average reward score: 4.76953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.74%) |Training time=0.47s (22.17%) |Others=0.09 (4.09%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3516|ppo_ep: 1|act_loss: -0.000457763671875|cri_loss: 0.0002994537353515625|unsuper_loss: 0.0
+average reward score: 5.1171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.48s (22.12%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3517|ppo_ep: 1|act_loss: 0.023529052734375|cri_loss: 0.01279449462890625|unsuper_loss: 0.0
+average reward score: 4.50390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.26%) |Training time=0.48s (22.12%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3518|ppo_ep: 1|act_loss: 0.00598907470703125|cri_loss: 0.003543853759765625|unsuper_loss: 0.0
+average reward score: 4.49609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.51%) |Training time=0.47s (21.92%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
+[2023-04-14 10:57:18,957] [INFO] [logging.py:96:log_dist] [Rank 0] step=3520, skipped=48, lr=[6.123969425444881e-06, 6.123969425444881e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:57:18,976] [INFO] [timer.py:199:stop] epoch=0/micro_step=3520/global_step=3520, RunningAvgSamplesPerSec=105.7193975041583, CurrSamplesPerSec=99.69532912568643, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:57:19,068] [INFO] [logging.py:96:log_dist] [Rank 0] step=3520, skipped=55, lr=[3.1795274353460633e-06, 3.1795274353460633e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3519|ppo_ep: 1|act_loss: -0.018646240234375|cri_loss: -0.00894927978515625|unsuper_loss: 0.0
+average reward score: 4.77734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.07%) |Training time=0.48s (22.39%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3520|ppo_ep: 1|act_loss: -0.04638671875|cri_loss: -0.0218658447265625|unsuper_loss: 0.0
+average reward score: 4.53125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.39%) |Training time=0.47s (22.05%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3521|ppo_ep: 1|act_loss: -0.018463134765625|cri_loss: -0.00884246826171875|unsuper_loss: 0.0
+average reward score: 5.203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.34%) |Training time=0.48s (22.10%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3522|ppo_ep: 1|act_loss: -0.014129638671875|cri_loss: -0.0068817138671875|unsuper_loss: 0.0
+average reward score: 5.65625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.49%) |Training time=0.47s (21.95%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3523|ppo_ep: 1|act_loss: -0.01030731201171875|cri_loss: -0.00466156005859375|unsuper_loss: 0.0
+average reward score: 5.22265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.37%) |Training time=0.48s (22.09%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3524|ppo_ep: 1|act_loss: 0.0089874267578125|cri_loss: 0.005382537841796875|unsuper_loss: 0.0
+average reward score: 5.046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.99%) |Training time=0.49s (22.46%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3525|ppo_ep: 1|act_loss: 0.0082855224609375|cri_loss: 0.004657745361328125|unsuper_loss: 0.0
+average reward score: 5.46875
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.74s (75.04%) |Training time=0.48s (20.71%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3526|ppo_ep: 1|act_loss: 0.04345703125|cri_loss: 0.02294921875|unsuper_loss: 0.0
+average reward score: 4.01953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.48s (22.01%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3527|ppo_ep: 1|act_loss: 0.0106353759765625|cri_loss: 0.00634002685546875|unsuper_loss: 0.0
+average reward score: 5.14453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.01%) |Training time=0.48s (22.30%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3528|ppo_ep: 1|act_loss: 0.0018854141235351562|cri_loss: 0.0011796951293945312|unsuper_loss: 0.0
+average reward score: 5.015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.48s (22.10%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+[2023-04-14 10:57:40,713] [INFO] [logging.py:96:log_dist] [Rank 0] step=3530, skipped=48, lr=[6.106069475380793e-06, 6.106069475380793e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:57:40,731] [INFO] [timer.py:199:stop] epoch=0/micro_step=3530/global_step=3530, RunningAvgSamplesPerSec=105.70634007207184, CurrSamplesPerSec=100.36808836011082, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:57:40,823] [INFO] [logging.py:96:log_dist] [Rank 0] step=3530, skipped=55, lr=[3.1702598186603152e-06, 3.1702598186603152e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3529|ppo_ep: 1|act_loss: 0.022003173828125|cri_loss: 0.011566162109375|unsuper_loss: 0.0
+average reward score: 6.10546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.45%) |Training time=0.48s (22.08%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3530|ppo_ep: 1|act_loss: -0.027252197265625|cri_loss: -0.01322174072265625|unsuper_loss: 0.0
+average reward score: 5.08984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.25%) |Training time=0.48s (22.20%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3531|ppo_ep: 1|act_loss: -0.0112152099609375|cri_loss: -0.00482940673828125|unsuper_loss: 0.0
+average reward score: 5.36328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.54s |Gather latency=0.00s (0.00%) |Generate time=1.58s (62.12%) |Training time=0.49s (19.26%) |Others=0.47 (18.62%)|CurSamplesPerSec=12.59 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3532|ppo_ep: 1|act_loss: -0.002079010009765625|cri_loss: -0.0007619857788085938|unsuper_loss: 0.0
+average reward score: 5.5546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.80%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3533|ppo_ep: 1|act_loss: -0.0006546974182128906|cri_loss: -0.00023233890533447266|unsuper_loss: 0.0
+average reward score: 5.0
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.57%) |Training time=0.47s (21.87%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3534|ppo_ep: 1|act_loss: 0.02215576171875|cri_loss: 0.011871337890625|unsuper_loss: 0.0
+average reward score: 5.40625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.86%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3535|ppo_ep: 1|act_loss: -0.00395965576171875|cri_loss: -0.0013418197631835938|unsuper_loss: 0.0
+average reward score: 4.8125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.76%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3536|ppo_ep: 1|act_loss: -0.0135040283203125|cri_loss: -0.00617218017578125|unsuper_loss: 0.0
+average reward score: 5.51953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.73%) |Training time=0.47s (21.69%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3537|ppo_ep: 1|act_loss: 0.0075225830078125|cri_loss: 0.004032135009765625|unsuper_loss: 0.0
+average reward score: 6.2265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.81s |Gather latency=0.00s (0.00%) |Generate time=1.59s (56.84%) |Training time=0.47s (16.76%) |Others=0.74 (26.40%)|CurSamplesPerSec=11.41 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3538|ppo_ep: 1|act_loss: -0.02703857421875|cri_loss: -0.012420654296875|unsuper_loss: 0.0
+average reward score: 5.328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.61%) |Training time=0.48s (21.90%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.46
+[2023-04-14 10:58:03,434] [INFO] [logging.py:96:log_dist] [Rank 0] step=3540, skipped=48, lr=[6.0881505367766705e-06, 6.0881505367766705e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:58:03,452] [INFO] [timer.py:199:stop] epoch=0/micro_step=3540/global_step=3540, RunningAvgSamplesPerSec=105.69823743299148, CurrSamplesPerSec=104.70218269755831, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:58:03,544] [INFO] [logging.py:96:log_dist] [Rank 0] step=3540, skipped=55, lr=[3.1609822671071404e-06, 3.1609822671071404e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3539|ppo_ep: 1|act_loss: -0.00218963623046875|cri_loss: -0.0005474090576171875|unsuper_loss: 0.0
+average reward score: 5.3125
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.91%) |Training time=0.47s (20.75%) |Others=0.10 (4.33%)|CurSamplesPerSec=14.19 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3540|ppo_ep: 1|act_loss: 0.0013484954833984375|cri_loss: 0.00098419189453125|unsuper_loss: 0.0
+average reward score: 5.22265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.53%) |Training time=0.49s (21.99%) |Others=0.14 (6.48%)|CurSamplesPerSec=14.44 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3541|ppo_ep: 1|act_loss: 0.004383087158203125|cri_loss: 0.00302886962890625|unsuper_loss: 0.0
+average reward score: 4.9453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.92%) |Training time=0.46s (21.50%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3542|ppo_ep: 1|act_loss: 0.0372314453125|cri_loss: 0.0189666748046875|unsuper_loss: 0.0
+average reward score: 6.0546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.70%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3543|ppo_ep: 1|act_loss: -0.0019016265869140625|cri_loss: -0.0007863044738769531|unsuper_loss: 0.0
+average reward score: 4.609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.83%) |Training time=0.43s (20.51%) |Others=0.10 (4.65%)|CurSamplesPerSec=15.12 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3544|ppo_ep: 1|act_loss: 0.00246429443359375|cri_loss: 0.00146484375|unsuper_loss: 0.0
+average reward score: 5.890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.97%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3545|ppo_ep: 1|act_loss: 0.002471923828125|cri_loss: 0.0014495849609375|unsuper_loss: 0.0
+average reward score: 4.5234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.22%) |Training time=0.48s (22.23%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3546|ppo_ep: 1|act_loss: -1.728534698486328e-05|cri_loss: 0.00010848045349121094|unsuper_loss: 0.0
+average reward score: 5.078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.18%) |Training time=0.48s (22.27%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3547|ppo_ep: 1|act_loss: -0.004550933837890625|cri_loss: -0.002094268798828125|unsuper_loss: 0.0
+average reward score: 4.3203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.00%) |Training time=0.49s (22.46%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3548|ppo_ep: 1|act_loss: -0.0041656494140625|cri_loss: -0.001705169677734375|unsuper_loss: 0.0
+average reward score: 5.09765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.18%) |Training time=0.48s (22.28%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
+[2023-04-14 10:58:25,043] [INFO] [logging.py:96:log_dist] [Rank 0] step=3550, skipped=48, lr=[6.070212875234407e-06, 6.070212875234407e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:58:25,628] [INFO] [timer.py:199:stop] epoch=0/micro_step=3550/global_step=3550, RunningAvgSamplesPerSec=105.63486237523144, CurrSamplesPerSec=36.294179826075954, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:58:25,721] [INFO] [logging.py:96:log_dist] [Rank 0] step=3550, skipped=55, lr=[3.151694918202235e-06, 3.151694918202235e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3549|ppo_ep: 1|act_loss: -0.04327392578125|cri_loss: -0.0212249755859375|unsuper_loss: 0.0
+average reward score: 5.03515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.72s |Gather latency=0.00s (0.00%) |Generate time=1.58s (58.02%) |Training time=1.04s (38.39%) |Others=0.10 (3.59%)|CurSamplesPerSec=11.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3550|ppo_ep: 1|act_loss: -0.011199951171875|cri_loss: -0.005176544189453125|unsuper_loss: 0.0
+average reward score: 4.8984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.21%) |Training time=0.48s (22.23%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3551|ppo_ep: 1|act_loss: -0.00017547607421875|cri_loss: 0.0007619857788085938|unsuper_loss: 0.0
+average reward score: 4.36328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.06%) |Training time=0.48s (22.37%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3552|ppo_ep: 1|act_loss: 0.0054168701171875|cri_loss: 0.00298309326171875|unsuper_loss: 0.0
+average reward score: 4.94140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.11%) |Training time=0.46s (21.29%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3553|ppo_ep: 1|act_loss: -0.0256500244140625|cri_loss: -0.0125885009765625|unsuper_loss: 0.0
+average reward score: 4.7578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.74%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3554|ppo_ep: 1|act_loss: 0.0131378173828125|cri_loss: 0.007228851318359375|unsuper_loss: 0.0
+average reward score: 5.15625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.77%) |Training time=0.47s (21.68%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3555|ppo_ep: 1|act_loss: 0.0072784423828125|cri_loss: 0.0038852691650390625|unsuper_loss: 0.0
+average reward score: 4.77734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.74s (75.03%) |Training time=0.48s (20.73%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3556|ppo_ep: 1|act_loss: 0.004425048828125|cri_loss: 0.0025787353515625|unsuper_loss: 0.0
+average reward score: 5.47265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.67%) |Training time=0.47s (21.82%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3557|ppo_ep: 1|act_loss: 0.022064208984375|cri_loss: 0.0125885009765625|unsuper_loss: 0.0
+average reward score: 5.0546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.55%) |Training time=0.47s (21.90%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3558|ppo_ep: 1|act_loss: -0.018798828125|cri_loss: -0.0089874267578125|unsuper_loss: 0.0
+average reward score: 4.953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.74%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+[2023-04-14 10:58:47,342] [INFO] [logging.py:96:log_dist] [Rank 0] step=3560, skipped=48, lr=[6.0522567566334204e-06, 6.0522567566334204e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:58:47,360] [INFO] [timer.py:199:stop] epoch=0/micro_step=3560/global_step=3560, RunningAvgSamplesPerSec=105.62931678444558, CurrSamplesPerSec=108.95321962132799, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:58:47,453] [INFO] [logging.py:96:log_dist] [Rank 0] step=3560, skipped=55, lr=[3.1423979096065134e-06, 3.1423979096065134e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3559|ppo_ep: 1|act_loss: -0.027008056640625|cri_loss: -0.0125732421875|unsuper_loss: 0.0
+average reward score: 4.98046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.22%) |Training time=0.46s (21.23%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3560|ppo_ep: 1|act_loss: 0.0114288330078125|cri_loss: 0.006137847900390625|unsuper_loss: 0.0
+average reward score: 5.59765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.59s (67.66%) |Training time=0.46s (19.78%) |Others=0.30 (12.57%)|CurSamplesPerSec=13.61 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3561|ppo_ep: 1|act_loss: 0.0050811767578125|cri_loss: 0.002773284912109375|unsuper_loss: 0.0
+average reward score: 4.75
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.95%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3562|ppo_ep: 1|act_loss: -0.010101318359375|cri_loss: -0.00379180908203125|unsuper_loss: 0.0
+average reward score: 4.0390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.76%) |Training time=0.47s (21.66%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3563|ppo_ep: 1|act_loss: 0.033172607421875|cri_loss: 0.017578125|unsuper_loss: 0.0
+average reward score: 5.46875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.11%) |Training time=0.46s (21.32%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3564|ppo_ep: 1|act_loss: 0.005565643310546875|cri_loss: 0.003925323486328125|unsuper_loss: 0.0
+average reward score: 5.7421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.84%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3565|ppo_ep: 1|act_loss: 0.01776123046875|cri_loss: 0.0090789794921875|unsuper_loss: 0.0
+average reward score: 4.484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.68%) |Training time=0.47s (21.76%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3566|ppo_ep: 1|act_loss: 0.0094451904296875|cri_loss: 0.00511932373046875|unsuper_loss: 0.0
+average reward score: 5.0
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.33%) |Training time=0.45s (21.06%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3567|ppo_ep: 1|act_loss: -0.00640869140625|cri_loss: -0.00267791748046875|unsuper_loss: 0.0
+average reward score: 4.79296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.07%) |Training time=0.46s (21.07%) |Others=0.11 (4.85%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3568|ppo_ep: 1|act_loss: -0.02154541015625|cri_loss: -0.01007843017578125|unsuper_loss: 0.0
+average reward score: 4.8984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.62s (72.70%) |Training time=0.51s (22.87%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.40 |AvgSamplesPerSec=14.46
+[2023-04-14 10:59:09,144] [INFO] [logging.py:96:log_dist] [Rank 0] step=3570, skipped=48, lr=[6.0342824471267055e-06, 6.0342824471267055e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:59:09,162] [INFO] [timer.py:199:stop] epoch=0/micro_step=3570/global_step=3570, RunningAvgSamplesPerSec=105.63078928049823, CurrSamplesPerSec=105.9774002645137, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:59:09,255] [INFO] [logging.py:96:log_dist] [Rank 0] step=3570, skipped=55, lr=[3.1330913791240707e-06, 3.1330913791240707e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3569|ppo_ep: 1|act_loss: -0.0247802734375|cri_loss: -0.01129150390625|unsuper_loss: 0.0
+average reward score: 5.0546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.46s (21.58%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3570|ppo_ep: 1|act_loss: -0.0223846435546875|cri_loss: -0.01056671142578125|unsuper_loss: 0.0
+average reward score: 5.640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.66%) |Training time=0.46s (19.63%) |Others=0.13 (5.72%)|CurSamplesPerSec=13.60 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3571|ppo_ep: 1|act_loss: 0.016448974609375|cri_loss: 0.00888824462890625|unsuper_loss: 0.0
+average reward score: 4.62890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.52%) |Training time=0.47s (21.94%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3572|ppo_ep: 1|act_loss: -0.0006151199340820312|cri_loss: 0.00021076202392578125|unsuper_loss: 0.0
+average reward score: 5.0234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.66%) |Training time=0.47s (21.78%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3573|ppo_ep: 1|act_loss: -0.006267547607421875|cri_loss: -0.0024242401123046875|unsuper_loss: 0.0
+average reward score: 5.28515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.29%) |Training time=0.45s (21.14%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3574|ppo_ep: 1|act_loss: -0.027252197265625|cri_loss: -0.01296234130859375|unsuper_loss: 0.0
+average reward score: 5.60546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.82%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3575|ppo_ep: 1|act_loss: -0.00567626953125|cri_loss: -0.002521514892578125|unsuper_loss: 0.0
+average reward score: 5.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.68%) |Training time=0.47s (21.72%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3576|ppo_ep: 1|act_loss: -0.017608642578125|cri_loss: -0.0083770751953125|unsuper_loss: 0.0
+average reward score: 4.99609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.59s (68.85%) |Training time=0.46s (20.14%) |Others=0.25 (11.01%)|CurSamplesPerSec=13.86 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3577|ppo_ep: 1|act_loss: 0.00687408447265625|cri_loss: 0.0038509368896484375|unsuper_loss: 0.0
+average reward score: 4.546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.77%) |Training time=0.47s (21.59%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3578|ppo_ep: 1|act_loss: -0.0240020751953125|cri_loss: -0.01154327392578125|unsuper_loss: 0.0
+average reward score: 5.15234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.70%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
+[2023-04-14 10:59:31,042] [INFO] [logging.py:96:log_dist] [Rank 0] step=3580, skipped=48, lr=[6.016290213136888e-06, 6.016290213136888e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:59:31,060] [INFO] [timer.py:199:stop] epoch=0/micro_step=3580/global_step=3580, RunningAvgSamplesPerSec=105.62996466221267, CurrSamplesPerSec=105.79012340813594, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:59:31,152] [INFO] [logging.py:96:log_dist] [Rank 0] step=3580, skipped=55, lr=[3.1237754647001396e-06, 3.1237754647001396e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3579|ppo_ep: 1|act_loss: 0.00429534912109375|cri_loss: 0.00231170654296875|unsuper_loss: 0.0
+average reward score: 5.15234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.46s (21.60%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3580|ppo_ep: 1|act_loss: 0.0301055908203125|cri_loss: 0.0154571533203125|unsuper_loss: 0.0
+average reward score: 4.7109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.65%) |Training time=0.47s (21.80%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3581|ppo_ep: 1|act_loss: -0.0161895751953125|cri_loss: -0.00774383544921875|unsuper_loss: 0.0
+average reward score: 5.6484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.51%) |Training time=0.45s (20.94%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3582|ppo_ep: 1|act_loss: -0.019378662109375|cri_loss: -0.0086212158203125|unsuper_loss: 0.0
+average reward score: 5.0703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.01%) |Training time=0.44s (20.43%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3583|ppo_ep: 1|act_loss: 0.026580810546875|cri_loss: 0.0140228271484375|unsuper_loss: 0.0
+average reward score: 5.296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.77%) |Training time=0.47s (21.67%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3584|ppo_ep: 1|act_loss: 0.031585693359375|cri_loss: 0.01611328125|unsuper_loss: 0.0
+average reward score: 5.546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.62s (71.67%) |Training time=0.47s (20.92%) |Others=0.17 (7.41%)|CurSamplesPerSec=14.19 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3585|ppo_ep: 1|act_loss: 0.004833221435546875|cri_loss: 0.0026397705078125|unsuper_loss: 0.0
+average reward score: 5.5546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.97%) |Training time=0.46s (21.47%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3586|ppo_ep: 1|act_loss: 0.031219482421875|cri_loss: 0.016754150390625|unsuper_loss: 0.0
+average reward score: 5.2421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.59%) |Training time=0.47s (21.86%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3587|ppo_ep: 1|act_loss: 0.01354217529296875|cri_loss: 0.006961822509765625|unsuper_loss: 0.0
+average reward score: 5.35546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.44s |Gather latency=0.00s (0.00%) |Generate time=1.59s (65.22%) |Training time=0.47s (19.17%) |Others=0.38 (15.61%)|CurSamplesPerSec=13.13 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3588|ppo_ep: 1|act_loss: 0.003223419189453125|cri_loss: 0.002239227294921875|unsuper_loss: 0.0
+average reward score: 5.09765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.67%) |Training time=0.47s (21.78%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
+[2023-04-14 10:59:52,984] [INFO] [logging.py:96:log_dist] [Rank 0] step=3590, skipped=48, lr=[5.998280321352281e-06, 5.998280321352281e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 10:59:53,003] [INFO] [timer.py:199:stop] epoch=0/micro_step=3590/global_step=3590, RunningAvgSamplesPerSec=105.63170114714389, CurrSamplesPerSec=103.337966759135, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 10:59:53,095] [INFO] [logging.py:96:log_dist] [Rank 0] step=3590, skipped=55, lr=[3.1144503044190456e-06, 3.1144503044190456e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3589|ppo_ep: 1|act_loss: -0.026947021484375|cri_loss: -0.0128936767578125|unsuper_loss: 0.0
+average reward score: 5.046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.91%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3590|ppo_ep: 1|act_loss: -0.0281524658203125|cri_loss: -0.0134124755859375|unsuper_loss: 0.0
+average reward score: 5.1953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.95%) |Training time=0.46s (21.49%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3591|ppo_ep: 1|act_loss: -0.03668212890625|cri_loss: -0.017578125|unsuper_loss: 0.0
+average reward score: 5.39453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.88%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3592|ppo_ep: 1|act_loss: -0.001079559326171875|cri_loss: 0.00086212158203125|unsuper_loss: 0.0
+average reward score: 4.828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.72%) |Training time=0.47s (21.66%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3593|ppo_ep: 1|act_loss: 0.0159759521484375|cri_loss: 0.00830078125|unsuper_loss: 0.0
+average reward score: 5.3828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.88%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3594|ppo_ep: 1|act_loss: 0.0513916015625|cri_loss: 0.027069091796875|unsuper_loss: 0.0
+average reward score: 4.35546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.81%) |Training time=0.47s (21.62%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3595|ppo_ep: 1|act_loss: 0.01678466796875|cri_loss: 0.00872039794921875|unsuper_loss: 0.0
+average reward score: 5.67578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.59%) |Training time=0.47s (21.86%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3596|ppo_ep: 1|act_loss: 0.02386474609375|cri_loss: 0.012664794921875|unsuper_loss: 0.0
+average reward score: 3.826171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.23%) |Training time=0.46s (21.20%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3597|ppo_ep: 1|act_loss: 0.0190582275390625|cri_loss: 0.00998687744140625|unsuper_loss: 0.0
+average reward score: 4.86328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.16%) |Training time=0.45s (20.35%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3598|ppo_ep: 1|act_loss: -0.018890380859375|cri_loss: -0.0084686279296875|unsuper_loss: 0.0
+average reward score: 5.3125
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.69s (75.01%) |Training time=0.46s (20.62%) |Others=0.10 (4.36%)|CurSamplesPerSec=14.22 |AvgSamplesPerSec=14.46
+[2023-04-14 11:00:14,694] [INFO] [logging.py:96:log_dist] [Rank 0] step=3600, skipped=48, lr=[5.980253038722927e-06, 5.980253038722927e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:00:15,110] [INFO] [timer.py:199:stop] epoch=0/micro_step=3600/global_step=3600, RunningAvgSamplesPerSec=105.5920159904189, CurrSamplesPerSec=44.41792340872793, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:00:15,203] [INFO] [logging.py:96:log_dist] [Rank 0] step=3600, skipped=55, lr=[3.1051160365021587e-06, 3.1051160365021587e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3599|ppo_ep: 1|act_loss: -0.0035114288330078125|cri_loss: -0.00138092041015625|unsuper_loss: 0.0
+average reward score: 4.9375
+-------------------------------------------------------------------------------------
+|E2E latency=2.56s |Gather latency=0.00s (0.00%) |Generate time=1.58s (61.70%) |Training time=0.88s (34.49%) |Others=0.10 (3.81%)|CurSamplesPerSec=12.49 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3600|ppo_ep: 1|act_loss: 0.02923583984375|cri_loss: 0.015167236328125|unsuper_loss: 0.0
+average reward score: 5.0390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.54%) |Training time=0.47s (20.24%) |Others=0.10 (4.22%)|CurSamplesPerSec=13.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3601|ppo_ep: 1|act_loss: -0.040283203125|cri_loss: -0.0187835693359375|unsuper_loss: 0.0
+average reward score: 4.88671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.02%) |Training time=0.49s (22.49%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3602|ppo_ep: 1|act_loss: -0.0059967041015625|cri_loss: -0.0027408599853515625|unsuper_loss: 0.0
+average reward score: 5.328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.73%) |Training time=0.47s (21.72%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3603|ppo_ep: 1|act_loss: -0.01409149169921875|cri_loss: -0.00624847412109375|unsuper_loss: 0.0
+average reward score: 4.75390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.84%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3604|ppo_ep: 1|act_loss: -0.012481689453125|cri_loss: -0.00504302978515625|unsuper_loss: 0.0
+average reward score: 5.59375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.82%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3605|ppo_ep: 1|act_loss: 0.0126190185546875|cri_loss: 0.006801605224609375|unsuper_loss: 0.0
+average reward score: 4.953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.74%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3606|ppo_ep: 1|act_loss: -0.0205230712890625|cri_loss: -0.00936126708984375|unsuper_loss: 0.0
+average reward score: 5.51171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.81%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3607|ppo_ep: 1|act_loss: 0.03607177734375|cri_loss: 0.0196533203125|unsuper_loss: 0.0
+average reward score: 4.8828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.06%) |Training time=0.46s (21.39%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3608|ppo_ep: 1|act_loss: 0.00792694091796875|cri_loss: 0.004543304443359375|unsuper_loss: 0.0
+average reward score: 5.38671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.70%) |Training time=0.47s (21.75%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+[2023-04-14 11:00:36,841] [INFO] [logging.py:96:log_dist] [Rank 0] step=3610, skipped=48, lr=[5.962208632456643e-06, 5.962208632456643e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:00:37,031] [INFO] [timer.py:199:stop] epoch=0/micro_step=3610/global_step=3610, RunningAvgSamplesPerSec=105.57008798024897, CurrSamplesPerSec=66.34913324972403, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:00:37,124] [INFO] [logging.py:96:log_dist] [Rank 0] step=3610, skipped=55, lr=[3.095772799305849e-06, 3.095772799305849e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3609|ppo_ep: 1|act_loss: 0.00445556640625|cri_loss: 0.00432586669921875|unsuper_loss: 0.0
+average reward score: 4.5390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.59s (68.12%) |Training time=0.64s (27.68%) |Others=0.10 (4.20%)|CurSamplesPerSec=13.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3610|ppo_ep: 1|act_loss: 0.0025844573974609375|cri_loss: 0.0023670196533203125|unsuper_loss: 0.0
+average reward score: 5.9453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.84%) |Training time=0.47s (21.60%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3611|ppo_ep: 1|act_loss: 0.001483917236328125|cri_loss: 0.0009298324584960938|unsuper_loss: 0.0
+average reward score: 4.76953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.36%) |Training time=0.46s (21.10%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3612|ppo_ep: 1|act_loss: -0.013458251953125|cri_loss: -0.00569915771484375|unsuper_loss: 0.0
+average reward score: 4.8828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.22%) |Training time=0.46s (21.23%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3613|ppo_ep: 1|act_loss: 0.023895263671875|cri_loss: 0.0121612548828125|unsuper_loss: 0.0
+average reward score: 5.64453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.02%) |Training time=0.49s (22.46%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3614|ppo_ep: 1|act_loss: -0.01114654541015625|cri_loss: -0.00380706787109375|unsuper_loss: 0.0
+average reward score: 5.38671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.81s (76.90%) |Training time=0.45s (18.92%) |Others=0.10 (4.18%)|CurSamplesPerSec=13.58 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3615|ppo_ep: 1|act_loss: -0.00399017333984375|cri_loss: -0.0018091201782226562|unsuper_loss: 0.0
+average reward score: 4.8828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.54%) |Training time=0.41s (18.88%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+[2023-04-14 11:00:52,492] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 3616|ppo_ep: 1|act_loss: 0.008026123046875|cri_loss: 0.00434112548828125|unsuper_loss: 0.0
+average reward score: 4.6796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.71%) |Training time=0.46s (21.22%) |Others=0.09 (4.07%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46
+[2023-04-14 11:00:54,667] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 3617|ppo_ep: 1|act_loss: 0.002307891845703125|cri_loss: 0.00164794921875|unsuper_loss: 0.0
+average reward score: 5.015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.91%) |Training time=0.46s (21.00%) |Others=0.09 (4.09%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3618|ppo_ep: 1|act_loss: 0.0006394386291503906|cri_loss: 0.0004863739013671875|unsuper_loss: 0.0
+average reward score: 4.8125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.28%) |Training time=0.46s (21.21%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.46
+[2023-04-14 11:00:58,913] [INFO] [logging.py:96:log_dist] [Rank 0] step=3620, skipped=48, lr=[5.944147370015059e-06, 5.944147370015059e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:00:58,931] [INFO] [timer.py:199:stop] epoch=0/micro_step=3620/global_step=3620, RunningAvgSamplesPerSec=105.58135629427218, CurrSamplesPerSec=118.61425940393161, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:00:59,024] [INFO] [logging.py:96:log_dist] [Rank 0] step=3620, skipped=57, lr=[3.0882918447280204e-06, 3.0882918447280204e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3619|ppo_ep: 1|act_loss: -0.03326416015625|cri_loss: -0.01611328125|unsuper_loss: 0.0
+average reward score: 5.1484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.46%) |Training time=0.43s (19.96%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3620|ppo_ep: 1|act_loss: -0.02386474609375|cri_loss: -0.01171112060546875|unsuper_loss: 0.0
+average reward score: 5.25
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.70%) |Training time=0.43s (19.74%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3621|ppo_ep: 1|act_loss: 0.01934814453125|cri_loss: 0.01015472412109375|unsuper_loss: 0.0
+average reward score: 5.0
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.21%) |Training time=0.46s (21.23%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3622|ppo_ep: 1|act_loss: -0.01226806640625|cri_loss: -0.00591278076171875|unsuper_loss: 0.0
+average reward score: 4.8046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.31%) |Training time=0.46s (21.14%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3623|ppo_ep: 1|act_loss: -0.01050567626953125|cri_loss: -0.0050811767578125|unsuper_loss: 0.0
+average reward score: 5.2265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.17%) |Training time=0.46s (21.29%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3624|ppo_ep: 1|act_loss: -0.0005555152893066406|cri_loss: 0.0001010894775390625|unsuper_loss: 0.0
+average reward score: 5.59375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.24%) |Training time=0.46s (21.21%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3625|ppo_ep: 1|act_loss: 0.0088043212890625|cri_loss: 0.0050048828125|unsuper_loss: 0.0
+average reward score: 4.6640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.30%) |Training time=0.46s (21.14%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3626|ppo_ep: 1|act_loss: -0.041839599609375|cri_loss: -0.020050048828125|unsuper_loss: 0.0
+average reward score: 5.16015625
+-------------------------------------------------------------------------------------
+|E2E latency=3.37s |Gather latency=0.00s (0.00%) |Generate time=1.62s (48.24%) |Training time=0.47s (13.97%) |Others=1.27 (37.79%)|CurSamplesPerSec=9.50 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3627|ppo_ep: 1|act_loss: -0.0139312744140625|cri_loss: -0.0064697265625|unsuper_loss: 0.0
+average reward score: 4.82421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.17%) |Training time=0.46s (21.16%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3628|ppo_ep: 1|act_loss: -0.03765869140625|cri_loss: -0.0177001953125|unsuper_loss: 0.0
+average reward score: 5.5546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.62%) |Training time=0.44s (20.77%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.46
+[2023-04-14 11:01:21,955] [INFO] [logging.py:96:log_dist] [Rank 0] step=3630, skipped=48, lr=[5.926069519109657e-06, 5.926069519109657e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:01:21,973] [INFO] [timer.py:199:stop] epoch=0/micro_step=3630/global_step=3630, RunningAvgSamplesPerSec=105.5895945710597, CurrSamplesPerSec=105.85695582357191, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:01:22,071] [INFO] [logging.py:96:log_dist] [Rank 0] step=3630, skipped=57, lr=[3.078932811909849e-06, 3.078932811909849e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3629|ppo_ep: 1|act_loss: -0.034912109375|cri_loss: -0.0164337158203125|unsuper_loss: 0.0
+average reward score: 5.29296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.60%) |Training time=0.47s (19.87%) |Others=0.11 (4.53%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3630|ppo_ep: 1|act_loss: -0.0015287399291992188|cri_loss: 3.528594970703125e-05|unsuper_loss: 0.0
+average reward score: 4.5
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.70%) |Training time=0.44s (20.62%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3631|ppo_ep: 1|act_loss: 0.0217132568359375|cri_loss: 0.01214599609375|unsuper_loss: 0.0
+average reward score: 4.7421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.87%) |Training time=0.44s (20.54%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3632|ppo_ep: 1|act_loss: -0.015869140625|cri_loss: -0.00717926025390625|unsuper_loss: 0.0
+average reward score: 4.9453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.23%) |Training time=0.45s (21.11%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3633|ppo_ep: 1|act_loss: 0.01430511474609375|cri_loss: 0.0074462890625|unsuper_loss: 0.0
+average reward score: 5.79296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.95%) |Training time=0.46s (21.47%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3634|ppo_ep: 1|act_loss: -0.0142669677734375|cri_loss: -0.006793975830078125|unsuper_loss: 0.0
+average reward score: 3.82421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.95%) |Training time=0.46s (21.47%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3635|ppo_ep: 1|act_loss: 0.01023101806640625|cri_loss: 0.00545501708984375|unsuper_loss: 0.0
+average reward score: 5.4296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.44%) |Training time=0.45s (20.97%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3636|ppo_ep: 1|act_loss: 0.01654052734375|cri_loss: 0.0084381103515625|unsuper_loss: 0.0
+average reward score: 4.94921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.12%) |Training time=0.46s (21.31%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3637|ppo_ep: 1|act_loss: -0.031585693359375|cri_loss: -0.01507568359375|unsuper_loss: 0.0
+average reward score: 6.13671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.91%) |Training time=0.46s (21.51%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3638|ppo_ep: 1|act_loss: 0.0236053466796875|cri_loss: 0.01245880126953125|unsuper_loss: 0.0
+average reward score: 5.23828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.46s (21.46%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+[2023-04-14 11:01:43,424] [INFO] [logging.py:96:log_dist] [Rank 0] step=3640, skipped=48, lr=[5.907975347697794e-06, 5.907975347697794e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:01:43,443] [INFO] [timer.py:199:stop] epoch=0/micro_step=3640/global_step=3640, RunningAvgSamplesPerSec=105.59923223574494, CurrSamplesPerSec=105.7564471787954, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:01:43,535] [INFO] [logging.py:96:log_dist] [Rank 0] step=3640, skipped=57, lr=[3.069565197910806e-06, 3.069565197910806e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3639|ppo_ep: 1|act_loss: 0.0116119384765625|cri_loss: 0.00627899169921875|unsuper_loss: 0.0
+average reward score: 5.40625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.77%) |Training time=0.47s (21.68%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3640|ppo_ep: 1|act_loss: -0.004119873046875|cri_loss: -0.0014057159423828125|unsuper_loss: 0.0
+average reward score: 4.40625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.88%) |Training time=0.46s (21.53%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3641|ppo_ep: 1|act_loss: 0.00606536865234375|cri_loss: 0.004093170166015625|unsuper_loss: 0.0
+average reward score: 4.7890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.77%) |Training time=0.46s (21.60%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3642|ppo_ep: 1|act_loss: 0.00603485107421875|cri_loss: 0.00371551513671875|unsuper_loss: 0.0
+average reward score: 4.46875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.57%) |Training time=0.45s (20.88%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3643|ppo_ep: 1|act_loss: 0.00029468536376953125|cri_loss: 0.0005202293395996094|unsuper_loss: 0.0
+average reward score: 4.921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.92%) |Training time=0.46s (21.50%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3644|ppo_ep: 1|act_loss: -0.00589752197265625|cri_loss: -0.00200653076171875|unsuper_loss: 0.0
+average reward score: 4.6640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.89%) |Training time=0.46s (19.85%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.86 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3645|ppo_ep: 1|act_loss: 0.0184326171875|cri_loss: 0.009765625|unsuper_loss: 0.0
+average reward score: 5.7265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.72%) |Training time=0.44s (20.70%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3646|ppo_ep: 1|act_loss: 0.01568603515625|cri_loss: 0.00806427001953125|unsuper_loss: 0.0
+average reward score: 4.5234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.30%) |Training time=0.45s (21.13%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3647|ppo_ep: 1|act_loss: -0.00588226318359375|cri_loss: -0.0027637481689453125|unsuper_loss: 0.0
+average reward score: 5.4609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.49s |Gather latency=0.00s (0.00%) |Generate time=1.59s (63.95%) |Training time=0.46s (18.28%) |Others=0.44 (17.77%)|CurSamplesPerSec=12.85 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3648|ppo_ep: 1|act_loss: -0.0154571533203125|cri_loss: -0.00659942626953125|unsuper_loss: 0.0
+average reward score: 5.08203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.12%) |Training time=0.46s (21.30%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
+[2023-04-14 11:02:05,410] [INFO] [logging.py:96:log_dist] [Rank 0] step=3650, skipped=48, lr=[5.889865123978745e-06, 5.889865123978745e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:02:05,429] [INFO] [timer.py:199:stop] epoch=0/micro_step=3650/global_step=3650, RunningAvgSamplesPerSec=105.60741290301202, CurrSamplesPerSec=106.42917645640826, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:02:05,521] [INFO] [logging.py:96:log_dist] [Rank 0] step=3650, skipped=57, lr=[3.0601891415815286e-06, 3.0601891415815286e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3649|ppo_ep: 1|act_loss: -0.001522064208984375|cri_loss: -0.0004944801330566406|unsuper_loss: 0.0
+average reward score: 5.62890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.90%) |Training time=0.46s (21.54%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
+[2023-04-14 11:02:07,543] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 3650|ppo_ep: 1|act_loss: -0.002902984619140625|cri_loss: -0.0012159347534179688|unsuper_loss: 0.0
+average reward score: 5.1328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.59s (75.42%) |Training time=0.42s (19.95%) |Others=0.10 (4.63%)|CurSamplesPerSec=15.14 |AvgSamplesPerSec=14.46
+[2023-04-14 11:02:09,663] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 3651|ppo_ep: 1|act_loss: -0.030426025390625|cri_loss: -0.01433563232421875|unsuper_loss: 0.0
+average reward score: 4.93359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.59s (75.21%) |Training time=0.43s (20.16%) |Others=0.10 (4.62%)|CurSamplesPerSec=15.10 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3652|ppo_ep: 1|act_loss: -0.01470947265625|cri_loss: -0.00661468505859375|unsuper_loss: 0.0
+average reward score: 4.55859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.48%) |Training time=0.45s (20.93%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3653|ppo_ep: 1|act_loss: 0.00142669677734375|cri_loss: 0.0014524459838867188|unsuper_loss: 0.0
+average reward score: 5.515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.37%) |Training time=0.45s (21.03%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3654|ppo_ep: 1|act_loss: 0.007843017578125|cri_loss: 0.0045013427734375|unsuper_loss: 0.0
+average reward score: 5.23828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.11%) |Training time=0.46s (21.30%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3655|ppo_ep: 1|act_loss: 0.038421630859375|cri_loss: 0.020263671875|unsuper_loss: 0.0
+average reward score: 5.578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.15%) |Training time=0.45s (20.67%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3656|ppo_ep: 1|act_loss: 0.031494140625|cri_loss: 0.016754150390625|unsuper_loss: 0.0
+average reward score: 4.28125
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.32%) |Training time=0.46s (20.29%) |Others=0.10 (4.39%)|CurSamplesPerSec=14.19 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3657|ppo_ep: 1|act_loss: 0.0087890625|cri_loss: 0.004703521728515625|unsuper_loss: 0.0
+average reward score: 4.9453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.00%) |Training time=0.46s (21.46%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3658|ppo_ep: 1|act_loss: 0.00679779052734375|cri_loss: 0.003719329833984375|unsuper_loss: 0.0
+average reward score: 4.96484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.57%) |Training time=0.40s (18.52%) |Others=0.17 (7.92%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+[2023-04-14 11:02:27,160] [INFO] [logging.py:96:log_dist] [Rank 0] step=3660, skipped=50, lr=[5.875365567724234e-06, 5.875365567724234e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:02:27,178] [INFO] [timer.py:199:stop] epoch=0/micro_step=3660/global_step=3660, RunningAvgSamplesPerSec=105.62991461506235, CurrSamplesPerSec=107.63474458970275, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:02:27,271] [INFO] [logging.py:96:log_dist] [Rank 0] step=3660, skipped=57, lr=[3.050804781897791e-06, 3.050804781897791e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3659|ppo_ep: 1|act_loss: -0.00601959228515625|cri_loss: -0.002696990966796875|unsuper_loss: 0.0
+average reward score: 5.46875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.01%) |Training time=0.46s (21.42%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3660|ppo_ep: 1|act_loss: -0.065185546875|cri_loss: -0.03179931640625|unsuper_loss: 0.0
+average reward score: 5.4609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.09%) |Training time=0.46s (21.33%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3661|ppo_ep: 1|act_loss: -0.0310211181640625|cri_loss: -0.013946533203125|unsuper_loss: 0.0
+average reward score: 5.1953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.61%) |Training time=0.47s (21.80%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3662|ppo_ep: 1|act_loss: -0.03411865234375|cri_loss: -0.01593017578125|unsuper_loss: 0.0
+average reward score: 5.3671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.65%) |Training time=0.47s (21.76%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3663|ppo_ep: 1|act_loss: 0.043853759765625|cri_loss: 0.0233917236328125|unsuper_loss: 0.0
+average reward score: 5.7109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.73%) |Training time=0.47s (21.70%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3664|ppo_ep: 1|act_loss: 0.02154541015625|cri_loss: 0.0111846923828125|unsuper_loss: 0.0
+average reward score: 4.7734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.57%) |Training time=0.47s (21.85%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3665|ppo_ep: 1|act_loss: 0.023284912109375|cri_loss: 0.0124053955078125|unsuper_loss: 0.0
+average reward score: 5.203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.56%) |Training time=0.47s (21.87%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3666|ppo_ep: 1|act_loss: -0.0031890869140625|cri_loss: -0.0012874603271484375|unsuper_loss: 0.0
+average reward score: 4.8203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.99%) |Training time=0.46s (21.46%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3667|ppo_ep: 1|act_loss: 0.03533935546875|cri_loss: 0.0186767578125|unsuper_loss: 0.0
+average reward score: 4.8671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.62%) |Training time=0.47s (21.77%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3668|ppo_ep: 1|act_loss: -0.030364990234375|cri_loss: -0.01441192626953125|unsuper_loss: 0.0
+average reward score: 5.51953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.64%) |Training time=0.47s (21.79%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
+[2023-04-14 11:02:48,613] [INFO] [logging.py:96:log_dist] [Rank 0] step=3670, skipped=50, lr=[5.85722712647135e-06, 5.85722712647135e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:02:48,631] [INFO] [timer.py:199:stop] epoch=0/micro_step=3670/global_step=3670, RunningAvgSamplesPerSec=105.6294239805947, CurrSamplesPerSec=105.60508599933907, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:02:48,724] [INFO] [logging.py:96:log_dist] [Rank 0] step=3670, skipped=57, lr=[3.0414122579584403e-06, 3.0414122579584403e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3669|ppo_ep: 1|act_loss: -0.0185546875|cri_loss: -0.0088653564453125|unsuper_loss: 0.0
+average reward score: 5.953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.69%) |Training time=0.47s (21.72%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3670|ppo_ep: 1|act_loss: -0.027069091796875|cri_loss: -0.01312255859375|unsuper_loss: 0.0
+average reward score: 5.16015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.44%) |Training time=0.47s (21.98%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3671|ppo_ep: 1|act_loss: -0.01959228515625|cri_loss: -0.00933074951171875|unsuper_loss: 0.0
+average reward score: 5.1796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.79%) |Training time=0.46s (21.45%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3672|ppo_ep: 1|act_loss: -0.012786865234375|cri_loss: -0.005970001220703125|unsuper_loss: 0.0
+average reward score: 4.66015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.10%) |Training time=0.49s (22.29%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3673|ppo_ep: 1|act_loss: 0.0139007568359375|cri_loss: 0.00730133056640625|unsuper_loss: 0.0
+average reward score: 5.46875
+-------------------------------------------------------------------------------------
+|E2E latency=2.53s |Gather latency=0.00s (0.00%) |Generate time=1.59s (62.96%) |Training time=0.48s (19.10%) |Others=0.45 (17.94%)|CurSamplesPerSec=12.66 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3674|ppo_ep: 1|act_loss: 0.0052490234375|cri_loss: 0.0029621124267578125|unsuper_loss: 0.0
+average reward score: 5.703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.11%) |Training time=0.48s (22.29%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3675|ppo_ep: 1|act_loss: 0.02960205078125|cri_loss: 0.01507568359375|unsuper_loss: 0.0
+average reward score: 4.94140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.28%) |Training time=0.48s (22.13%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3676|ppo_ep: 1|act_loss: 0.0081329345703125|cri_loss: 0.00453948974609375|unsuper_loss: 0.0
+average reward score: 5.44921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.90%) |Training time=0.49s (22.50%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3677|ppo_ep: 1|act_loss: 0.030242919921875|cri_loss: 0.01534271240234375|unsuper_loss: 0.0
+average reward score: 4.87890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.10%) |Training time=0.48s (22.34%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3678|ppo_ep: 1|act_loss: 0.009796142578125|cri_loss: 0.005096435546875|unsuper_loss: 0.0
+average reward score: 4.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.40%) |Training time=0.48s (22.02%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+[2023-04-14 11:03:10,632] [INFO] [logging.py:96:log_dist] [Rank 0] step=3680, skipped=50, lr=[5.8390733851223396e-06, 5.8390733851223396e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:03:10,651] [INFO] [timer.py:199:stop] epoch=0/micro_step=3680/global_step=3680, RunningAvgSamplesPerSec=105.61558640234166, CurrSamplesPerSec=100.31370274593043, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:03:10,744] [INFO] [logging.py:96:log_dist] [Rank 0] step=3680, skipped=57, lr=[3.0320117089833414e-06, 3.0320117089833414e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3679|ppo_ep: 1|act_loss: -0.0201873779296875|cri_loss: -0.0089111328125|unsuper_loss: 0.0
+average reward score: 5.4609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.23%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3680|ppo_ep: 1|act_loss: -0.060821533203125|cri_loss: -0.028778076171875|unsuper_loss: 0.0
+average reward score: 5.39453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.02%) |Training time=0.49s (22.39%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3681|ppo_ep: 1|act_loss: 0.003955841064453125|cri_loss: 0.004291534423828125|unsuper_loss: 0.0
+average reward score: 4.5
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.61%) |Training time=0.47s (21.84%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3682|ppo_ep: 1|act_loss: -0.01025390625|cri_loss: -0.0044708251953125|unsuper_loss: 0.0
+average reward score: 4.9609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.97%) |Training time=0.49s (22.48%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3683|ppo_ep: 1|act_loss: -0.026702880859375|cri_loss: -0.0126190185546875|unsuper_loss: 0.0
+average reward score: 5.83984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.48s (21.94%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3684|ppo_ep: 1|act_loss: -0.0031147003173828125|cri_loss: -0.0009250640869140625|unsuper_loss: 0.0
+average reward score: 5.26953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.30%) |Training time=0.48s (22.19%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3685|ppo_ep: 1|act_loss: 0.017364501953125|cri_loss: 0.00927734375|unsuper_loss: 0.0
+average reward score: 5.3671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.26%) |Training time=0.49s (22.07%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3686|ppo_ep: 1|act_loss: 0.017181396484375|cri_loss: 0.0089111328125|unsuper_loss: 0.0
+average reward score: 5.0390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.67s (74.08%) |Training time=0.49s (21.48%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.16 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3687|ppo_ep: 1|act_loss: 0.025054931640625|cri_loss: 0.01331329345703125|unsuper_loss: 0.0
+average reward score: 5.5
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.61%) |Training time=0.47s (21.82%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3688|ppo_ep: 1|act_loss: 0.032684326171875|cri_loss: 0.0173187255859375|unsuper_loss: 0.0
+average reward score: 5.28515625
+-------------------------------------------------------------------------------------
+|E2E latency=3.45s |Gather latency=0.00s (0.00%) |Generate time=1.59s (45.98%) |Training time=0.49s (14.10%) |Others=1.38 (39.92%)|CurSamplesPerSec=9.28 |AvgSamplesPerSec=14.46
+[2023-04-14 11:03:33,753] [INFO] [logging.py:96:log_dist] [Rank 0] step=3690, skipped=50, lr=[5.820904612759442e-06, 5.820904612759442e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:03:33,772] [INFO] [timer.py:199:stop] epoch=0/micro_step=3690/global_step=3690, RunningAvgSamplesPerSec=105.59878176187217, CurrSamplesPerSec=96.78853632804528, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:03:33,865] [INFO] [logging.py:96:log_dist] [Rank 0] step=3690, skipped=57, lr=[3.022603274311307e-06, 3.022603274311307e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3689|ppo_ep: 1|act_loss: -0.01277923583984375|cri_loss: -0.0057220458984375|unsuper_loss: 0.0
+average reward score: 5.0390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.77%) |Training time=0.49s (22.68%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3690|ppo_ep: 1|act_loss: 0.0052642822265625|cri_loss: 0.00312042236328125|unsuper_loss: 0.0
+average reward score: 4.484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.29%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3691|ppo_ep: 1|act_loss: -0.028533935546875|cri_loss: -0.01375579833984375|unsuper_loss: 0.0
+average reward score: 5.3125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.87%) |Training time=0.49s (22.59%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3692|ppo_ep: 1|act_loss: -0.03851318359375|cri_loss: -0.0184783935546875|unsuper_loss: 0.0
+average reward score: 5.12109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.64%) |Training time=0.49s (22.79%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3693|ppo_ep: 1|act_loss: 0.05462646484375|cri_loss: 0.031951904296875|unsuper_loss: 0.0
+average reward score: 5.15625
+-------------------------------------------------------------------------------------
+|E2E latency=3.03s |Gather latency=0.00s (0.00%) |Generate time=1.59s (52.57%) |Training time=0.49s (16.28%) |Others=0.94 (31.15%)|CurSamplesPerSec=10.55 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3694|ppo_ep: 1|act_loss: -0.03289794921875|cri_loss: -0.015838623046875|unsuper_loss: 0.0
+average reward score: 4.7890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.56%) |Training time=0.50s (22.90%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3695|ppo_ep: 1|act_loss: 0.0087890625|cri_loss: 0.004550933837890625|unsuper_loss: 0.0
+average reward score: 5.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.63%) |Training time=0.49s (22.80%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+[2023-04-14 11:03:49,795] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
+epoch: 0|step: 3696|ppo_ep: 1|act_loss: -0.009063720703125|cri_loss: -0.0033855438232421875|unsuper_loss: 0.0
+average reward score: 4.74609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.64%) |Training time=0.47s (21.74%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
+[2023-04-14 11:03:51,942] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192
+epoch: 0|step: 3697|ppo_ep: 1|act_loss: -0.06842041015625|cri_loss: -0.03167724609375|unsuper_loss: 0.0
+average reward score: 5.03125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.41%) |Training time=0.47s (21.99%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3698|ppo_ep: 1|act_loss: 0.05010986328125|cri_loss: 0.0263214111328125|unsuper_loss: 0.0
+average reward score: 5.234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.61%) |Training time=0.50s (22.86%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+[2023-04-14 11:03:56,267] [INFO] [logging.py:96:log_dist] [Rank 0] step=3700, skipped=52, lr=[5.806358953504726e-06, 5.806358953504726e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:03:56,286] [INFO] [timer.py:199:stop] epoch=0/micro_step=3700/global_step=3700, RunningAvgSamplesPerSec=105.5771961459714, CurrSamplesPerSec=97.05807384695485, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:03:56,380] [INFO] [logging.py:96:log_dist] [Rank 0] step=3700, skipped=57, lr=[3.013187093398035e-06, 3.013187093398035e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3699|ppo_ep: 1|act_loss: -0.003143310546875|cri_loss: -0.001316070556640625|unsuper_loss: 0.0
+average reward score: 4.859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.75%) |Training time=0.49s (22.67%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3700|ppo_ep: 1|act_loss: -0.018280029296875|cri_loss: -0.008575439453125|unsuper_loss: 0.0
+average reward score: 4.87890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.11%) |Training time=0.48s (22.36%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3701|ppo_ep: 1|act_loss: -0.018524169921875|cri_loss: -0.00849151611328125|unsuper_loss: 0.0
+average reward score: 4.8125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.24%) |Training time=0.48s (22.16%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3702|ppo_ep: 1|act_loss: 0.01904296875|cri_loss: 0.00995635986328125|unsuper_loss: 0.0
+average reward score: 5.17578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.31%) |Training time=0.48s (22.14%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3703|ppo_ep: 1|act_loss: 0.00037097930908203125|cri_loss: 0.0003833770751953125|unsuper_loss: 0.0
+average reward score: 5.90234375
+-------------------------------------------------------------------------------------
+|E2E latency=3.27s |Gather latency=0.00s (0.00%) |Generate time=1.75s (53.58%) |Training time=0.49s (14.82%) |Others=1.03 (31.60%)|CurSamplesPerSec=9.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3704|ppo_ep: 1|act_loss: -0.04229736328125|cri_loss: -0.01947021484375|unsuper_loss: 0.0
+average reward score: 5.7265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.03%) |Training time=0.49s (22.37%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3705|ppo_ep: 1|act_loss: -0.007030487060546875|cri_loss: -0.0033054351806640625|unsuper_loss: 0.0
+average reward score: 4.59375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.01%) |Training time=0.49s (22.40%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3706|ppo_ep: 1|act_loss: -0.02105712890625|cri_loss: -0.0096282958984375|unsuper_loss: 0.0
+average reward score: 5.09375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.17%) |Training time=0.48s (22.25%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3707|ppo_ep: 1|act_loss: -0.01012420654296875|cri_loss: -0.00421905517578125|unsuper_loss: 0.0
+average reward score: 5.02734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.91%) |Training time=0.49s (22.56%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3708|ppo_ep: 1|act_loss: 0.0220947265625|cri_loss: 0.01151275634765625|unsuper_loss: 0.0
+average reward score: 5.20703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.70s |Gather latency=0.00s (0.00%) |Generate time=1.59s (58.93%) |Training time=0.48s (17.65%) |Others=0.63 (23.42%)|CurSamplesPerSec=11.85 |AvgSamplesPerSec=14.46
+[2023-04-14 11:04:19,624] [INFO] [logging.py:96:log_dist] [Rank 0] step=3710, skipped=52, lr=[5.788163804112695e-06, 5.788163804112695e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:04:19,642] [INFO] [timer.py:199:stop] epoch=0/micro_step=3710/global_step=3710, RunningAvgSamplesPerSec=105.55912844115356, CurrSamplesPerSec=96.80682664850997, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:04:19,736] [INFO] [logging.py:96:log_dist] [Rank 0] step=3710, skipped=57, lr=[3.0037633058140433e-06, 3.0037633058140433e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3709|ppo_ep: 1|act_loss: 0.0076446533203125|cri_loss: 0.00484466552734375|unsuper_loss: 0.0
+average reward score: 5.703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.63%) |Training time=0.49s (22.78%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3710|ppo_ep: 1|act_loss: 0.03350830078125|cri_loss: 0.0171356201171875|unsuper_loss: 0.0
+average reward score: 4.22265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.71%) |Training time=0.49s (22.71%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3711|ppo_ep: 1|act_loss: -0.0281982421875|cri_loss: -0.01369476318359375|unsuper_loss: 0.0
+average reward score: 5.3359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.98%) |Training time=0.49s (22.45%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3712|ppo_ep: 1|act_loss: 0.04644775390625|cri_loss: 0.02423095703125|unsuper_loss: 0.0
+average reward score: 5.6015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.90%) |Training time=0.49s (22.59%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3713|ppo_ep: 1|act_loss: 0.0093536376953125|cri_loss: 0.0048980712890625|unsuper_loss: 0.0
+average reward score: 4.2109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.61s (72.46%) |Training time=0.51s (22.91%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.38 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3714|ppo_ep: 1|act_loss: -0.018890380859375|cri_loss: -0.0089263916015625|unsuper_loss: 0.0
+average reward score: 5.390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.02%) |Training time=0.49s (22.41%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3715|ppo_ep: 1|act_loss: -0.0121917724609375|cri_loss: -0.00533294677734375|unsuper_loss: 0.0
+average reward score: 5.9140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.09%) |Training time=0.49s (22.28%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3716|ppo_ep: 1|act_loss: -0.04791259765625|cri_loss: -0.0214996337890625|unsuper_loss: 0.0
+average reward score: 5.19140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.10%) |Training time=0.46s (21.30%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3717|ppo_ep: 1|act_loss: -0.0043792724609375|cri_loss: -0.00199127197265625|unsuper_loss: 0.0
+average reward score: 5.00390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.02%) |Training time=0.48s (20.75%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.46
+[2023-04-14 11:04:39,479] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 3718|ppo_ep: 1|act_loss: -0.0160369873046875|cri_loss: -0.007083892822265625|unsuper_loss: 0.0
+average reward score: 5.7890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.37%) |Training time=0.49s (22.54%) |Others=0.09 (4.09%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+[2023-04-14 11:04:41,549] [INFO] [logging.py:96:log_dist] [Rank 0] step=3720, skipped=52, lr=[5.769954378309581e-06, 5.769954378309581e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:04:41,567] [INFO] [timer.py:199:stop] epoch=0/micro_step=3720/global_step=3720, RunningAvgSamplesPerSec=105.53938682509363, CurrSamplesPerSec=100.27136032034784, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:04:42,121] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+[2023-04-14 11:04:42,122] [INFO] [logging.py:96:log_dist] [Rank 0] step=3720, skipped=59, lr=[2.99621889280735e-06, 2.99621889280735e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3719|ppo_ep: 1|act_loss: 0.00487518310546875|cri_loss: 0.0027008056640625|unsuper_loss: 0.0
+average reward score: 5.5390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.64s |Gather latency=0.00s (0.00%) |Generate time=1.60s (60.67%) |Training time=0.95s (36.04%) |Others=0.09 (3.29%)|CurSamplesPerSec=12.11 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3720|ppo_ep: 1|act_loss: 0.0572509765625|cri_loss: 0.030029296875|unsuper_loss: 0.0
+average reward score: 5.40234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.60%) |Training time=0.50s (22.83%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3721|ppo_ep: 1|act_loss: 0.03192138671875|cri_loss: 0.0172576904296875|unsuper_loss: 0.0
+average reward score: 4.87109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.63%) |Training time=0.50s (22.84%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3722|ppo_ep: 1|act_loss: 0.0190887451171875|cri_loss: 0.01143646240234375|unsuper_loss: 0.0
+average reward score: 5.48828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.74%) |Training time=0.49s (22.65%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3723|ppo_ep: 1|act_loss: 0.0341796875|cri_loss: 0.0178985595703125|unsuper_loss: 0.0
+average reward score: 5.59375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.79%) |Training time=0.49s (22.67%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3724|ppo_ep: 1|act_loss: 0.007442474365234375|cri_loss: 0.005657196044921875|unsuper_loss: 0.0
+average reward score: 4.8671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.63%) |Training time=0.49s (22.77%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3725|ppo_ep: 1|act_loss: 0.047576904296875|cri_loss: 0.026275634765625|unsuper_loss: 0.0
+average reward score: 4.34375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.29%) |Training time=0.50s (23.21%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3726|ppo_ep: 1|act_loss: -0.020843505859375|cri_loss: -0.009979248046875|unsuper_loss: 0.0
+average reward score: 5.66015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.68%) |Training time=0.49s (22.72%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3727|ppo_ep: 1|act_loss: -0.0172119140625|cri_loss: -0.0084991455078125|unsuper_loss: 0.0
+average reward score: 5.3671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.55%) |Training time=0.50s (22.93%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3728|ppo_ep: 1|act_loss: -0.02239990234375|cri_loss: -0.010528564453125|unsuper_loss: 0.0
+average reward score: 5.5390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.51%) |Training time=0.50s (22.95%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
+[2023-04-14 11:05:03,709] [INFO] [logging.py:96:log_dist] [Rank 0] step=3730, skipped=52, lr=[5.7517309460030025e-06, 5.7517309460030025e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:05:03,727] [INFO] [timer.py:199:stop] epoch=0/micro_step=3730/global_step=3730, RunningAvgSamplesPerSec=105.51180342958422, CurrSamplesPerSec=98.24020143168742, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:05:03,821] [INFO] [logging.py:96:log_dist] [Rank 0] step=3730, skipped=59, lr=[2.9867817652923354e-06, 2.9867817652923354e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3729|ppo_ep: 1|act_loss: -0.0194091796875|cri_loss: -0.009429931640625|unsuper_loss: 0.0
+average reward score: 4.7265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.96%) |Training time=0.49s (22.49%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3730|ppo_ep: 1|act_loss: -0.02294921875|cri_loss: -0.01116180419921875|unsuper_loss: 0.0
+average reward score: 5.140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.87%) |Training time=0.49s (22.58%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3731|ppo_ep: 1|act_loss: -0.0149078369140625|cri_loss: -0.006526947021484375|unsuper_loss: 0.0
+average reward score: 4.7734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.15%) |Training time=0.48s (22.30%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3732|ppo_ep: 1|act_loss: 0.040771484375|cri_loss: 0.021240234375|unsuper_loss: 0.0
+average reward score: 5.578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.33%) |Training time=0.48s (20.48%) |Others=0.10 (4.20%)|CurSamplesPerSec=13.70 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3733|ppo_ep: 1|act_loss: 0.03448486328125|cri_loss: 0.0180511474609375|unsuper_loss: 0.0
+average reward score: 5.48828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.12%) |Training time=0.49s (22.37%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3734|ppo_ep: 1|act_loss: 0.04742431640625|cri_loss: 0.02508544921875|unsuper_loss: 0.0
+average reward score: 5.1015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.22%) |Training time=0.48s (22.26%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3735|ppo_ep: 1|act_loss: 0.0178375244140625|cri_loss: 0.00946807861328125|unsuper_loss: 0.0
+average reward score: 4.83203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.59s (67.02%) |Training time=0.48s (20.40%) |Others=0.30 (12.59%)|CurSamplesPerSec=13.50 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3736|ppo_ep: 1|act_loss: 0.0396728515625|cri_loss: 0.0210418701171875|unsuper_loss: 0.0
+average reward score: 6.1640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.61%) |Training time=0.48s (22.08%) |Others=0.12 (5.31%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3737|ppo_ep: 1|act_loss: 0.00572967529296875|cri_loss: 0.0031280517578125|unsuper_loss: 0.0
+average reward score: 4.50390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.91%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3738|ppo_ep: 1|act_loss: 0.01273345947265625|cri_loss: 0.00933074951171875|unsuper_loss: 0.0
+average reward score: 4.9140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.80%) |Training time=0.49s (22.68%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
+[2023-04-14 11:05:25,794] [INFO] [logging.py:96:log_dist] [Rank 0] step=3740, skipped=52, lr=[5.733493777308187e-06, 5.733493777308187e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:05:25,813] [INFO] [timer.py:199:stop] epoch=0/micro_step=3740/global_step=3740, RunningAvgSamplesPerSec=105.49538247453388, CurrSamplesPerSec=100.76580001006023, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:05:25,905] [INFO] [logging.py:96:log_dist] [Rank 0] step=3740, skipped=59, lr=[2.977337422497263e-06, 2.977337422497263e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3739|ppo_ep: 1|act_loss: -0.03662109375|cri_loss: -0.0174713134765625|unsuper_loss: 0.0
+average reward score: 5.36328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.33%) |Training time=0.48s (22.16%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3740|ppo_ep: 1|act_loss: -0.02685546875|cri_loss: -0.01302337646484375|unsuper_loss: 0.0
+average reward score: 5.0390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.61%) |Training time=0.50s (22.89%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3741|ppo_ep: 1|act_loss: -0.000637054443359375|cri_loss: 0.001438140869140625|unsuper_loss: 0.0
+average reward score: 5.6796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.42s |Gather latency=0.00s (0.00%) |Generate time=1.61s (66.66%) |Training time=0.48s (19.73%) |Others=0.33 (13.61%)|CurSamplesPerSec=13.23 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3742|ppo_ep: 1|act_loss: -0.0020751953125|cri_loss: -0.0006694793701171875|unsuper_loss: 0.0
+average reward score: 5.17578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.66s (72.44%) |Training time=0.47s (20.73%) |Others=0.16 (6.83%)|CurSamplesPerSec=13.99 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3743|ppo_ep: 1|act_loss: -0.0175628662109375|cri_loss: -0.0082855224609375|unsuper_loss: 0.0
+average reward score: 3.8359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.62%) |Training time=0.45s (20.86%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3744|ppo_ep: 1|act_loss: 0.035675048828125|cri_loss: 0.0195159912109375|unsuper_loss: 0.0
+average reward score: 5.43359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.40%) |Training time=0.44s (20.06%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3745|ppo_ep: 1|act_loss: -0.037872314453125|cri_loss: -0.0181121826171875|unsuper_loss: 0.0
+average reward score: 4.8359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.82%) |Training time=0.48s (21.66%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3746|ppo_ep: 1|act_loss: -0.006008148193359375|cri_loss: -0.002582550048828125|unsuper_loss: 0.0
+average reward score: 5.44140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.39s |Gather latency=0.00s (0.00%) |Generate time=1.80s (75.25%) |Training time=0.49s (20.63%) |Others=0.10 (4.13%)|CurSamplesPerSec=13.39 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3747|ppo_ep: 1|act_loss: -0.001678466796875|cri_loss: -0.0007061958312988281|unsuper_loss: 0.0
+average reward score: 5.640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.73%) |Training time=0.44s (20.68%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3748|ppo_ep: 1|act_loss: 0.01319122314453125|cri_loss: 0.00751495361328125|unsuper_loss: 0.0
+average reward score: 5.69140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.06%) |Training time=0.44s (20.31%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
+[2023-04-14 11:05:48,055] [INFO] [logging.py:96:log_dist] [Rank 0] step=3750, skipped=52, lr=[5.7152431425439704e-06, 5.7152431425439704e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:05:48,073] [INFO] [timer.py:199:stop] epoch=0/micro_step=3750/global_step=3750, RunningAvgSamplesPerSec=105.49968423227209, CurrSamplesPerSec=121.1488010831547, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:05:48,166] [INFO] [logging.py:96:log_dist] [Rank 0] step=3750, skipped=59, lr=[2.9678860044100745e-06, 2.9678860044100745e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3749|ppo_ep: 1|act_loss: -0.014923095703125|cri_loss: -0.0067596435546875|unsuper_loss: 0.0
+average reward score: 4.671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.50%) |Training time=0.43s (19.92%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3750|ppo_ep: 1|act_loss: 0.04046630859375|cri_loss: 0.0205535888671875|unsuper_loss: 0.0
+average reward score: 5.59375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.05%) |Training time=0.44s (20.32%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3751|ppo_ep: 1|act_loss: 0.0002613067626953125|cri_loss: 0.0005140304565429688|unsuper_loss: 0.0
+average reward score: 5.6484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.54%) |Training time=0.45s (20.87%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3752|ppo_ep: 1|act_loss: 0.02374267578125|cri_loss: 0.01258087158203125|unsuper_loss: 0.0
+average reward score: 6.01953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.63s |Gather latency=0.00s (0.00%) |Generate time=1.60s (60.84%) |Training time=0.45s (17.18%) |Others=0.58 (21.99%)|CurSamplesPerSec=12.18 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3753|ppo_ep: 1|act_loss: -0.004230499267578125|cri_loss: -0.001766204833984375|unsuper_loss: 0.0
+average reward score: 4.390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.26%) |Training time=0.45s (21.07%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3754|ppo_ep: 1|act_loss: -0.0200347900390625|cri_loss: -0.0087890625|unsuper_loss: 0.0
+average reward score: 5.26953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.67%) |Training time=0.44s (20.74%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3755|ppo_ep: 1|act_loss: -0.004261016845703125|cri_loss: -0.0016603469848632812|unsuper_loss: 0.0
+average reward score: 5.82421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.24%) |Training time=0.45s (21.10%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3756|ppo_ep: 1|act_loss: -0.032989501953125|cri_loss: -0.0160675048828125|unsuper_loss: 0.0
+average reward score: 5.93359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.39%) |Training time=0.45s (21.03%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3757|ppo_ep: 1|act_loss: 0.015899658203125|cri_loss: 0.0082550048828125|unsuper_loss: 0.0
+average reward score: 5.26171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.80%) |Training time=0.44s (20.51%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3758|ppo_ep: 1|act_loss: 0.00212860107421875|cri_loss: 0.002384185791015625|unsuper_loss: 0.0
+average reward score: 5.05859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.60s (69.51%) |Training time=0.44s (19.25%) |Others=0.26 (11.24%)|CurSamplesPerSec=13.87 |AvgSamplesPerSec=14.46
+[2023-04-14 11:06:10,155] [INFO] [logging.py:96:log_dist] [Rank 0] step=3760, skipped=52, lr=[5.6969793122287855e-06, 5.6969793122287855e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:06:10,173] [INFO] [timer.py:199:stop] epoch=0/micro_step=3760/global_step=3760, RunningAvgSamplesPerSec=105.51794677764592, CurrSamplesPerSec=115.36689304357319, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:06:10,265] [INFO] [logging.py:96:log_dist] [Rank 0] step=3760, skipped=59, lr=[2.9584276511235884e-06, 2.9584276511235884e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3759|ppo_ep: 1|act_loss: -0.00699615478515625|cri_loss: -0.0029449462890625|unsuper_loss: 0.0
+average reward score: 4.09375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.77%) |Training time=0.44s (20.55%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3760|ppo_ep: 1|act_loss: -0.0217742919921875|cri_loss: -0.00971221923828125|unsuper_loss: 0.0
+average reward score: 5.3359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.03%) |Training time=0.44s (20.35%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3761|ppo_ep: 1|act_loss: -0.029144287109375|cri_loss: -0.0140380859375|unsuper_loss: 0.0
+average reward score: 5.48828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.45%) |Training time=0.51s (22.99%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.46 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3762|ppo_ep: 1|act_loss: -0.08831787109375|cri_loss: -0.04180908203125|unsuper_loss: 0.0
+average reward score: 5.45703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.11%) |Training time=0.46s (21.30%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3763|ppo_ep: 1|act_loss: 0.020843505859375|cri_loss: 0.01064300537109375|unsuper_loss: 0.0
+average reward score: 5.05859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.69%) |Training time=0.46s (21.57%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3764|ppo_ep: 1|act_loss: 0.033203125|cri_loss: 0.0171661376953125|unsuper_loss: 0.0
+average reward score: 4.83203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.16%) |Training time=0.46s (21.26%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3765|ppo_ep: 1|act_loss: -0.01349639892578125|cri_loss: -0.00501251220703125|unsuper_loss: 0.0
+average reward score: 5.8984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.57%) |Training time=0.47s (21.77%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3766|ppo_ep: 1|act_loss: 0.0176239013671875|cri_loss: 0.00907135009765625|unsuper_loss: 0.0
+average reward score: 5.2734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.91%) |Training time=0.46s (21.50%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3767|ppo_ep: 1|act_loss: -0.004486083984375|cri_loss: -0.0013856887817382812|unsuper_loss: 0.0
+average reward score: 4.35546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.64%) |Training time=0.47s (21.71%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3768|ppo_ep: 1|act_loss: -0.0341796875|cri_loss: -0.016265869140625|unsuper_loss: 0.0
+average reward score: 5.515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.91%) |Training time=0.46s (21.42%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
+[2023-04-14 11:06:31,673] [INFO] [logging.py:96:log_dist] [Rank 0] step=3770, skipped=52, lr=[5.678702557076659e-06, 5.678702557076659e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:06:31,691] [INFO] [timer.py:199:stop] epoch=0/micro_step=3770/global_step=3770, RunningAvgSamplesPerSec=105.52253519060744, CurrSamplesPerSec=106.89682394300014, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:06:31,784] [INFO] [logging.py:96:log_dist] [Rank 0] step=3770, skipped=59, lr=[2.9489625028334145e-06, 2.9489625028334145e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3769|ppo_ep: 1|act_loss: 0.001468658447265625|cri_loss: 0.0014200210571289062|unsuper_loss: 0.0
+average reward score: 5.06640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.82%) |Training time=0.46s (21.53%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3770|ppo_ep: 1|act_loss: 0.00737762451171875|cri_loss: 0.0038051605224609375|unsuper_loss: 0.0
+average reward score: 4.51171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.47s (21.65%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3771|ppo_ep: 1|act_loss: 0.0081634521484375|cri_loss: 0.00421142578125|unsuper_loss: 0.0
+average reward score: 5.03125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.53%) |Training time=0.48s (21.96%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3772|ppo_ep: 1|act_loss: -0.021820068359375|cri_loss: -0.00951385498046875|unsuper_loss: 0.0
+average reward score: 4.21875
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.50%) |Training time=0.48s (21.18%) |Others=0.10 (4.32%)|CurSamplesPerSec=14.11 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3773|ppo_ep: 1|act_loss: -0.02081298828125|cri_loss: -0.0070648193359375|unsuper_loss: 0.0
+average reward score: 4.50390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.59s (70.19%) |Training time=0.46s (20.09%) |Others=0.22 (9.72%)|CurSamplesPerSec=14.12 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3774|ppo_ep: 1|act_loss: -0.003711700439453125|cri_loss: -0.0015134811401367188|unsuper_loss: 0.0
+average reward score: 4.95703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.18%) |Training time=0.46s (21.15%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3775|ppo_ep: 1|act_loss: -0.018524169921875|cri_loss: -0.0089569091796875|unsuper_loss: 0.0
+average reward score: 4.953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.38%) |Training time=0.45s (21.02%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3776|ppo_ep: 1|act_loss: 0.0028858184814453125|cri_loss: 0.0022220611572265625|unsuper_loss: 0.0
+average reward score: 5.6640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.50%) |Training time=0.43s (19.88%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3777|ppo_ep: 1|act_loss: 0.037353515625|cri_loss: 0.01947021484375|unsuper_loss: 0.0
+average reward score: 5.20703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.44%) |Training time=0.47s (20.25%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.70 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3778|ppo_ep: 1|act_loss: 0.0109710693359375|cri_loss: 0.005802154541015625|unsuper_loss: 0.0
+average reward score: 4.734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.87%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
+[2023-04-14 11:06:53,644] [INFO] [logging.py:96:log_dist] [Rank 0] step=3780, skipped=52, lr=[5.6604131479931914e-06, 5.6604131479931914e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:06:53,662] [INFO] [timer.py:199:stop] epoch=0/micro_step=3780/global_step=3780, RunningAvgSamplesPerSec=105.52500263158235, CurrSamplesPerSec=103.33717113747495, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:06:53,754] [INFO] [logging.py:96:log_dist] [Rank 0] step=3780, skipped=59, lr=[2.939490699835887e-06, 2.939490699835887e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3779|ppo_ep: 1|act_loss: 0.00405120849609375|cri_loss: 0.002605438232421875|unsuper_loss: 0.0
+average reward score: 6.30859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.46%) |Training time=0.47s (21.93%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3780|ppo_ep: 1|act_loss: -0.017364501953125|cri_loss: -0.0079803466796875|unsuper_loss: 0.0
+average reward score: 5.4453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.82%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3781|ppo_ep: 1|act_loss: 0.0308074951171875|cri_loss: 0.0172119140625|unsuper_loss: 0.0
+average reward score: 4.98828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.48%) |Training time=0.47s (21.86%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3782|ppo_ep: 1|act_loss: -0.003948211669921875|cri_loss: -0.0017042160034179688|unsuper_loss: 0.0
+average reward score: 5.53125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.98%) |Training time=0.48s (22.45%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3783|ppo_ep: 1|act_loss: -0.0244140625|cri_loss: -0.0100250244140625|unsuper_loss: 0.0
+average reward score: 5.87109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.52%) |Training time=0.47s (21.82%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3784|ppo_ep: 1|act_loss: -0.016387939453125|cri_loss: -0.007472991943359375|unsuper_loss: 0.0
+average reward score: 5.04296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.61%) |Training time=0.47s (21.83%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3785|ppo_ep: 1|act_loss: -0.031341552734375|cri_loss: -0.0128021240234375|unsuper_loss: 0.0
+average reward score: 5.25390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.36%) |Training time=0.48s (22.10%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3786|ppo_ep: 1|act_loss: 0.003833770751953125|cri_loss: 0.002223968505859375|unsuper_loss: 0.0
+average reward score: 5.15234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.87%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3787|ppo_ep: 1|act_loss: 0.01019287109375|cri_loss: 0.0052642822265625|unsuper_loss: 0.0
+average reward score: 6.0546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.33%) |Training time=0.48s (22.05%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3788|ppo_ep: 1|act_loss: 0.01226806640625|cri_loss: 0.0078887939453125|unsuper_loss: 0.0
+average reward score: 5.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.44%) |Training time=0.47s (21.92%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+[2023-04-14 11:07:15,210] [INFO] [logging.py:96:log_dist] [Rank 0] step=3790, skipped=52, lr=[5.642111356071544e-06, 5.642111356071544e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:07:15,508] [INFO] [timer.py:199:stop] epoch=0/micro_step=3790/global_step=3790, RunningAvgSamplesPerSec=105.49148721233601, CurrSamplesPerSec=53.862661932836275, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:07:15,602] [INFO] [logging.py:96:log_dist] [Rank 0] step=3790, skipped=59, lr=[2.9300123825259737e-06, 2.9300123825259737e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3789|ppo_ep: 1|act_loss: 0.031036376953125|cri_loss: 0.0164794921875|unsuper_loss: 0.0
+average reward score: 5.59375
+-------------------------------------------------------------------------------------
+|E2E latency=2.44s |Gather latency=0.00s (0.00%) |Generate time=1.58s (64.83%) |Training time=0.76s (31.04%) |Others=0.10 (4.12%)|CurSamplesPerSec=13.12 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3790|ppo_ep: 1|act_loss: 0.0275115966796875|cri_loss: 0.0144805908203125|unsuper_loss: 0.0
+average reward score: 6.15625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.48s (21.99%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3791|ppo_ep: 1|act_loss: 0.03350830078125|cri_loss: 0.0183563232421875|unsuper_loss: 0.0
+average reward score: 5.69921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.68%) |Training time=0.50s (21.06%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.61 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3792|ppo_ep: 1|act_loss: 0.039764404296875|cri_loss: 0.0226287841796875|unsuper_loss: 0.0
+average reward score: 5.9375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.53%) |Training time=0.47s (21.92%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3793|ppo_ep: 1|act_loss: -0.01373291015625|cri_loss: -0.006465911865234375|unsuper_loss: 0.0
+average reward score: 5.77734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.47s (21.95%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3794|ppo_ep: 1|act_loss: -0.0089263916015625|cri_loss: -0.003650665283203125|unsuper_loss: 0.0
+average reward score: 5.875
+-------------------------------------------------------------------------------------
+|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.16%) |Training time=0.47s (21.27%) |Others=0.17 (7.57%)|CurSamplesPerSec=14.38 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3795|ppo_ep: 1|act_loss: -0.058349609375|cri_loss: -0.0270233154296875|unsuper_loss: 0.0
+average reward score: 4.83203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.48s (22.06%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3796|ppo_ep: 1|act_loss: -0.0509033203125|cri_loss: -0.024871826171875|unsuper_loss: 0.0
+average reward score: 4.83984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.62%) |Training time=0.47s (21.83%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3797|ppo_ep: 1|act_loss: -0.01873779296875|cri_loss: -0.0084228515625|unsuper_loss: 0.0
+average reward score: 5.01171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.22%) |Training time=0.48s (22.15%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3798|ppo_ep: 1|act_loss: -0.00775909423828125|cri_loss: -0.0035953521728515625|unsuper_loss: 0.0
+average reward score: 5.2578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.34%) |Training time=0.48s (22.12%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+[2023-04-14 11:07:37,337] [INFO] [logging.py:96:log_dist] [Rank 0] step=3800, skipped=52, lr=[5.623797452588428e-06, 5.623797452588428e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:07:37,355] [INFO] [timer.py:199:stop] epoch=0/micro_step=3800/global_step=3800, RunningAvgSamplesPerSec=105.48203854925829, CurrSamplesPerSec=105.53218149137692, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:07:37,448] [INFO] [logging.py:96:log_dist] [Rank 0] step=3800, skipped=59, lr=[2.9205276913952023e-06, 2.9205276913952023e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3799|ppo_ep: 1|act_loss: 0.007511138916015625|cri_loss: 0.00420379638671875|unsuper_loss: 0.0
+average reward score: 4.9296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.74%) |Training time=0.47s (21.63%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3800|ppo_ep: 1|act_loss: 0.02117919921875|cri_loss: 0.01085662841796875|unsuper_loss: 0.0
+average reward score: 6.04296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.68%) |Training time=0.47s (21.53%) |Others=0.10 (4.79%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3801|ppo_ep: 1|act_loss: 0.028350830078125|cri_loss: 0.01506805419921875|unsuper_loss: 0.0
+average reward score: 4.0625
+-------------------------------------------------------------------------------------
+|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.64s (71.46%) |Training time=0.56s (24.16%) |Others=0.10 (4.39%)|CurSamplesPerSec=13.92 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3802|ppo_ep: 1|act_loss: 0.019287109375|cri_loss: 0.010101318359375|unsuper_loss: 0.0
+average reward score: 5.16015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.47%) |Training time=0.47s (21.98%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3803|ppo_ep: 1|act_loss: 0.031219482421875|cri_loss: 0.016265869140625|unsuper_loss: 0.0
+average reward score: 4.6953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.59s (67.29%) |Training time=0.47s (19.70%) |Others=0.31 (13.01%)|CurSamplesPerSec=13.52 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3804|ppo_ep: 1|act_loss: -0.0006227493286132812|cri_loss: 0.0002913475036621094|unsuper_loss: 0.0
+average reward score: 6.16796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.68%) |Training time=0.47s (21.76%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3805|ppo_ep: 1|act_loss: 0.031890869140625|cri_loss: 0.0168304443359375|unsuper_loss: 0.0
+average reward score: 5.765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.16%) |Training time=0.48s (22.19%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3806|ppo_ep: 1|act_loss: -0.005077362060546875|cri_loss: -0.0024261474609375|unsuper_loss: 0.0
+average reward score: 5.09765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.60%) |Training time=0.50s (21.22%) |Others=0.10 (4.18%)|CurSamplesPerSec=13.54 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3807|ppo_ep: 1|act_loss: 0.024078369140625|cri_loss: 0.0128631591796875|unsuper_loss: 0.0
+average reward score: 5.3828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.78%) |Training time=0.46s (21.57%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3808|ppo_ep: 1|act_loss: -0.0394287109375|cri_loss: -0.01849365234375|unsuper_loss: 0.0
+average reward score: 5.20703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.98%) |Training time=0.46s (21.45%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
+[2023-04-14 11:07:59,978] [INFO] [logging.py:96:log_dist] [Rank 0] step=3810, skipped=52, lr=[5.605471709000069e-06, 5.605471709000069e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:07:59,996] [INFO] [timer.py:199:stop] epoch=0/micro_step=3810/global_step=3810, RunningAvgSamplesPerSec=105.47664181442124, CurrSamplesPerSec=143.0142483899703, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:08:00,088] [INFO] [logging.py:96:log_dist] [Rank 0] step=3810, skipped=59, lr=[2.911036767029578e-06, 2.911036767029578e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3809|ppo_ep: 1|act_loss: -0.0309906005859375|cri_loss: -0.0133514404296875|unsuper_loss: 0.0
+average reward score: 5.75
+-------------------------------------------------------------------------------------
+|E2E latency=2.07s |Gather latency=0.00s (0.00%) |Generate time=1.59s (76.55%) |Training time=0.39s (18.63%) |Others=0.10 (4.82%)|CurSamplesPerSec=15.43 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3810|ppo_ep: 1|act_loss: 0.0113983154296875|cri_loss: 0.006107330322265625|unsuper_loss: 0.0
+average reward score: 6.3203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.46s (21.37%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3811|ppo_ep: 1|act_loss: 0.0013666152954101562|cri_loss: 0.00107574462890625|unsuper_loss: 0.0
+average reward score: 6.32421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.84%) |Training time=0.46s (21.51%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3812|ppo_ep: 1|act_loss: 2.09808349609375e-05|cri_loss: 0.001407623291015625|unsuper_loss: 0.0
+average reward score: 4.8203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.17%) |Training time=0.46s (21.25%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3813|ppo_ep: 1|act_loss: 0.0174560546875|cri_loss: 0.00921630859375|unsuper_loss: 0.0
+average reward score: 5.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.46s (21.56%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3814|ppo_ep: 1|act_loss: 0.01617431640625|cri_loss: 0.0084228515625|unsuper_loss: 0.0
+average reward score: 5.453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.83%) |Training time=0.46s (21.60%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3815|ppo_ep: 1|act_loss: -0.023834228515625|cri_loss: -0.01117706298828125|unsuper_loss: 0.0
+average reward score: 4.58984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.94s |Gather latency=0.00s (0.00%) |Generate time=1.59s (54.07%) |Training time=0.46s (15.80%) |Others=0.89 (30.13%)|CurSamplesPerSec=10.89 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3816|ppo_ep: 1|act_loss: 0.005046844482421875|cri_loss: 0.003204345703125|unsuper_loss: 0.0
+average reward score: 4.73046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.69%) |Training time=0.45s (20.73%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3817|ppo_ep: 1|act_loss: 0.00411224365234375|cri_loss: 0.005214691162109375|unsuper_loss: 0.0
+average reward score: 5.6796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.47s (21.89%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3818|ppo_ep: 1|act_loss: 0.0379638671875|cri_loss: 0.0204315185546875|unsuper_loss: 0.0
+average reward score: 4.8203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.64%) |Training time=0.45s (20.85%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.46
+[2023-04-14 11:08:22,322] [INFO] [logging.py:96:log_dist] [Rank 0] step=3820, skipped=52, lr=[5.587134396938199e-06, 5.587134396938199e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:08:22,340] [INFO] [timer.py:199:stop] epoch=0/micro_step=3820/global_step=3820, RunningAvgSamplesPerSec=105.48597808754263, CurrSamplesPerSec=126.9696646911081, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:08:22,433] [INFO] [logging.py:96:log_dist] [Rank 0] step=3820, skipped=59, lr=[2.9015397501074932e-06, 2.9015397501074932e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3819|ppo_ep: 1|act_loss: -0.00598907470703125|cri_loss: -0.00261688232421875|unsuper_loss: 0.0
+average reward score: 5.70703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.24%) |Training time=0.42s (19.22%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
+[2023-04-14 11:08:24,724] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 3820|ppo_ep: 1|act_loss: -0.0023822784423828125|cri_loss: -0.0002593994140625|unsuper_loss: 0.0
+average reward score: 5.48828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.61s (70.45%) |Training time=0.59s (25.67%) |Others=0.09 (3.87%)|CurSamplesPerSec=13.97 |AvgSamplesPerSec=14.46
+[2023-04-14 11:08:26,907] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 3821|ppo_ep: 1|act_loss: -0.03167724609375|cri_loss: -0.015045166015625|unsuper_loss: 0.0
+average reward score: 4.75
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.45%) |Training time=0.47s (21.52%) |Others=0.09 (4.03%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3822|ppo_ep: 1|act_loss: -0.001605987548828125|cri_loss: -0.0004248619079589844|unsuper_loss: 0.0
+average reward score: 4.37890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.81%) |Training time=0.47s (21.65%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3823|ppo_ep: 1|act_loss: -0.0477294921875|cri_loss: -0.022308349609375|unsuper_loss: 0.0
+average reward score: 4.9765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.28%) |Training time=0.48s (22.10%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3824|ppo_ep: 1|act_loss: 0.05181884765625|cri_loss: 0.028594970703125|unsuper_loss: 0.0
+average reward score: 5.33984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.62%) |Training time=0.45s (20.83%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3825|ppo_ep: 1|act_loss: -0.0362548828125|cri_loss: -0.0172576904296875|unsuper_loss: 0.0
+average reward score: 5.578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.52%) |Training time=0.48s (21.83%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3826|ppo_ep: 1|act_loss: -0.0181884765625|cri_loss: -0.00885009765625|unsuper_loss: 0.0
+average reward score: 6.09375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.04%) |Training time=0.47s (21.43%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3827|ppo_ep: 1|act_loss: 0.01497650146484375|cri_loss: 0.00775909423828125|unsuper_loss: 0.0
+average reward score: 6.15234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.56%) |Training time=0.47s (21.80%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3828|ppo_ep: 1|act_loss: -0.00310516357421875|cri_loss: -0.001155853271484375|unsuper_loss: 0.0
+average reward score: 5.66015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.95%) |Training time=0.49s (22.46%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.46
+[2023-04-14 11:08:44,218] [INFO] [logging.py:96:log_dist] [Rank 0] step=3830, skipped=52, lr=[5.568785788206016e-06, 5.568785788206016e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:08:45,110] [INFO] [timer.py:199:stop] epoch=0/micro_step=3830/global_step=3830, RunningAvgSamplesPerSec=105.39178371825996, CurrSamplesPerSec=26.9550695045682, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:08:45,204] [INFO] [logging.py:96:log_dist] [Rank 0] step=3830, skipped=61, lr=[2.8939378445227608e-06, 2.8939378445227608e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3829|ppo_ep: 1|act_loss: 0.023345947265625|cri_loss: 0.01226043701171875|unsuper_loss: 0.0
+average reward score: 4.90234375
+-------------------------------------------------------------------------------------
+|E2E latency=3.07s |Gather latency=0.00s (0.00%) |Generate time=1.61s (52.64%) |Training time=1.35s (44.08%) |Others=0.10 (3.28%)|CurSamplesPerSec=10.43 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3830|ppo_ep: 1|act_loss: 0.0284423828125|cri_loss: 0.0149993896484375|unsuper_loss: 0.0
+average reward score: 6.21875
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.77%) |Training time=0.48s (20.91%) |Others=0.10 (4.32%)|CurSamplesPerSec=14.05 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3831|ppo_ep: 1|act_loss: 0.025787353515625|cri_loss: 0.01403045654296875|unsuper_loss: 0.0
+average reward score: 5.296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.22%) |Training time=0.48s (22.17%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3832|ppo_ep: 1|act_loss: 0.03790283203125|cri_loss: 0.01959228515625|unsuper_loss: 0.0
+average reward score: 5.53125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.76%) |Training time=0.47s (21.72%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3833|ppo_ep: 1|act_loss: 0.0158233642578125|cri_loss: 0.00843048095703125|unsuper_loss: 0.0
+average reward score: 5.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.90%) |Training time=0.46s (21.39%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3834|ppo_ep: 1|act_loss: -0.0014019012451171875|cri_loss: 6.198883056640625e-05|unsuper_loss: 0.0
+average reward score: 5.3984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.95%) |Training time=0.47s (21.50%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3835|ppo_ep: 1|act_loss: -0.026611328125|cri_loss: -0.0128631591796875|unsuper_loss: 0.0
+average reward score: 6.39453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.04%) |Training time=0.46s (19.65%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.65 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3836|ppo_ep: 1|act_loss: -0.03594970703125|cri_loss: -0.0166778564453125|unsuper_loss: 0.0
+average reward score: 5.04296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.72%) |Training time=0.47s (21.74%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3837|ppo_ep: 1|act_loss: 0.0117034912109375|cri_loss: 0.008209228515625|unsuper_loss: 0.0
+average reward score: 5.5546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.28%) |Training time=0.48s (22.18%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3838|ppo_ep: 1|act_loss: -0.00482940673828125|cri_loss: -0.002147674560546875|unsuper_loss: 0.0
+average reward score: 5.25390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.04%) |Training time=0.49s (22.35%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
+[2023-04-14 11:09:07,082] [INFO] [logging.py:96:log_dist] [Rank 0] step=3840, skipped=52, lr=[5.550426154774167e-06, 5.550426154774167e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:09:07,100] [INFO] [timer.py:199:stop] epoch=0/micro_step=3840/global_step=3840, RunningAvgSamplesPerSec=105.38504342863314, CurrSamplesPerSec=101.54163110909366, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:09:07,193] [INFO] [logging.py:96:log_dist] [Rank 0] step=3840, skipped=61, lr=[2.8844302157955294e-06, 2.8844302157955294e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3839|ppo_ep: 1|act_loss: 0.00982666015625|cri_loss: 0.005313873291015625|unsuper_loss: 0.0
+average reward score: 5.8515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.39%) |Training time=0.48s (22.10%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3840|ppo_ep: 1|act_loss: -0.0025463104248046875|cri_loss: -0.000339508056640625|unsuper_loss: 0.0
+average reward score: 5.328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.37%) |Training time=0.48s (22.08%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3841|ppo_ep: 1|act_loss: 0.02935791015625|cri_loss: 0.01509857177734375|unsuper_loss: 0.0
+average reward score: 4.37890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.30%) |Training time=0.48s (22.09%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3842|ppo_ep: 1|act_loss: 0.0176239013671875|cri_loss: 0.00960540771484375|unsuper_loss: 0.0
+average reward score: 5.0625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.29%) |Training time=0.48s (22.17%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3843|ppo_ep: 1|act_loss: 0.036529541015625|cri_loss: 0.018768310546875|unsuper_loss: 0.0
+average reward score: 5.59765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.28%) |Training time=0.48s (22.09%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3844|ppo_ep: 1|act_loss: 0.022552490234375|cri_loss: 0.01169586181640625|unsuper_loss: 0.0
+average reward score: 5.55859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.28%) |Training time=0.48s (22.18%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3845|ppo_ep: 1|act_loss: 0.029296875|cri_loss: 0.01503753662109375|unsuper_loss: 0.0
+average reward score: 4.78515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.20%) |Training time=0.48s (22.27%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3846|ppo_ep: 1|act_loss: -0.00392913818359375|cri_loss: -0.001155853271484375|unsuper_loss: 0.0
+average reward score: 5.19140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.99%) |Training time=0.49s (22.48%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3847|ppo_ep: 1|act_loss: 0.02215576171875|cri_loss: 0.011810302734375|unsuper_loss: 0.0
+average reward score: 5.90234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.41%) |Training time=0.47s (21.94%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3848|ppo_ep: 1|act_loss: -0.0194549560546875|cri_loss: -0.00933074951171875|unsuper_loss: 0.0
+average reward score: 5.67578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.14%) |Training time=0.48s (22.34%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+[2023-04-14 11:09:28,729] [INFO] [logging.py:96:log_dist] [Rank 0] step=3850, skipped=52, lr=[5.5320557687767085e-06, 5.5320557687767085e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:09:28,747] [INFO] [timer.py:199:stop] epoch=0/micro_step=3850/global_step=3850, RunningAvgSamplesPerSec=105.37252725245156, CurrSamplesPerSec=102.7891293792571, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:09:28,840] [INFO] [logging.py:96:log_dist] [Rank 0] step=3850, skipped=61, lr=[2.8749168888851126e-06, 2.8749168888851126e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3849|ppo_ep: 1|act_loss: 0.01404571533203125|cri_loss: 0.007328033447265625|unsuper_loss: 0.0
+average reward score: 4.97265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.40%) |Training time=0.47s (21.93%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3850|ppo_ep: 1|act_loss: 0.01168060302734375|cri_loss: 0.00693511962890625|unsuper_loss: 0.0
+average reward score: 5.19140625
+-------------------------------------------------------------------------------------
+|E2E latency=3.28s |Gather latency=0.00s (0.00%) |Generate time=1.76s (53.80%) |Training time=0.48s (14.71%) |Others=1.03 (31.49%)|CurSamplesPerSec=9.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3851|ppo_ep: 1|act_loss: 0.00482177734375|cri_loss: 0.0029544830322265625|unsuper_loss: 0.0
+average reward score: 5.19140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.51%) |Training time=0.47s (21.84%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3852|ppo_ep: 1|act_loss: -0.00400543212890625|cri_loss: -0.0009441375732421875|unsuper_loss: 0.0
+average reward score: 5.421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.48s (22.09%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3853|ppo_ep: 1|act_loss: 0.00644683837890625|cri_loss: 0.003875732421875|unsuper_loss: 0.0
+average reward score: 6.10546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.19%) |Training time=0.48s (22.18%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3854|ppo_ep: 1|act_loss: 0.0124053955078125|cri_loss: 0.00745391845703125|unsuper_loss: 0.0
+average reward score: 5.50390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.16%) |Training time=0.48s (22.30%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3855|ppo_ep: 1|act_loss: 0.0143280029296875|cri_loss: 0.007793426513671875|unsuper_loss: 0.0
+average reward score: 6.1484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.47%) |Training time=0.48s (21.93%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3856|ppo_ep: 1|act_loss: 0.018798828125|cri_loss: 0.00968170166015625|unsuper_loss: 0.0
+average reward score: 5.70703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.47s (21.98%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3857|ppo_ep: 1|act_loss: 0.09454345703125|cri_loss: 0.0516357421875|unsuper_loss: 0.0
+average reward score: 5.06640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.30%) |Training time=0.48s (22.06%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3858|ppo_ep: 1|act_loss: 0.0253448486328125|cri_loss: 0.01302337646484375|unsuper_loss: 0.0
+average reward score: 5.0
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.37%) |Training time=0.49s (22.16%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.46
+[2023-04-14 11:09:51,625] [INFO] [logging.py:96:log_dist] [Rank 0] step=3860, skipped=52, lr=[5.513674902507077e-06, 5.513674902507077e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:09:51,643] [INFO] [timer.py:199:stop] epoch=0/micro_step=3860/global_step=3860, RunningAvgSamplesPerSec=105.36149393062692, CurrSamplesPerSec=103.64847575007799, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:09:51,736] [INFO] [logging.py:96:log_dist] [Rank 0] step=3860, skipped=61, lr=[2.8653980048019654e-06, 2.8653980048019654e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3859|ppo_ep: 1|act_loss: -0.00516510009765625|cri_loss: -0.002277374267578125|unsuper_loss: 0.0
+average reward score: 5.0390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.76%) |Training time=0.47s (20.83%) |Others=0.10 (4.41%)|CurSamplesPerSec=14.14 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3860|ppo_ep: 1|act_loss: -0.0166778564453125|cri_loss: -0.007465362548828125|unsuper_loss: 0.0
+average reward score: 5.73046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.38%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3861|ppo_ep: 1|act_loss: -0.0172576904296875|cri_loss: -0.0084228515625|unsuper_loss: 0.0
+average reward score: 3.95703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.26%) |Training time=0.48s (22.13%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3862|ppo_ep: 1|act_loss: 0.013824462890625|cri_loss: 0.008209228515625|unsuper_loss: 0.0
+average reward score: 4.9921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.37%) |Training time=0.48s (22.09%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3863|ppo_ep: 1|act_loss: -0.019683837890625|cri_loss: -0.00853729248046875|unsuper_loss: 0.0
+average reward score: 5.33984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.42%) |Training time=0.48s (21.95%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3864|ppo_ep: 1|act_loss: 0.01343536376953125|cri_loss: 0.006969451904296875|unsuper_loss: 0.0
+average reward score: 4.765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.65%) |Training time=0.47s (21.80%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3865|ppo_ep: 1|act_loss: -0.0426025390625|cri_loss: -0.0206146240234375|unsuper_loss: 0.0
+average reward score: 6.3671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.00%) |Training time=0.46s (19.77%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3866|ppo_ep: 1|act_loss: 0.01137542724609375|cri_loss: 0.0087432861328125|unsuper_loss: 0.0
+average reward score: 4.78515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.08%) |Training time=0.49s (22.41%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3867|ppo_ep: 1|act_loss: 0.05126953125|cri_loss: 0.027069091796875|unsuper_loss: 0.0
+average reward score: 4.73046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.85%) |Training time=0.49s (22.61%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3868|ppo_ep: 1|act_loss: 0.019317626953125|cri_loss: 0.01029205322265625|unsuper_loss: 0.0
+average reward score: 5.265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.73%) |Training time=0.49s (22.73%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+[2023-04-14 11:10:13,439] [INFO] [logging.py:96:log_dist] [Rank 0] step=3870, skipped=52, lr=[5.495283828414054e-06, 5.495283828414054e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:10:13,457] [INFO] [timer.py:199:stop] epoch=0/micro_step=3870/global_step=3870, RunningAvgSamplesPerSec=105.35058199086996, CurrSamplesPerSec=98.91628601455396, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:10:13,549] [INFO] [logging.py:96:log_dist] [Rank 0] step=3870, skipped=61, lr=[2.855873704638912e-06, 2.855873704638912e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3869|ppo_ep: 1|act_loss: 0.003063201904296875|cri_loss: 0.00177764892578125|unsuper_loss: 0.0
+average reward score: 5.06640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.89%) |Training time=0.49s (22.49%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3870|ppo_ep: 1|act_loss: 0.069091796875|cri_loss: 0.035552978515625|unsuper_loss: 0.0
+average reward score: 6.2109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.81%) |Training time=0.49s (22.66%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3871|ppo_ep: 1|act_loss: 0.02947998046875|cri_loss: 0.01541900634765625|unsuper_loss: 0.0
+average reward score: 5.1953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.79%) |Training time=0.49s (22.59%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3872|ppo_ep: 1|act_loss: -0.00611114501953125|cri_loss: -0.002147674560546875|unsuper_loss: 0.0
+average reward score: 6.3359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.30%) |Training time=0.48s (22.20%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3873|ppo_ep: 1|act_loss: -0.0058135986328125|cri_loss: -0.00225830078125|unsuper_loss: 0.0
+average reward score: 4.65625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.90%) |Training time=0.49s (22.46%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3874|ppo_ep: 1|act_loss: -0.0141143798828125|cri_loss: -0.00676727294921875|unsuper_loss: 0.0
+average reward score: 5.3828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.05%) |Training time=0.48s (22.40%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3875|ppo_ep: 1|act_loss: 0.0024662017822265625|cri_loss: 0.0014801025390625|unsuper_loss: 0.0
+average reward score: 4.8828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.18%) |Training time=0.48s (22.29%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3876|ppo_ep: 1|act_loss: -0.0263214111328125|cri_loss: -0.01288604736328125|unsuper_loss: 0.0
+average reward score: 6.01953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.48s (22.27%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3877|ppo_ep: 1|act_loss: 0.031494140625|cri_loss: 0.017913818359375|unsuper_loss: 0.0
+average reward score: 5.48828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.31%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3878|ppo_ep: 1|act_loss: 0.002197265625|cri_loss: 0.0018177032470703125|unsuper_loss: 0.0
+average reward score: 4.921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.11%) |Training time=0.49s (22.36%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+[2023-04-14 11:10:35,093] [INFO] [logging.py:96:log_dist] [Rank 0] step=3880, skipped=52, lr=[5.476882819097722e-06, 5.476882819097722e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:10:35,112] [INFO] [timer.py:199:stop] epoch=0/micro_step=3880/global_step=3880, RunningAvgSamplesPerSec=105.33392161732702, CurrSamplesPerSec=100.01730920417752, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:10:35,204] [INFO] [logging.py:96:log_dist] [Rank 0] step=3880, skipped=61, lr=[2.846344129569057e-06, 2.846344129569057e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3879|ppo_ep: 1|act_loss: -0.0154266357421875|cri_loss: -0.007171630859375|unsuper_loss: 0.0
+average reward score: 5.5
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.07%) |Training time=0.49s (22.42%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3880|ppo_ep: 1|act_loss: 0.00566864013671875|cri_loss: 0.00415802001953125|unsuper_loss: 0.0
+average reward score: 4.89453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.17%) |Training time=0.48s (20.61%) |Others=0.10 (4.22%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3881|ppo_ep: 1|act_loss: 0.030059814453125|cri_loss: 0.015655517578125|unsuper_loss: 0.0
+average reward score: 5.875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.21%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3882|ppo_ep: 1|act_loss: -0.04449462890625|cri_loss: -0.0208892822265625|unsuper_loss: 0.0
+average reward score: 5.44140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.48s (22.26%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3883|ppo_ep: 1|act_loss: -0.00847625732421875|cri_loss: -0.00353240966796875|unsuper_loss: 0.0
+average reward score: 5.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.99s |Gather latency=0.00s (0.00%) |Generate time=1.61s (53.79%) |Training time=0.47s (15.83%) |Others=0.91 (30.38%)|CurSamplesPerSec=10.71 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3884|ppo_ep: 1|act_loss: -0.033355712890625|cri_loss: -0.01551055908203125|unsuper_loss: 0.0
+average reward score: 5.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.20%) |Training time=0.48s (22.18%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3885|ppo_ep: 1|act_loss: 0.0021610260009765625|cri_loss: 0.0013217926025390625|unsuper_loss: 0.0
+average reward score: 5.03125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.67%) |Training time=0.47s (21.66%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3886|ppo_ep: 1|act_loss: 0.01102447509765625|cri_loss: 0.005680084228515625|unsuper_loss: 0.0
+average reward score: 5.15234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.66%) |Training time=0.47s (21.74%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3887|ppo_ep: 1|act_loss: -0.0107879638671875|cri_loss: -0.004840850830078125|unsuper_loss: 0.0
+average reward score: 4.83203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.11%) |Training time=0.47s (21.33%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3888|ppo_ep: 1|act_loss: 0.03143310546875|cri_loss: 0.0162811279296875|unsuper_loss: 0.0
+average reward score: 5.421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.67%) |Training time=0.47s (20.90%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.12 |AvgSamplesPerSec=14.46
+[2023-04-14 11:10:57,877] [INFO] [logging.py:96:log_dist] [Rank 0] step=3890, skipped=52, lr=[5.4584721473054284e-06, 5.4584721473054284e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:10:57,896] [INFO] [timer.py:199:stop] epoch=0/micro_step=3890/global_step=3890, RunningAvgSamplesPerSec=105.32536333025105, CurrSamplesPerSec=101.78936631216996, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:10:57,988] [INFO] [logging.py:96:log_dist] [Rank 0] step=3890, skipped=61, lr=[2.836809420843692e-06, 2.836809420843692e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3889|ppo_ep: 1|act_loss: 0.041259765625|cri_loss: 0.0216522216796875|unsuper_loss: 0.0
+average reward score: 5.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.33%) |Training time=0.48s (22.06%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3890|ppo_ep: 1|act_loss: -0.0309295654296875|cri_loss: -0.014801025390625|unsuper_loss: 0.0
+average reward score: 5.2265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.29%) |Training time=0.48s (22.07%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3891|ppo_ep: 1|act_loss: 0.016632080078125|cri_loss: 0.00905609130859375|unsuper_loss: 0.0
+average reward score: 5.7109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.46%) |Training time=0.47s (21.91%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3892|ppo_ep: 1|act_loss: 0.07305908203125|cri_loss: 0.03955078125|unsuper_loss: 0.0
+average reward score: 5.05859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.21%) |Training time=0.48s (22.18%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3893|ppo_ep: 1|act_loss: -0.0031833648681640625|cri_loss: -0.00118255615234375|unsuper_loss: 0.0
+average reward score: 4.765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.39%) |Training time=0.48s (21.98%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3894|ppo_ep: 1|act_loss: -0.0030231475830078125|cri_loss: -0.0012531280517578125|unsuper_loss: 0.0
+average reward score: 5.78125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.20%) |Training time=0.48s (22.18%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3895|ppo_ep: 1|act_loss: -0.056243896484375|cri_loss: -0.0274505615234375|unsuper_loss: 0.0
+average reward score: 4.5625
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.51%) |Training time=0.47s (20.18%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3896|ppo_ep: 1|act_loss: -0.024444580078125|cri_loss: -0.0117950439453125|unsuper_loss: 0.0
+average reward score: 5.8984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.77%) |Training time=0.47s (21.59%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3897|ppo_ep: 1|act_loss: 0.00799560546875|cri_loss: 0.004730224609375|unsuper_loss: 0.0
+average reward score: 5.734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.86%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3898|ppo_ep: 1|act_loss: 0.0122528076171875|cri_loss: 0.006862640380859375|unsuper_loss: 0.0
+average reward score: 5.33984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.29%) |Training time=0.48s (22.09%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+[2023-04-14 11:11:19,675] [INFO] [logging.py:96:log_dist] [Rank 0] step=3900, skipped=52, lr=[5.440052085927744e-06, 5.440052085927744e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:11:19,693] [INFO] [timer.py:199:stop] epoch=0/micro_step=3900/global_step=3900, RunningAvgSamplesPerSec=105.3175314438355, CurrSamplesPerSec=102.41601813027702, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:11:19,786] [INFO] [logging.py:96:log_dist] [Rank 0] step=3900, skipped=61, lr=[2.827269719790202e-06, 2.827269719790202e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3899|ppo_ep: 1|act_loss: -0.003170013427734375|cri_loss: -0.0006551742553710938|unsuper_loss: 0.0
+average reward score: 5.0546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.47s (21.98%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3900|ppo_ep: 1|act_loss: 0.026153564453125|cri_loss: 0.0141448974609375|unsuper_loss: 0.0
+average reward score: 5.56640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.47%) |Training time=0.47s (21.91%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3901|ppo_ep: 1|act_loss: 0.0148773193359375|cri_loss: 0.0092315673828125|unsuper_loss: 0.0
+average reward score: 5.57421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.26%) |Training time=0.48s (22.13%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3902|ppo_ep: 1|act_loss: 0.0027313232421875|cri_loss: 0.00159454345703125|unsuper_loss: 0.0
+average reward score: 6.1484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.48s (21.97%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3903|ppo_ep: 1|act_loss: -0.05126953125|cri_loss: -0.024444580078125|unsuper_loss: 0.0
+average reward score: 4.95703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.37%) |Training time=0.48s (22.00%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3904|ppo_ep: 1|act_loss: 0.018585205078125|cri_loss: 0.0104217529296875|unsuper_loss: 0.0
+average reward score: 5.40625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.47%) |Training time=0.47s (21.91%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3905|ppo_ep: 1|act_loss: -0.03497314453125|cri_loss: -0.0162200927734375|unsuper_loss: 0.0
+average reward score: 5.390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.47s (21.93%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3906|ppo_ep: 1|act_loss: -0.01453399658203125|cri_loss: -0.00646209716796875|unsuper_loss: 0.0
+average reward score: 6.3203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.53%) |Training time=0.47s (21.85%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3907|ppo_ep: 1|act_loss: -0.013519287109375|cri_loss: -0.00604248046875|unsuper_loss: 0.0
+average reward score: 5.9375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.64%) |Others=0.10 (4.80%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3908|ppo_ep: 1|act_loss: 0.0009617805480957031|cri_loss: 0.0007853507995605469|unsuper_loss: 0.0
+average reward score: 5.421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.84%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+[2023-04-14 11:11:41,331] [INFO] [logging.py:96:log_dist] [Rank 0] step=3910, skipped=52, lr=[5.421622907994414e-06, 5.421622907994414e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:11:41,349] [INFO] [timer.py:199:stop] epoch=0/micro_step=3910/global_step=3910, RunningAvgSamplesPerSec=105.31036604765642, CurrSamplesPerSec=103.50970950158406, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:11:41,442] [INFO] [logging.py:96:log_dist] [Rank 0] step=3910, skipped=61, lr=[2.8177251678099694e-06, 2.8177251678099694e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3909|ppo_ep: 1|act_loss: 0.005268096923828125|cri_loss: 0.003101348876953125|unsuper_loss: 0.0
+average reward score: 5.37109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.81%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3910|ppo_ep: 1|act_loss: 0.0406494140625|cri_loss: 0.021026611328125|unsuper_loss: 0.0
+average reward score: 5.24609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.79s (76.03%) |Training time=0.46s (19.70%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.59 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3911|ppo_ep: 1|act_loss: 0.0535888671875|cri_loss: 0.0287322998046875|unsuper_loss: 0.0
+average reward score: 5.9296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.18%) |Training time=0.46s (21.21%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3912|ppo_ep: 1|act_loss: -0.007843017578125|cri_loss: -0.0031261444091796875|unsuper_loss: 0.0
+average reward score: 5.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.84%) |Training time=0.47s (21.53%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3913|ppo_ep: 1|act_loss: 0.02130126953125|cri_loss: 0.01087188720703125|unsuper_loss: 0.0
+average reward score: 5.41796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.11%) |Training time=0.46s (21.23%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3914|ppo_ep: 1|act_loss: -0.03924560546875|cri_loss: -0.017730712890625|unsuper_loss: 0.0
+average reward score: 6.35546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.70%) |Training time=0.47s (21.69%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3915|ppo_ep: 1|act_loss: -0.000682830810546875|cri_loss: 0.001865386962890625|unsuper_loss: 0.0
+average reward score: 4.71875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.56%) |Training time=0.45s (20.80%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3916|ppo_ep: 1|act_loss: -0.022735595703125|cri_loss: -0.00836181640625|unsuper_loss: 0.0
+average reward score: 5.5390625
+-------------------------------------------------------------------------------------
+|E2E latency=3.21s |Gather latency=0.00s (0.00%) |Generate time=1.61s (50.36%) |Training time=0.47s (14.53%) |Others=1.13 (35.11%)|CurSamplesPerSec=9.98 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3917|ppo_ep: 1|act_loss: 0.05841064453125|cri_loss: 0.03216552734375|unsuper_loss: 0.0
+average reward score: 4.9921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.23%) |Training time=0.46s (20.33%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.13 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3918|ppo_ep: 1|act_loss: 0.02142333984375|cri_loss: 0.0140228271484375|unsuper_loss: 0.0
+average reward score: 5.390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.00%) |Training time=0.46s (21.38%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+[2023-04-14 11:12:04,287] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, but hysteresis is 2. Reducing hysteresis to 1
+[2023-04-14 11:12:04,287] [INFO] [logging.py:96:log_dist] [Rank 0] step=3920, skipped=53, lr=[5.405029078967381e-06, 5.405029078967381e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:12:04,288] [INFO] [timer.py:199:stop] epoch=0/micro_step=3920/global_step=3920, RunningAvgSamplesPerSec=105.31687299274512, CurrSamplesPerSec=117.73918688232374, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:12:04,379] [INFO] [logging.py:96:log_dist] [Rank 0] step=3920, skipped=61, lr=[2.8081759063762797e-06, 2.8081759063762797e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3919|ppo_ep: 1|act_loss: 0.022247314453125|cri_loss: 0.0158843994140625|unsuper_loss: 0.0
+average reward score: 4.6015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.96%) |Training time=0.43s (20.35%) |Others=0.10 (4.68%)|CurSamplesPerSec=15.02 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3920|ppo_ep: 1|act_loss: 0.0309906005859375|cri_loss: 0.016754150390625|unsuper_loss: 0.0
+average reward score: 6.3125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.30%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3921|ppo_ep: 1|act_loss: 0.02581787109375|cri_loss: 0.01375579833984375|unsuper_loss: 0.0
+average reward score: 4.296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.85s |Gather latency=0.00s (0.00%) |Generate time=1.60s (56.01%) |Training time=0.46s (16.14%) |Others=0.79 (27.85%)|CurSamplesPerSec=11.22 |AvgSamplesPerSec=14.46
+[2023-04-14 11:12:11,547] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 3922|ppo_ep: 1|act_loss: -0.020843505859375|cri_loss: -0.00952911376953125|unsuper_loss: 0.0
+average reward score: 5.50390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.40%) |Training time=0.46s (21.43%) |Others=0.09 (4.17%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
+[2023-04-14 11:12:13,588] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
+[2023-04-14 11:12:13,672] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 3923|ppo_ep: 1|act_loss: -0.0687255859375|cri_loss: -0.0307464599609375|unsuper_loss: 0.0
+average reward score: 5.453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.32%) |Training time=0.43s (20.47%) |Others=0.09 (4.21%)|CurSamplesPerSec=15.06 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3924|ppo_ep: 1|act_loss: 0.0050811767578125|cri_loss: 0.0030078887939453125|unsuper_loss: 0.0
+average reward score: 5.58984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.60s (70.56%) |Training time=0.57s (25.04%) |Others=0.10 (4.40%)|CurSamplesPerSec=14.11 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3925|ppo_ep: 1|act_loss: -0.13818359375|cri_loss: -0.047271728515625|unsuper_loss: 0.0
+average reward score: 6.3828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.01%) |Training time=0.44s (20.35%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3926|ppo_ep: 1|act_loss: -0.06524658203125|cri_loss: -0.027069091796875|unsuper_loss: 0.0
+average reward score: 5.17578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.21%) |Training time=0.46s (21.15%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3927|ppo_ep: 1|act_loss: -0.07269287109375|cri_loss: -0.0286712646484375|unsuper_loss: 0.0
+average reward score: 4.8046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.82s |Gather latency=0.00s (0.00%) |Generate time=1.60s (56.85%) |Training time=0.46s (16.34%) |Others=0.76 (26.80%)|CurSamplesPerSec=11.36 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3928|ppo_ep: 1|act_loss: 0.0650634765625|cri_loss: 0.034942626953125|unsuper_loss: 0.0
+average reward score: 4.80859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.30%) |Training time=0.45s (21.06%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
+[2023-04-14 11:12:27,276] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192
+[2023-04-14 11:12:27,277] [INFO] [logging.py:96:log_dist] [Rank 0] step=3930, skipped=55, lr=[5.390273156270772e-06, 5.390273156270772e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:12:27,277] [INFO] [timer.py:199:stop] epoch=0/micro_step=3930/global_step=3930, RunningAvgSamplesPerSec=105.3224964159603, CurrSamplesPerSec=117.4507008482986, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:12:27,362] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
+[2023-04-14 11:12:27,362] [INFO] [logging.py:96:log_dist] [Rank 0] step=3930, skipped=64, lr=[2.8014886970414734e-06, 2.8014886970414734e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3929|ppo_ep: 1|act_loss: 0.0504150390625|cri_loss: 0.028167724609375|unsuper_loss: 0.0
+average reward score: 4.67578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.29%) |Training time=0.43s (20.42%) |Others=0.09 (4.29%)|CurSamplesPerSec=15.05 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3930|ppo_ep: 1|act_loss: 0.089111328125|cri_loss: 0.047210693359375|unsuper_loss: 0.0
+average reward score: 5.7421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.84%) |Training time=0.47s (21.55%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3931|ppo_ep: 1|act_loss: -0.0012054443359375|cri_loss: 0.0017375946044921875|unsuper_loss: 0.0
+average reward score: 4.84375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.02%) |Training time=0.46s (21.35%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3932|ppo_ep: 1|act_loss: 0.01161956787109375|cri_loss: 0.006717681884765625|unsuper_loss: 0.0
+average reward score: 4.55859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.15%) |Training time=0.46s (21.21%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3933|ppo_ep: 1|act_loss: 0.048187255859375|cri_loss: 0.025787353515625|unsuper_loss: 0.0
+average reward score: 5.55078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.99%) |Training time=0.46s (21.39%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3934|ppo_ep: 1|act_loss: 0.11151123046875|cri_loss: 0.0634765625|unsuper_loss: 0.0
+average reward score: 4.71484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.41%) |Training time=0.45s (20.95%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3935|ppo_ep: 1|act_loss: -0.0048980712890625|cri_loss: 0.000118255615234375|unsuper_loss: 0.0
+average reward score: 4.828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.25%) |Training time=0.46s (21.11%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3936|ppo_ep: 1|act_loss: 0.01910400390625|cri_loss: 0.026641845703125|unsuper_loss: 0.0
+average reward score: 5.4921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.46%) |Training time=0.45s (20.88%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3937|ppo_ep: 1|act_loss: -0.007503509521484375|cri_loss: -0.001201629638671875|unsuper_loss: 0.0
+average reward score: 3.75
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.38%) |Training time=0.46s (21.04%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.46
+[2023-04-14 11:12:46,720] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, reducing to 4096
+epoch: 0|step: 3938|ppo_ep: 1|act_loss: -0.015869140625|cri_loss: -0.006702423095703125|unsuper_loss: 0.0
+average reward score: 5.1328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.17%) |Training time=0.43s (20.13%) |Others=0.10 (4.70%)|CurSamplesPerSec=15.03 |AvgSamplesPerSec=14.46
+[2023-04-14 11:12:49,033] [INFO] [logging.py:96:log_dist] [Rank 0] step=3940, skipped=56, lr=[5.373666340493332e-06, 5.373666340493332e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:12:49,083] [INFO] [timer.py:199:stop] epoch=0/micro_step=3940/global_step=3940, RunningAvgSamplesPerSec=105.32905620645766, CurrSamplesPerSec=98.3321840781513, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:12:49,175] [INFO] [logging.py:96:log_dist] [Rank 0] step=3940, skipped=64, lr=[2.7919317544146405e-06, 2.7919317544146405e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3939|ppo_ep: 1|act_loss: 0.0667724609375|cri_loss: 0.049713134765625|unsuper_loss: 0.0
+average reward score: 4.6953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.08%) |Training time=0.49s (20.67%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.54 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3940|ppo_ep: 1|act_loss: -0.027984619140625|cri_loss: -0.01288604736328125|unsuper_loss: 0.0
+average reward score: 4.8671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.83%) |Training time=0.47s (21.55%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3941|ppo_ep: 1|act_loss: -0.0123443603515625|cri_loss: -0.005859375|unsuper_loss: 0.0
+average reward score: 4.54296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.66%) |Training time=0.47s (21.74%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3942|ppo_ep: 1|act_loss: -0.01959228515625|cri_loss: -0.00902557373046875|unsuper_loss: 0.0
+average reward score: 4.09765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.84%) |Training time=0.47s (21.54%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3943|ppo_ep: 1|act_loss: -0.01531982421875|cri_loss: -0.005657196044921875|unsuper_loss: 0.0
+average reward score: 4.59765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.75%) |Training time=0.47s (21.63%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3944|ppo_ep: 1|act_loss: 0.011962890625|cri_loss: 0.018524169921875|unsuper_loss: 0.0
+average reward score: 4.62109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.12%) |Training time=0.46s (21.29%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3945|ppo_ep: 1|act_loss: 0.092529296875|cri_loss: 0.0631103515625|unsuper_loss: 0.0
+average reward score: 4.703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.28%) |Training time=0.47s (21.18%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3946|ppo_ep: 1|act_loss: -0.0811767578125|cri_loss: -0.03668212890625|unsuper_loss: 0.0
+average reward score: 5.59765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.83%) |Training time=0.47s (20.57%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.14 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3947|ppo_ep: 1|act_loss: -0.0601806640625|cri_loss: -0.0255279541015625|unsuper_loss: 0.0
+average reward score: 4.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.46%) |Training time=0.48s (21.93%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3948|ppo_ep: 1|act_loss: 0.005146026611328125|cri_loss: 0.005069732666015625|unsuper_loss: 0.0
+average reward score: 5.078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.57%) |Training time=0.48s (21.82%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.46
+[2023-04-14 11:13:10,960] [INFO] [logging.py:96:log_dist] [Rank 0] step=3950, skipped=56, lr=[5.355206605693846e-06, 5.355206605693846e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:13:10,979] [INFO] [timer.py:199:stop] epoch=0/micro_step=3950/global_step=3950, RunningAvgSamplesPerSec=105.32556865211501, CurrSamplesPerSec=104.41765903501886, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:13:11,071] [INFO] [logging.py:96:log_dist] [Rank 0] step=3950, skipped=64, lr=[2.7823704846549614e-06, 2.7823704846549614e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3949|ppo_ep: 1|act_loss: -0.02288818359375|cri_loss: -0.0092010498046875|unsuper_loss: 0.0
+average reward score: 5.62109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.86%) |Training time=0.47s (21.55%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3950|ppo_ep: 1|act_loss: 0.00533294677734375|cri_loss: 0.0029754638671875|unsuper_loss: 0.0
+average reward score: 4.5625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.56%) |Training time=0.48s (21.85%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3951|ppo_ep: 1|act_loss: -0.02294921875|cri_loss: -0.0074462890625|unsuper_loss: 0.0
+average reward score: 5.00390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.66%) |Training time=0.47s (21.73%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3952|ppo_ep: 1|act_loss: -0.01934814453125|cri_loss: -0.00791168212890625|unsuper_loss: 0.0
+average reward score: 5.703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.62%) |Training time=0.48s (21.79%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3953|ppo_ep: 1|act_loss: -0.001678466796875|cri_loss: 0.00296783447265625|unsuper_loss: 0.0
+average reward score: 4.6640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.53%) |Training time=0.48s (21.89%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3954|ppo_ep: 1|act_loss: 0.05401611328125|cri_loss: 0.03033447265625|unsuper_loss: 0.0
+average reward score: 5.73828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.88s |Gather latency=0.00s (0.00%) |Generate time=1.77s (61.51%) |Training time=0.47s (16.29%) |Others=0.64 (22.20%)|CurSamplesPerSec=11.12 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3955|ppo_ep: 1|act_loss: 0.0494384765625|cri_loss: 0.028076171875|unsuper_loss: 0.0
+average reward score: 4.640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.24%) |Training time=0.46s (21.09%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3956|ppo_ep: 1|act_loss: 0.08917236328125|cri_loss: 0.04974365234375|unsuper_loss: 0.0
+average reward score: 4.3984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.84%) |Training time=0.47s (21.55%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3957|ppo_ep: 1|act_loss: 0.04241943359375|cri_loss: 0.024505615234375|unsuper_loss: 0.0
+average reward score: 4.734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.91%) |Training time=0.47s (21.49%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3958|ppo_ep: 1|act_loss: 0.0198974609375|cri_loss: 0.01181793212890625|unsuper_loss: 0.0
+average reward score: 4.52734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.76%) |Training time=0.47s (21.63%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+[2023-04-14 11:13:33,404] [INFO] [logging.py:96:log_dist] [Rank 0] step=3960, skipped=56, lr=[5.3367390119534095e-06, 5.3367390119534095e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:13:33,423] [INFO] [timer.py:199:stop] epoch=0/micro_step=3960/global_step=3960, RunningAvgSamplesPerSec=105.32290912174726, CurrSamplesPerSec=106.21121211185998, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:13:33,516] [INFO] [logging.py:96:log_dist] [Rank 0] step=3960, skipped=64, lr=[2.772805029483517e-06, 2.772805029483517e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3959|ppo_ep: 1|act_loss: -0.020965576171875|cri_loss: -0.0093231201171875|unsuper_loss: 0.0
+average reward score: 5.3671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.01%) |Training time=0.46s (21.39%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3960|ppo_ep: 1|act_loss: 0.0175323486328125|cri_loss: 0.0121612548828125|unsuper_loss: 0.0
+average reward score: 5.56640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.61s (69.47%) |Training time=0.47s (20.32%) |Others=0.24 (10.21%)|CurSamplesPerSec=13.85 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3961|ppo_ep: 1|act_loss: -0.018310546875|cri_loss: -0.00521087646484375|unsuper_loss: 0.0
+average reward score: 5.359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.82%) |Training time=0.47s (21.58%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3962|ppo_ep: 1|act_loss: 0.04412841796875|cri_loss: 0.03289794921875|unsuper_loss: 0.0
+average reward score: 5.0859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.29%) |Training time=0.46s (21.09%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3963|ppo_ep: 1|act_loss: 0.0105743408203125|cri_loss: 0.006397247314453125|unsuper_loss: 0.0
+average reward score: 5.08203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.66%) |Training time=0.47s (21.74%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3964|ppo_ep: 1|act_loss: 0.006893157958984375|cri_loss: 0.008575439453125|unsuper_loss: 0.0
+average reward score: 4.8203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.35%) |Training time=0.46s (21.06%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3965|ppo_ep: 1|act_loss: 0.0237274169921875|cri_loss: 0.01345062255859375|unsuper_loss: 0.0
+average reward score: 5.015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.07%) |Training time=0.46s (21.18%) |Others=0.10 (4.76%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3966|ppo_ep: 1|act_loss: 0.03277587890625|cri_loss: 0.0184478759765625|unsuper_loss: 0.0
+average reward score: 5.08203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (72.99%) |Training time=0.45s (20.54%) |Others=0.14 (6.47%)|CurSamplesPerSec=14.46 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3967|ppo_ep: 1|act_loss: 0.0333251953125|cri_loss: 0.0197906494140625|unsuper_loss: 0.0
+average reward score: 4.6328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.10%) |Training time=0.46s (21.29%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3968|ppo_ep: 1|act_loss: -0.0061798095703125|cri_loss: -0.0029392242431640625|unsuper_loss: 0.0
+average reward score: 5.046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.95%) |Training time=0.47s (21.43%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
+[2023-04-14 11:13:55,489] [INFO] [logging.py:96:log_dist] [Rank 0] step=3970, skipped=56, lr=[5.318263833006314e-06, 5.318263833006314e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:13:55,507] [INFO] [timer.py:199:stop] epoch=0/micro_step=3970/global_step=3970, RunningAvgSamplesPerSec=105.32585319821645, CurrSamplesPerSec=109.12267687612147, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:13:55,603] [INFO] [logging.py:96:log_dist] [Rank 0] step=3970, skipped=64, lr=[2.7632355306834306e-06, 2.7632355306834306e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3969|ppo_ep: 1|act_loss: 0.009552001953125|cri_loss: 0.0053863525390625|unsuper_loss: 0.0
+average reward score: 4.78515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.08%) |Training time=0.46s (19.51%) |Others=0.10 (4.41%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3970|ppo_ep: 1|act_loss: 0.018157958984375|cri_loss: 0.0101318359375|unsuper_loss: 0.0
+average reward score: 4.83203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.63%) |Training time=0.48s (21.76%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3971|ppo_ep: 1|act_loss: 0.004344940185546875|cri_loss: 0.00278472900390625|unsuper_loss: 0.0
+average reward score: 5.15625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.04%) |Training time=0.46s (21.28%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3972|ppo_ep: 1|act_loss: -0.0034942626953125|cri_loss: -0.001590728759765625|unsuper_loss: 0.0
+average reward score: 5.06640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.27%) |Training time=0.46s (21.08%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3973|ppo_ep: 1|act_loss: -0.0035877227783203125|cri_loss: -0.0016889572143554688|unsuper_loss: 0.0
+average reward score: 4.3984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.66%) |Training time=0.48s (21.76%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3974|ppo_ep: 1|act_loss: 0.02191162109375|cri_loss: 0.016937255859375|unsuper_loss: 0.0
+average reward score: 4.9609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.64s (73.75%) |Training time=0.48s (21.73%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.40 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3975|ppo_ep: 1|act_loss: -0.0308837890625|cri_loss: -0.01500701904296875|unsuper_loss: 0.0
+average reward score: 5.4921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.38%) |Training time=0.49s (21.23%) |Others=0.10 (4.39%)|CurSamplesPerSec=13.99 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3976|ppo_ep: 1|act_loss: 0.0119476318359375|cri_loss: 0.008209228515625|unsuper_loss: 0.0
+average reward score: 5.3515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.28%) |Training time=0.48s (22.08%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3977|ppo_ep: 1|act_loss: -0.05389404296875|cri_loss: -0.0259246826171875|unsuper_loss: 0.0
+average reward score: 4.953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.31%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3978|ppo_ep: 1|act_loss: -0.01357269287109375|cri_loss: -0.006313323974609375|unsuper_loss: 0.0
+average reward score: 5.46875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.33%) |Training time=0.48s (22.08%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.46
+[2023-04-14 11:14:17,440] [INFO] [logging.py:96:log_dist] [Rank 0] step=3980, skipped=56, lr=[5.2997813426992765e-06, 5.2997813426992765e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:14:17,459] [INFO] [timer.py:199:stop] epoch=0/micro_step=3980/global_step=3980, RunningAvgSamplesPerSec=105.31787614664505, CurrSamplesPerSec=97.28826867151642, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:14:17,551] [INFO] [logging.py:96:log_dist] [Rank 0] step=3980, skipped=64, lr=[2.753662130097758e-06, 2.753662130097758e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3979|ppo_ep: 1|act_loss: 0.04278564453125|cri_loss: 0.0223236083984375|unsuper_loss: 0.0
+average reward score: 5.42578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.05%) |Training time=0.49s (22.40%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3980|ppo_ep: 1|act_loss: 0.0094451904296875|cri_loss: 0.0048828125|unsuper_loss: 0.0
+average reward score: 5.40625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.54%) |Training time=0.48s (21.88%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3981|ppo_ep: 1|act_loss: 0.016387939453125|cri_loss: 0.00946044921875|unsuper_loss: 0.0
+average reward score: 5.8203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.60s (69.80%) |Training time=0.47s (20.64%) |Others=0.22 (9.56%)|CurSamplesPerSec=13.98 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3982|ppo_ep: 1|act_loss: 0.06060791015625|cri_loss: 0.032379150390625|unsuper_loss: 0.0
+average reward score: 4.15234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.68%) |Training time=0.47s (21.71%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3983|ppo_ep: 1|act_loss: 0.026519775390625|cri_loss: 0.01392364501953125|unsuper_loss: 0.0
+average reward score: 5.0625
+-------------------------------------------------------------------------------------
+|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.60s (71.41%) |Training time=0.54s (24.12%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.30 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3984|ppo_ep: 1|act_loss: -0.050628662109375|cri_loss: -0.02349853515625|unsuper_loss: 0.0
+average reward score: 4.87890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.92%) |Training time=0.46s (21.45%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3985|ppo_ep: 1|act_loss: -0.0207672119140625|cri_loss: -0.01001739501953125|unsuper_loss: 0.0
+average reward score: 3.953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.48%) |Training time=0.48s (21.90%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3986|ppo_ep: 1|act_loss: 0.05877685546875|cri_loss: 0.03070068359375|unsuper_loss: 0.0
+average reward score: 4.40234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.19%) |Training time=0.46s (21.15%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3987|ppo_ep: 1|act_loss: -0.003879547119140625|cri_loss: -0.0012159347534179688|unsuper_loss: 0.0
+average reward score: 6.18359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.90%) |Training time=0.47s (21.50%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3988|ppo_ep: 1|act_loss: -0.00310516357421875|cri_loss: -0.0013723373413085938|unsuper_loss: 0.0
+average reward score: 5.609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.72%) |Training time=0.47s (21.66%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+[2023-04-14 11:14:39,322] [INFO] [logging.py:96:log_dist] [Rank 0] step=3990, skipped=56, lr=[5.281291814987394e-06, 5.281291814987394e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:14:39,341] [INFO] [timer.py:199:stop] epoch=0/micro_step=3990/global_step=3990, RunningAvgSamplesPerSec=105.31172796465408, CurrSamplesPerSec=106.26090811783111, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:14:39,433] [INFO] [logging.py:96:log_dist] [Rank 0] step=3990, skipped=64, lr=[2.7440849696273907e-06, 2.7440849696273907e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3989|ppo_ep: 1|act_loss: -0.0141143798828125|cri_loss: -0.0065765380859375|unsuper_loss: 0.0
+average reward score: 5.0
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.97%) |Training time=0.46s (21.42%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3990|ppo_ep: 1|act_loss: -0.00800323486328125|cri_loss: -0.003459930419921875|unsuper_loss: 0.0
+average reward score: 5.8984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.78%) |Training time=0.47s (21.60%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3991|ppo_ep: 1|act_loss: -0.013031005859375|cri_loss: -0.00531005859375|unsuper_loss: 0.0
+average reward score: 5.6953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.02%) |Training time=0.47s (21.40%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3992|ppo_ep: 1|act_loss: 0.0122833251953125|cri_loss: 0.0067596435546875|unsuper_loss: 0.0
+average reward score: 5.18359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.97%) |Training time=0.46s (21.39%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3993|ppo_ep: 1|act_loss: 0.0008268356323242188|cri_loss: 0.0008487701416015625|unsuper_loss: 0.0
+average reward score: 5.4765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.04%) |Training time=0.46s (21.35%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3994|ppo_ep: 1|act_loss: 0.050140380859375|cri_loss: 0.02655029296875|unsuper_loss: 0.0
+average reward score: 5.16796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.30%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3995|ppo_ep: 1|act_loss: 0.032928466796875|cri_loss: 0.0180206298828125|unsuper_loss: 0.0
+average reward score: 4.8046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.13%) |Training time=0.46s (21.25%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3996|ppo_ep: 1|act_loss: -0.0299072265625|cri_loss: -0.01325225830078125|unsuper_loss: 0.0
+average reward score: 5.78125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.32%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3997|ppo_ep: 1|act_loss: -0.0191192626953125|cri_loss: -0.00928497314453125|unsuper_loss: 0.0
+average reward score: 5.83203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.02%) |Training time=0.46s (21.36%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 3998|ppo_ep: 1|act_loss: 0.00289154052734375|cri_loss: 0.00174713134765625|unsuper_loss: 0.0
+average reward score: 5.140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.30%) |Training time=0.48s (20.45%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.58 |AvgSamplesPerSec=14.46
+[2023-04-14 11:15:01,190] [INFO] [logging.py:96:log_dist] [Rank 0] step=4000, skipped=56, lr=[5.262795523930069e-06, 5.262795523930069e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:15:01,208] [INFO] [timer.py:199:stop] epoch=0/micro_step=4000/global_step=4000, RunningAvgSamplesPerSec=105.31314272642126, CurrSamplesPerSec=108.65087362544847, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:15:01,301] [INFO] [logging.py:96:log_dist] [Rank 0] step=4000, skipped=64, lr=[2.73450419122895e-06, 2.73450419122895e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 3999|ppo_ep: 1|act_loss: 0.01080322265625|cri_loss: 0.00595855712890625|unsuper_loss: 0.0
+average reward score: 4.98046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.21%) |Training time=0.46s (21.16%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4000|ppo_ep: 1|act_loss: 0.00574493408203125|cri_loss: 0.0033092498779296875|unsuper_loss: 0.0
+average reward score: 6.46875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.83%) |Training time=0.47s (21.55%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4001|ppo_ep: 1|act_loss: 0.040130615234375|cri_loss: 0.021820068359375|unsuper_loss: 0.0
+average reward score: 4.5703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.87s |Gather latency=0.00s (0.00%) |Generate time=1.61s (55.99%) |Training time=0.46s (16.10%) |Others=0.80 (27.91%)|CurSamplesPerSec=11.16 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4002|ppo_ep: 1|act_loss: -0.0129547119140625|cri_loss: -0.005870819091796875|unsuper_loss: 0.0
+average reward score: 5.46875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.33%) |Training time=0.46s (21.05%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4003|ppo_ep: 1|act_loss: 0.00415802001953125|cri_loss: 0.0026340484619140625|unsuper_loss: 0.0
+average reward score: 4.9140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.92%) |Training time=0.47s (21.43%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4004|ppo_ep: 1|act_loss: 0.003192901611328125|cri_loss: 0.0031108856201171875|unsuper_loss: 0.0
+average reward score: 5.65234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.17%) |Training time=0.49s (21.44%) |Others=0.10 (4.38%)|CurSamplesPerSec=13.98 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4005|ppo_ep: 1|act_loss: 0.03448486328125|cri_loss: 0.017974853515625|unsuper_loss: 0.0
+average reward score: 5.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.87%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4006|ppo_ep: 1|act_loss: 0.00943756103515625|cri_loss: 0.005268096923828125|unsuper_loss: 0.0
+average reward score: 5.8828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.24%) |Training time=0.46s (21.13%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4007|ppo_ep: 1|act_loss: 0.0208740234375|cri_loss: 0.0106964111328125|unsuper_loss: 0.0
+average reward score: 4.76953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.48s (21.96%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4008|ppo_ep: 1|act_loss: 0.01227569580078125|cri_loss: 0.006473541259765625|unsuper_loss: 0.0
+average reward score: 4.453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.56%) |Training time=0.48s (21.84%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46
+[2023-04-14 11:15:23,749] [INFO] [logging.py:96:log_dist] [Rank 0] step=4010, skipped=56, lr=[5.244292743686956e-06, 5.244292743686956e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:15:23,767] [INFO] [timer.py:199:stop] epoch=0/micro_step=4010/global_step=4010, RunningAvgSamplesPerSec=105.30904361496287, CurrSamplesPerSec=102.18848468098487, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:15:23,859] [INFO] [logging.py:96:log_dist] [Rank 0] step=4010, skipped=64, lr=[2.7249199369126855e-06, 2.7249199369126855e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4009|ppo_ep: 1|act_loss: -0.009063720703125|cri_loss: -0.0037689208984375|unsuper_loss: 0.0
+average reward score: 5.3984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.48s (22.00%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4010|ppo_ep: 1|act_loss: -0.0294189453125|cri_loss: -0.0133209228515625|unsuper_loss: 0.0
+average reward score: 5.39453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.29%) |Training time=0.48s (22.09%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4011|ppo_ep: 1|act_loss: 0.0192413330078125|cri_loss: 0.010101318359375|unsuper_loss: 0.0
+average reward score: 4.64453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.76%) |Training time=0.47s (21.60%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4012|ppo_ep: 1|act_loss: -0.01611328125|cri_loss: -0.00748443603515625|unsuper_loss: 0.0
+average reward score: 5.3515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.47s (21.90%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4013|ppo_ep: 1|act_loss: -0.01181793212890625|cri_loss: -0.0032501220703125|unsuper_loss: 0.0
+average reward score: 4.7890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.58%) |Training time=0.50s (21.18%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.56 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4014|ppo_ep: 1|act_loss: -0.007022857666015625|cri_loss: -0.002872467041015625|unsuper_loss: 0.0
+average reward score: 6.125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.46s (21.38%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4015|ppo_ep: 1|act_loss: 0.009246826171875|cri_loss: 0.0051422119140625|unsuper_loss: 0.0
+average reward score: 5.23828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.24%) |Training time=0.46s (21.13%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4016|ppo_ep: 1|act_loss: 0.0111846923828125|cri_loss: 0.00629425048828125|unsuper_loss: 0.0
+average reward score: 5.8828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.26%) |Training time=0.48s (22.14%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4017|ppo_ep: 1|act_loss: 0.0282135009765625|cri_loss: 0.015533447265625|unsuper_loss: 0.0
+average reward score: 4.8671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.46%) |Training time=0.47s (21.92%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4018|ppo_ep: 1|act_loss: 0.022796630859375|cri_loss: 0.01273345947265625|unsuper_loss: 0.0
+average reward score: 5.46484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.27%) |Training time=0.48s (22.12%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
+[2023-04-14 11:15:45,614] [INFO] [logging.py:96:log_dist] [Rank 0] step=4020, skipped=56, lr=[5.2257837485138914e-06, 5.2257837485138914e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:15:45,632] [INFO] [timer.py:199:stop] epoch=0/micro_step=4020/global_step=4020, RunningAvgSamplesPerSec=105.30119557757467, CurrSamplesPerSec=102.19229714995767, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:15:45,725] [INFO] [logging.py:96:log_dist] [Rank 0] step=4020, skipped=64, lr=[2.7153323487403653e-06, 2.7153323487403653e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4019|ppo_ep: 1|act_loss: 0.0105133056640625|cri_loss: 0.006256103515625|unsuper_loss: 0.0
+average reward score: 6.2265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.46%) |Training time=0.48s (21.95%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4020|ppo_ep: 1|act_loss: -0.015472412109375|cri_loss: -0.007476806640625|unsuper_loss: 0.0
+average reward score: 5.1796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.54%) |Training time=0.47s (21.84%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4021|ppo_ep: 1|act_loss: -0.0288848876953125|cri_loss: -0.0139923095703125|unsuper_loss: 0.0
+average reward score: 5.26171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.69%) |Training time=0.47s (21.69%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4022|ppo_ep: 1|act_loss: -0.01552581787109375|cri_loss: -0.0073699951171875|unsuper_loss: 0.0
+average reward score: 5.39453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.78s |Gather latency=0.00s (0.00%) |Generate time=1.59s (57.30%) |Training time=0.47s (16.97%) |Others=0.71 (25.73%)|CurSamplesPerSec=11.52 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4023|ppo_ep: 1|act_loss: -0.01531982421875|cri_loss: -0.00725555419921875|unsuper_loss: 0.0
+average reward score: 5.74609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.46%) |Training time=0.48s (21.92%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4024|ppo_ep: 1|act_loss: 0.029022216796875|cri_loss: 0.015380859375|unsuper_loss: 0.0
+average reward score: 5.20703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.38%) |Training time=0.45s (20.97%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4025|ppo_ep: 1|act_loss: -0.012115478515625|cri_loss: -0.0057373046875|unsuper_loss: 0.0
+average reward score: 5.8125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.74%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4026|ppo_ep: 1|act_loss: -0.0033702850341796875|cri_loss: -0.0012416839599609375|unsuper_loss: 0.0
+average reward score: 4.88671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.48s (21.95%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4027|ppo_ep: 1|act_loss: 0.007076263427734375|cri_loss: 0.00379180908203125|unsuper_loss: 0.0
+average reward score: 4.6015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.47s (21.58%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4028|ppo_ep: 1|act_loss: -0.0110015869140625|cri_loss: -0.0049896240234375|unsuper_loss: 0.0
+average reward score: 5.2734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.54%) |Training time=0.50s (21.21%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.55 |AvgSamplesPerSec=14.46
+[2023-04-14 11:16:08,106] [INFO] [logging.py:96:log_dist] [Rank 0] step=4030, skipped=56, lr=[5.207268812758837e-06, 5.207268812758837e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:16:08,125] [INFO] [timer.py:199:stop] epoch=0/micro_step=4030/global_step=4030, RunningAvgSamplesPerSec=105.29472441602836, CurrSamplesPerSec=101.67324172839862, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:16:08,218] [INFO] [logging.py:96:log_dist] [Rank 0] step=4030, skipped=64, lr=[2.7057415688231765e-06, 2.7057415688231765e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4029|ppo_ep: 1|act_loss: 0.039093017578125|cri_loss: 0.020843505859375|unsuper_loss: 0.0
+average reward score: 4.5703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.48s (21.98%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4030|ppo_ep: 1|act_loss: -0.017974853515625|cri_loss: -0.00848388671875|unsuper_loss: 0.0
+average reward score: 5.48828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.23%) |Training time=0.48s (22.17%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4031|ppo_ep: 1|act_loss: -0.046630859375|cri_loss: -0.021697998046875|unsuper_loss: 0.0
+average reward score: 5.17578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.95%) |Training time=0.47s (21.43%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4032|ppo_ep: 1|act_loss: -0.0113372802734375|cri_loss: -0.004917144775390625|unsuper_loss: 0.0
+average reward score: 4.6640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.03%) |Training time=0.47s (21.40%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4033|ppo_ep: 1|act_loss: 0.027557373046875|cri_loss: 0.0144195556640625|unsuper_loss: 0.0
+average reward score: 5.30859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.60%) |Training time=0.47s (20.97%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.13 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4034|ppo_ep: 1|act_loss: -0.0010023117065429688|cri_loss: -0.00019073486328125|unsuper_loss: 0.0
+average reward score: 5.515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.51%) |Training time=0.47s (21.86%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4035|ppo_ep: 1|act_loss: -0.023345947265625|cri_loss: -0.0111083984375|unsuper_loss: 0.0
+average reward score: 5.25390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.81%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4036|ppo_ep: 1|act_loss: 0.03778076171875|cri_loss: 0.0196075439453125|unsuper_loss: 0.0
+average reward score: 4.6953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.35%) |Training time=0.48s (22.04%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4037|ppo_ep: 1|act_loss: 0.00446319580078125|cri_loss: 0.0024566650390625|unsuper_loss: 0.0
+average reward score: 5.546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.63s |Gather latency=0.00s (0.00%) |Generate time=1.59s (60.51%) |Training time=0.47s (17.72%) |Others=0.57 (21.77%)|CurSamplesPerSec=12.18 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4038|ppo_ep: 1|act_loss: -0.009307861328125|cri_loss: -0.0042724609375|unsuper_loss: 0.0
+average reward score: 4.84765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.91%) |Training time=0.46s (21.45%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
+[2023-04-14 11:16:30,361] [INFO] [logging.py:96:log_dist] [Rank 0] step=4040, skipped=56, lr=[5.188748210857804e-06, 5.188748210857804e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:16:30,379] [INFO] [timer.py:199:stop] epoch=0/micro_step=4040/global_step=4040, RunningAvgSamplesPerSec=105.28961572115509, CurrSamplesPerSec=100.88236204918982, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:16:30,472] [INFO] [logging.py:96:log_dist] [Rank 0] step=4040, skipped=64, lr=[2.696147739319613e-06, 2.696147739319613e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4039|ppo_ep: 1|act_loss: 0.0245361328125|cri_loss: 0.01309967041015625|unsuper_loss: 0.0
+average reward score: 5.0078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.21%) |Training time=0.48s (22.19%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4040|ppo_ep: 1|act_loss: -0.0211181640625|cri_loss: -0.01006317138671875|unsuper_loss: 0.0
+average reward score: 5.4453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.73%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4041|ppo_ep: 1|act_loss: -0.00572967529296875|cri_loss: -0.0024394989013671875|unsuper_loss: 0.0
+average reward score: 6.11328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.39%) |Training time=0.48s (22.00%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4042|ppo_ep: 1|act_loss: 6.723403930664062e-05|cri_loss: 0.0003829002380371094|unsuper_loss: 0.0
+average reward score: 4.8515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.70%) |Training time=0.51s (23.09%) |Others=0.12 (5.22%)|CurSamplesPerSec=14.45 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4043|ppo_ep: 1|act_loss: 0.01611328125|cri_loss: 0.0083465576171875|unsuper_loss: 0.0
+average reward score: 5.78125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.65%) |Training time=0.47s (21.72%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4044|ppo_ep: 1|act_loss: 0.017364501953125|cri_loss: 0.009002685546875|unsuper_loss: 0.0
+average reward score: 5.78515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.86%) |Training time=0.47s (21.47%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4045|ppo_ep: 1|act_loss: 0.04150390625|cri_loss: 0.021636962890625|unsuper_loss: 0.0
+average reward score: 6.04296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.49%) |Training time=0.44s (20.25%) |Others=0.11 (5.25%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4046|ppo_ep: 1|act_loss: 0.03741455078125|cri_loss: 0.0192413330078125|unsuper_loss: 0.0
+average reward score: 5.203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.79%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4047|ppo_ep: 1|act_loss: 0.017913818359375|cri_loss: 0.009490966796875|unsuper_loss: 0.0
+average reward score: 5.4765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.71%) |Others=0.10 (4.76%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4048|ppo_ep: 1|act_loss: 0.023406982421875|cri_loss: 0.0120697021484375|unsuper_loss: 0.0
+average reward score: 7.00390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.51%) |Training time=0.47s (21.87%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+[2023-04-14 11:16:52,062] [INFO] [logging.py:96:log_dist] [Rank 0] step=4050, skipped=56, lr=[5.170222217330791e-06, 5.170222217330791e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:16:52,081] [INFO] [timer.py:199:stop] epoch=0/micro_step=4050/global_step=4050, RunningAvgSamplesPerSec=105.28449685090004, CurrSamplesPerSec=100.30200816658396, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:16:52,173] [INFO] [logging.py:96:log_dist] [Rank 0] step=4050, skipped=64, lr=[2.686551002433372e-06, 2.686551002433372e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4049|ppo_ep: 1|act_loss: 0.0283660888671875|cri_loss: 0.01503753662109375|unsuper_loss: 0.0
+average reward score: 4.09375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.19%) |Training time=0.48s (22.21%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4050|ppo_ep: 1|act_loss: -0.0017862319946289062|cri_loss: -0.00014972686767578125|unsuper_loss: 0.0
+average reward score: 4.4296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.76%) |Training time=0.47s (21.60%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4051|ppo_ep: 1|act_loss: -0.0023956298828125|cri_loss: -0.0001583099365234375|unsuper_loss: 0.0
+average reward score: 4.73828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.80s |Gather latency=0.00s (0.00%) |Generate time=1.59s (56.88%) |Training time=0.47s (16.86%) |Others=0.73 (26.26%)|CurSamplesPerSec=11.44 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4052|ppo_ep: 1|act_loss: -0.0013427734375|cri_loss: 0.0010967254638671875|unsuper_loss: 0.0
+average reward score: 4.5390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.39%) |Training time=0.47s (21.94%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4053|ppo_ep: 1|act_loss: -0.03289794921875|cri_loss: -0.015869140625|unsuper_loss: 0.0
+average reward score: 5.0703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.40%) |Training time=0.47s (21.95%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4054|ppo_ep: 1|act_loss: -0.0076904296875|cri_loss: -0.0036258697509765625|unsuper_loss: 0.0
+average reward score: 4.6484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.48s (22.05%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4055|ppo_ep: 1|act_loss: 0.014862060546875|cri_loss: 0.007785797119140625|unsuper_loss: 0.0
+average reward score: 5.0703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.07%) |Training time=0.48s (22.29%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4056|ppo_ep: 1|act_loss: 0.003627777099609375|cri_loss: 0.00479888916015625|unsuper_loss: 0.0
+average reward score: 5.81640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.47s (21.91%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4057|ppo_ep: 1|act_loss: -0.003406524658203125|cri_loss: -0.0008668899536132812|unsuper_loss: 0.0
+average reward score: 5.44921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.02%) |Training time=0.48s (20.69%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4058|ppo_ep: 1|act_loss: 0.0282440185546875|cri_loss: 0.0143585205078125|unsuper_loss: 0.0
+average reward score: 5.64453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.25%) |Training time=0.48s (22.11%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
+[2023-04-14 11:17:14,465] [INFO] [logging.py:96:log_dist] [Rank 0] step=4060, skipped=56, lr=[5.151691106777714e-06, 5.151691106777714e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:17:14,483] [INFO] [timer.py:199:stop] epoch=0/micro_step=4060/global_step=4060, RunningAvgSamplesPerSec=105.27758348616538, CurrSamplesPerSec=104.85874689254413, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:17:14,576] [INFO] [logging.py:96:log_dist] [Rank 0] step=4060, skipped=64, lr=[2.6769515004112453e-06, 2.6769515004112453e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4059|ppo_ep: 1|act_loss: 0.018707275390625|cri_loss: 0.00998687744140625|unsuper_loss: 0.0
+average reward score: 5.734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.66%) |Training time=0.47s (21.67%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4060|ppo_ep: 1|act_loss: 0.01556396484375|cri_loss: 0.0082855224609375|unsuper_loss: 0.0
+average reward score: 5.63671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.35%) |Training time=0.48s (22.00%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4061|ppo_ep: 1|act_loss: -0.0537109375|cri_loss: -0.0259552001953125|unsuper_loss: 0.0
+average reward score: 5.68359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.27%) |Training time=0.49s (22.15%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.41 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4062|ppo_ep: 1|act_loss: -0.0125885009765625|cri_loss: -0.005950927734375|unsuper_loss: 0.0
+average reward score: 5.3359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.74s (75.58%) |Training time=0.46s (19.99%) |Others=0.10 (4.43%)|CurSamplesPerSec=13.93 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4063|ppo_ep: 1|act_loss: -0.029144287109375|cri_loss: -0.01389312744140625|unsuper_loss: 0.0
+average reward score: 4.890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.42%) |Training time=0.43s (19.88%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4064|ppo_ep: 1|act_loss: -0.018829345703125|cri_loss: -0.0090179443359375|unsuper_loss: 0.0
+average reward score: 4.671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.28%) |Training time=0.44s (20.06%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4065|ppo_ep: 1|act_loss: -0.03253173828125|cri_loss: -0.0152435302734375|unsuper_loss: 0.0
+average reward score: 5.1640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.20%) |Training time=0.46s (21.06%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4066|ppo_ep: 1|act_loss: -0.001811981201171875|cri_loss: -0.0007963180541992188|unsuper_loss: 0.0
+average reward score: 4.7578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.40s |Gather latency=0.00s (0.00%) |Generate time=1.61s (67.20%) |Training time=0.46s (19.05%) |Others=0.33 (13.75%)|CurSamplesPerSec=13.34 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4067|ppo_ep: 1|act_loss: -0.01019287109375|cri_loss: -0.004711151123046875|unsuper_loss: 0.0
+average reward score: 4.640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.31%) |Training time=0.46s (21.04%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4068|ppo_ep: 1|act_loss: -0.00948333740234375|cri_loss: -0.004062652587890625|unsuper_loss: 0.0
+average reward score: 5.5625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.09%) |Training time=0.44s (20.23%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+[2023-04-14 11:17:36,585] [INFO] [logging.py:96:log_dist] [Rank 0] step=4070, skipped=56, lr=[5.133155153874335e-06, 5.133155153874335e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:17:36,603] [INFO] [timer.py:199:stop] epoch=0/micro_step=4070/global_step=4070, RunningAvgSamplesPerSec=105.28821586996683, CurrSamplesPerSec=112.36743147915281, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:17:36,695] [INFO] [logging.py:96:log_dist] [Rank 0] step=4070, skipped=64, lr=[2.6673493755410096e-06, 2.6673493755410096e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4069|ppo_ep: 1|act_loss: -0.03155517578125|cri_loss: -0.015045166015625|unsuper_loss: 0.0
+average reward score: 4.50390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.79%) |Training time=0.45s (20.58%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4070|ppo_ep: 1|act_loss: 0.026214599609375|cri_loss: 0.01456451416015625|unsuper_loss: 0.0
+average reward score: 4.82421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.29%) |Training time=0.46s (21.07%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4071|ppo_ep: 1|act_loss: 0.01337432861328125|cri_loss: 0.00728607177734375|unsuper_loss: 0.0
+average reward score: 5.1171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.57%) |Training time=0.45s (20.78%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4072|ppo_ep: 1|act_loss: -0.022918701171875|cri_loss: -0.0107879638671875|unsuper_loss: 0.0
+average reward score: 4.2734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.78s (74.87%) |Training time=0.49s (20.81%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.49 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4073|ppo_ep: 1|act_loss: -0.0123291015625|cri_loss: -0.00556182861328125|unsuper_loss: 0.0
+average reward score: 4.3125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.18%) |Training time=0.46s (21.21%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4074|ppo_ep: 1|act_loss: 0.0379638671875|cri_loss: 0.021881103515625|unsuper_loss: 0.0
+average reward score: 4.9921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.81%) |Training time=0.47s (21.55%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4075|ppo_ep: 1|act_loss: -0.0139617919921875|cri_loss: -0.006725311279296875|unsuper_loss: 0.0
+average reward score: 5.1953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.31%) |Training time=0.48s (22.07%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4076|ppo_ep: 1|act_loss: -0.0021991729736328125|cri_loss: -0.0007162094116210938|unsuper_loss: 0.0
+average reward score: 5.4921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.03%) |Training time=0.46s (21.33%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4077|ppo_ep: 1|act_loss: 0.04083251953125|cri_loss: 0.0215606689453125|unsuper_loss: 0.0
+average reward score: 4.984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.46%) |Training time=0.45s (20.91%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4078|ppo_ep: 1|act_loss: -0.03167724609375|cri_loss: -0.014312744140625|unsuper_loss: 0.0
+average reward score: 5.1171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.30%) |Training time=0.46s (21.06%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+[2023-04-14 11:17:58,512] [INFO] [logging.py:96:log_dist] [Rank 0] step=4080, skipped=56, lr=[5.114614633368193e-06, 5.114614633368193e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:17:58,530] [INFO] [timer.py:199:stop] epoch=0/micro_step=4080/global_step=4080, RunningAvgSamplesPerSec=105.29089003351561, CurrSamplesPerSec=111.21455442164469, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:17:58,622] [INFO] [logging.py:96:log_dist] [Rank 0] step=4080, skipped=64, lr=[2.657744770149321e-06, 2.657744770149321e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4079|ppo_ep: 1|act_loss: -0.0562744140625|cri_loss: -0.0266876220703125|unsuper_loss: 0.0
+average reward score: 5.36328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.57%) |Training time=0.45s (20.80%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4080|ppo_ep: 1|act_loss: -0.01227569580078125|cri_loss: -0.00440216064453125|unsuper_loss: 0.0
+average reward score: 5.19921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.23%) |Training time=0.46s (21.12%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4081|ppo_ep: 1|act_loss: 0.013671875|cri_loss: 0.00727081298828125|unsuper_loss: 0.0
+average reward score: 5.0625
+-------------------------------------------------------------------------------------
+|E2E latency=3.27s |Gather latency=0.00s (0.00%) |Generate time=1.60s (48.92%) |Training time=0.47s (14.48%) |Others=1.20 (36.60%)|CurSamplesPerSec=9.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4082|ppo_ep: 1|act_loss: -0.0014829635620117188|cri_loss: 5.340576171875e-05|unsuper_loss: 0.0
+average reward score: 5.01171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.77%) |Training time=0.47s (21.59%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4083|ppo_ep: 1|act_loss: -0.007236480712890625|cri_loss: -0.002948760986328125|unsuper_loss: 0.0
+average reward score: 5.0703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.47s (21.58%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4084|ppo_ep: 1|act_loss: 0.002590179443359375|cri_loss: 0.001953125|unsuper_loss: 0.0
+average reward score: 5.2578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.64%) |Training time=0.47s (21.73%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4085|ppo_ep: 1|act_loss: 0.0026874542236328125|cri_loss: 0.0015869140625|unsuper_loss: 0.0
+average reward score: 4.46875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.73%) |Training time=0.47s (21.65%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4086|ppo_ep: 1|act_loss: 0.030548095703125|cri_loss: 0.018157958984375|unsuper_loss: 0.0
+average reward score: 4.9609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.81%) |Training time=0.47s (21.55%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4087|ppo_ep: 1|act_loss: -0.02142333984375|cri_loss: -0.01007080078125|unsuper_loss: 0.0
+average reward score: 5.02734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.77s (74.70%) |Training time=0.50s (21.09%) |Others=0.10 (4.21%)|CurSamplesPerSec=13.49 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4088|ppo_ep: 1|act_loss: 0.0472412109375|cri_loss: 0.025299072265625|unsuper_loss: 0.0
+average reward score: 4.94921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.81%) |Training time=0.47s (21.56%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
+[2023-04-14 11:18:21,553] [INFO] [logging.py:96:log_dist] [Rank 0] step=4090, skipped=56, lr=[5.096069820074525e-06, 5.096069820074525e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:18:21,571] [INFO] [timer.py:199:stop] epoch=0/micro_step=4090/global_step=4090, RunningAvgSamplesPerSec=105.28498538237922, CurrSamplesPerSec=98.45610699342147, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:18:21,664] [INFO] [logging.py:96:log_dist] [Rank 0] step=4090, skipped=64, lr=[2.6481378265995993e-06, 2.6481378265995993e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4089|ppo_ep: 1|act_loss: -0.0207672119140625|cri_loss: -0.0098876953125|unsuper_loss: 0.0
+average reward score: 5.0625
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.24%) |Training time=0.49s (22.18%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4090|ppo_ep: 1|act_loss: -0.00774383544921875|cri_loss: -0.0029888153076171875|unsuper_loss: 0.0
+average reward score: 5.19921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.26%) |Training time=0.50s (22.23%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.34 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4091|ppo_ep: 1|act_loss: -0.01397705078125|cri_loss: -0.006793975830078125|unsuper_loss: 0.0
+average reward score: 5.12890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.63%) |Training time=0.48s (20.95%) |Others=0.10 (4.41%)|CurSamplesPerSec=14.10 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4092|ppo_ep: 1|act_loss: 0.02203369140625|cri_loss: 0.0111541748046875|unsuper_loss: 0.0
+average reward score: 4.75390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.15%) |Training time=0.45s (21.18%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4093|ppo_ep: 1|act_loss: 0.019866943359375|cri_loss: 0.01026153564453125|unsuper_loss: 0.0
+average reward score: 5.515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.45%) |Training time=0.45s (20.86%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4094|ppo_ep: 1|act_loss: 0.034454345703125|cri_loss: 0.018218994140625|unsuper_loss: 0.0
+average reward score: 5.3828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.39%) |Training time=0.45s (20.93%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4095|ppo_ep: 1|act_loss: 0.031097412109375|cri_loss: 0.0160675048828125|unsuper_loss: 0.0
+average reward score: 4.54296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.29%) |Training time=0.45s (21.02%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4096|ppo_ep: 1|act_loss: 0.00621795654296875|cri_loss: 0.0036296844482421875|unsuper_loss: 0.0
+average reward score: 5.5
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.18%) |Training time=0.46s (21.17%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4097|ppo_ep: 1|act_loss: -0.00027561187744140625|cri_loss: 0.0006251335144042969|unsuper_loss: 0.0
+average reward score: 5.3828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.20%) |Training time=0.46s (21.13%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4098|ppo_ep: 1|act_loss: 0.01508331298828125|cri_loss: 0.00829315185546875|unsuper_loss: 0.0
+average reward score: 5.44921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.48%) |Training time=0.45s (20.86%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
+[2023-04-14 11:18:43,269] [INFO] [logging.py:96:log_dist] [Rank 0] step=4100, skipped=56, lr=[5.077520988872205e-06, 5.077520988872205e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:18:43,287] [INFO] [timer.py:199:stop] epoch=0/micro_step=4100/global_step=4100, RunningAvgSamplesPerSec=105.28956076846497, CurrSamplesPerSec=102.20793903058447, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:18:43,379] [INFO] [logging.py:96:log_dist] [Rank 0] step=4100, skipped=64, lr=[2.638528687289925e-06, 2.638528687289925e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4099|ppo_ep: 1|act_loss: -0.001491546630859375|cri_loss: -0.0004725456237792969|unsuper_loss: 0.0
+average reward score: 5.9921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.50%) |Training time=0.48s (21.91%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4100|ppo_ep: 1|act_loss: 0.002899169921875|cri_loss: 0.002117156982421875|unsuper_loss: 0.0
+average reward score: 5.15625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.23%) |Training time=0.46s (21.15%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4101|ppo_ep: 1|act_loss: 0.00829315185546875|cri_loss: 0.004711151123046875|unsuper_loss: 0.0
+average reward score: 5.30859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.13%) |Training time=0.44s (20.12%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4102|ppo_ep: 1|act_loss: -0.0052642822265625|cri_loss: -0.00102996826171875|unsuper_loss: 0.0
+average reward score: 4.90234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.61s |Gather latency=0.00s (0.00%) |Generate time=1.78s (68.45%) |Training time=0.46s (17.74%) |Others=0.36 (13.81%)|CurSamplesPerSec=12.27 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4103|ppo_ep: 1|act_loss: 0.014617919921875|cri_loss: 0.00801849365234375|unsuper_loss: 0.0
+average reward score: 5.3046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.52%) |Training time=0.43s (19.75%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4104|ppo_ep: 1|act_loss: 0.0107421875|cri_loss: 0.005863189697265625|unsuper_loss: 0.0
+average reward score: 4.99609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.66%) |Training time=0.42s (19.66%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4105|ppo_ep: 1|act_loss: 0.0217437744140625|cri_loss: 0.01155853271484375|unsuper_loss: 0.0
+average reward score: 5.71875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.80%) |Training time=0.44s (20.54%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4106|ppo_ep: 1|act_loss: 0.020599365234375|cri_loss: 0.01052093505859375|unsuper_loss: 0.0
+average reward score: 5.765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.45%) |Training time=0.45s (20.88%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4107|ppo_ep: 1|act_loss: 0.05413818359375|cri_loss: 0.0306396484375|unsuper_loss: 0.0
+average reward score: 4.96484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.07%) |Training time=0.46s (21.29%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4108|ppo_ep: 1|act_loss: 0.0389404296875|cri_loss: 0.0228271484375|unsuper_loss: 0.0
+average reward score: 4.6796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.82%) |Training time=0.44s (20.49%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+[2023-04-14 11:19:05,339] [INFO] [logging.py:96:log_dist] [Rank 0] step=4110, skipped=56, lr=[5.058968414699655e-06, 5.058968414699655e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:19:05,358] [INFO] [timer.py:199:stop] epoch=0/micro_step=4110/global_step=4110, RunningAvgSamplesPerSec=105.3072645068091, CurrSamplesPerSec=110.64967844108436, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:19:05,450] [INFO] [logging.py:96:log_dist] [Rank 0] step=4110, skipped=64, lr=[2.628917494650923e-06, 2.628917494650923e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4109|ppo_ep: 1|act_loss: -0.0223541259765625|cri_loss: -0.01024627685546875|unsuper_loss: 0.0
+average reward score: 5.34765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.41%) |Training time=0.45s (20.94%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4110|ppo_ep: 1|act_loss: -0.004241943359375|cri_loss: -0.0017843246459960938|unsuper_loss: 0.0
+average reward score: 5.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.65%) |Training time=0.47s (21.60%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4111|ppo_ep: 1|act_loss: -0.0271453857421875|cri_loss: -0.0132598876953125|unsuper_loss: 0.0
+average reward score: 6.109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.88%) |Training time=0.46s (21.46%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4112|ppo_ep: 1|act_loss: -0.0033969879150390625|cri_loss: -0.001392364501953125|unsuper_loss: 0.0
+average reward score: 4.546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.47s (21.89%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4113|ppo_ep: 1|act_loss: -0.0269622802734375|cri_loss: -0.01311492919921875|unsuper_loss: 0.0
+average reward score: 5.859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.65%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4114|ppo_ep: 1|act_loss: 0.013763427734375|cri_loss: 0.007221221923828125|unsuper_loss: 0.0
+average reward score: 4.4296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.76%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4115|ppo_ep: 1|act_loss: 0.00577545166015625|cri_loss: 0.0037689208984375|unsuper_loss: 0.0
+average reward score: 4.8515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.61%) |Training time=0.47s (21.72%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4116|ppo_ep: 1|act_loss: 0.0099029541015625|cri_loss: 0.0052490234375|unsuper_loss: 0.0
+average reward score: 4.92578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.77%) |Training time=0.46s (21.30%) |Others=0.11 (4.92%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4117|ppo_ep: 1|act_loss: 0.0099945068359375|cri_loss: 0.005489349365234375|unsuper_loss: 0.0
+average reward score: 5.71484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.74s (74.40%) |Training time=0.48s (20.70%) |Others=0.11 (4.90%)|CurSamplesPerSec=13.66 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4118|ppo_ep: 1|act_loss: 0.04541015625|cri_loss: 0.023223876953125|unsuper_loss: 0.0
+average reward score: 5.296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.95%) |Training time=0.46s (21.08%) |Others=0.11 (4.98%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+[2023-04-14 11:19:27,136] [INFO] [logging.py:96:log_dist] [Rank 0] step=4120, skipped=56, lr=[5.040412372550783e-06, 5.040412372550783e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:19:27,154] [INFO] [timer.py:199:stop] epoch=0/micro_step=4120/global_step=4120, RunningAvgSamplesPerSec=105.30798613236172, CurrSamplesPerSec=111.58476661744949, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:19:27,247] [INFO] [logging.py:96:log_dist] [Rank 0] step=4120, skipped=64, lr=[2.6193043911436534e-06, 2.6193043911436534e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4119|ppo_ep: 1|act_loss: -0.01499176025390625|cri_loss: -0.007236480712890625|unsuper_loss: 0.0
+average reward score: 4.42578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.81%) |Training time=0.45s (20.59%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4120|ppo_ep: 1|act_loss: -0.0572509765625|cri_loss: -0.027008056640625|unsuper_loss: 0.0
+average reward score: 4.7578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.84%) |Training time=0.47s (20.74%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.06 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4121|ppo_ep: 1|act_loss: -0.0028820037841796875|cri_loss: -0.0010728836059570312|unsuper_loss: 0.0
+average reward score: 5.9375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.32%) |Training time=0.45s (20.98%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4122|ppo_ep: 1|act_loss: 0.009918212890625|cri_loss: 0.005207061767578125|unsuper_loss: 0.0
+average reward score: 5.60546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.30%) |Training time=0.45s (21.02%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4123|ppo_ep: 1|act_loss: 0.0007719993591308594|cri_loss: 0.0010890960693359375|unsuper_loss: 0.0
+average reward score: 5.359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.59s (67.95%) |Training time=0.45s (19.46%) |Others=0.29 (12.59%)|CurSamplesPerSec=13.70 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4124|ppo_ep: 1|act_loss: -0.02691650390625|cri_loss: -0.013092041015625|unsuper_loss: 0.0
+average reward score: 5.3984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.46s (21.29%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4125|ppo_ep: 1|act_loss: -0.0169830322265625|cri_loss: -0.00821685791015625|unsuper_loss: 0.0
+average reward score: 4.98828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.77%) |Training time=0.46s (21.57%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4126|ppo_ep: 1|act_loss: 0.0174407958984375|cri_loss: 0.0089874267578125|unsuper_loss: 0.0
+average reward score: 6.0703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.02%) |Training time=0.46s (21.29%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4127|ppo_ep: 1|act_loss: 0.0179443359375|cri_loss: 0.00942230224609375|unsuper_loss: 0.0
+average reward score: 5.984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.67%) |Training time=0.47s (21.70%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4128|ppo_ep: 1|act_loss: 0.0059051513671875|cri_loss: 0.0035343170166015625|unsuper_loss: 0.0
+average reward score: 4.87109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.78%) |Training time=0.46s (21.55%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.46
+[2023-04-14 11:19:48,896] [INFO] [logging.py:96:log_dist] [Rank 0] step=4130, skipped=56, lr=[5.021853137470893e-06, 5.021853137470893e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:19:48,914] [INFO] [timer.py:199:stop] epoch=0/micro_step=4130/global_step=4130, RunningAvgSamplesPerSec=105.31352380297785, CurrSamplesPerSec=104.6573610334602, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:19:49,007] [INFO] [logging.py:96:log_dist] [Rank 0] step=4130, skipped=64, lr=[2.6096895192575005e-06, 2.6096895192575005e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4129|ppo_ep: 1|act_loss: 0.0205078125|cri_loss: 0.01119232177734375|unsuper_loss: 0.0
+average reward score: 5.18359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.53%) |Training time=0.47s (21.82%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
+[2023-04-14 11:19:51,138] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 4130|ppo_ep: 1|act_loss: -0.0105438232421875|cri_loss: -0.00447845458984375|unsuper_loss: 0.0
+average reward score: 5.0234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.18%) |Training time=0.46s (21.63%) |Others=0.09 (4.19%)|CurSamplesPerSec=15.03 |AvgSamplesPerSec=14.46
+[2023-04-14 11:19:53,458] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 4131|ppo_ep: 1|act_loss: 0.0074615478515625|cri_loss: 0.0038928985595703125|unsuper_loss: 0.0
+average reward score: 5.30859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.67s (71.92%) |Training time=0.56s (24.20%) |Others=0.09 (3.87%)|CurSamplesPerSec=13.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4132|ppo_ep: 1|act_loss: -0.0083770751953125|cri_loss: -0.0036468505859375|unsuper_loss: 0.0
+average reward score: 4.953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.01%) |Training time=0.46s (21.31%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4133|ppo_ep: 1|act_loss: -0.0003032684326171875|cri_loss: 0.001373291015625|unsuper_loss: 0.0
+average reward score: 4.80859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.54%) |Training time=0.44s (20.76%) |Others=0.10 (4.70%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4134|ppo_ep: 1|act_loss: -0.030120849609375|cri_loss: -0.013824462890625|unsuper_loss: 0.0
+average reward score: 5.36328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.81%) |Training time=0.46s (21.50%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4135|ppo_ep: 1|act_loss: 0.003726959228515625|cri_loss: 0.002178192138671875|unsuper_loss: 0.0
+average reward score: 5.06640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.19%) |Training time=0.46s (21.17%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4136|ppo_ep: 1|act_loss: -0.03802490234375|cri_loss: -0.0180511474609375|unsuper_loss: 0.0
+average reward score: 4.85546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.01%) |Training time=0.46s (21.32%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4137|ppo_ep: 1|act_loss: 0.008544921875|cri_loss: 0.00445556640625|unsuper_loss: 0.0
+average reward score: 5.66796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.35%) |Training time=0.45s (20.98%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4138|ppo_ep: 1|act_loss: 0.0252685546875|cri_loss: 0.01383209228515625|unsuper_loss: 0.0
+average reward score: 5.7421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.82%) |Training time=0.46s (21.51%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
+[2023-04-14 11:20:10,518] [INFO] [logging.py:96:log_dist] [Rank 0] step=4140, skipped=56, lr=[5.003290984552626e-06, 5.003290984552626e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:20:10,536] [INFO] [timer.py:199:stop] epoch=0/micro_step=4140/global_step=4140, RunningAvgSamplesPerSec=105.31356977645355, CurrSamplesPerSec=108.1466958779957, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:20:10,629] [INFO] [logging.py:96:log_dist] [Rank 0] step=4140, skipped=66, lr=[2.6019964442854366e-06, 2.6019964442854366e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4139|ppo_ep: 1|act_loss: 0.03631591796875|cri_loss: 0.0190582275390625|unsuper_loss: 0.0
+average reward score: 6.0703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.93%) |Training time=0.46s (21.40%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4140|ppo_ep: 1|act_loss: 0.0304107666015625|cri_loss: 0.0156097412109375|unsuper_loss: 0.0
+average reward score: 5.5703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.68%) |Training time=0.47s (21.65%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4141|ppo_ep: 1|act_loss: 0.02288818359375|cri_loss: 0.01279449462890625|unsuper_loss: 0.0
+average reward score: 4.734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.59%) |Training time=0.47s (21.76%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4142|ppo_ep: 1|act_loss: -0.0200042724609375|cri_loss: -0.00955963134765625|unsuper_loss: 0.0
+average reward score: 5.38671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.58%) |Training time=0.47s (21.75%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4143|ppo_ep: 1|act_loss: -0.025634765625|cri_loss: -0.0119476318359375|unsuper_loss: 0.0
+average reward score: 5.26171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.52%) |Training time=0.47s (21.82%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4144|ppo_ep: 1|act_loss: -0.01433563232421875|cri_loss: -0.006839752197265625|unsuper_loss: 0.0
+average reward score: 5.3203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.59%) |Training time=0.47s (21.81%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4145|ppo_ep: 1|act_loss: -0.00942230224609375|cri_loss: -0.0035762786865234375|unsuper_loss: 0.0
+average reward score: 5.3984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.87%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4146|ppo_ep: 1|act_loss: 0.019073486328125|cri_loss: 0.01001739501953125|unsuper_loss: 0.0
+average reward score: 4.6171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.61s (69.23%) |Training time=0.61s (26.14%) |Others=0.11 (4.63%)|CurSamplesPerSec=13.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4147|ppo_ep: 1|act_loss: 0.0037994384765625|cri_loss: 0.002201080322265625|unsuper_loss: 0.0
+average reward score: 5.17578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.52%) |Training time=0.47s (21.82%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4148|ppo_ep: 1|act_loss: -0.037200927734375|cri_loss: -0.0171966552734375|unsuper_loss: 0.0
+average reward score: 5.0546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.39%) |Training time=0.48s (21.98%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+[2023-04-14 11:20:32,261] [INFO] [logging.py:96:log_dist] [Rank 0] step=4150, skipped=56, lr=[4.984726188931862e-06, 4.984726188931862e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:20:32,296] [INFO] [timer.py:199:stop] epoch=0/micro_step=4150/global_step=4150, RunningAvgSamplesPerSec=105.30342576879355, CurrSamplesPerSec=100.15022639020299, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:20:32,388] [INFO] [logging.py:96:log_dist] [Rank 0] step=4150, skipped=66, lr=[2.592378748472863e-06, 2.592378748472863e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4149|ppo_ep: 1|act_loss: -0.03155517578125|cri_loss: -0.0149993896484375|unsuper_loss: 0.0
+average reward score: 5.51171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.65%) |Training time=0.48s (21.83%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4150|ppo_ep: 1|act_loss: 0.03875732421875|cri_loss: 0.0203857421875|unsuper_loss: 0.0
+average reward score: 5.96484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.10%) |Training time=0.46s (20.47%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.09 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4151|ppo_ep: 1|act_loss: 0.056396484375|cri_loss: 0.029937744140625|unsuper_loss: 0.0
+average reward score: 6.0234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.16%) |Training time=0.45s (21.19%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4152|ppo_ep: 1|act_loss: 0.032470703125|cri_loss: 0.0168304443359375|unsuper_loss: 0.0
+average reward score: 6.14453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.99%) |Training time=0.46s (21.34%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4153|ppo_ep: 1|act_loss: 0.056732177734375|cri_loss: 0.0311279296875|unsuper_loss: 0.0
+average reward score: 4.90234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.96%) |Training time=0.46s (21.40%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4154|ppo_ep: 1|act_loss: -0.00653839111328125|cri_loss: -0.001399993896484375|unsuper_loss: 0.0
+average reward score: 5.4375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.98%) |Training time=0.46s (21.39%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4155|ppo_ep: 1|act_loss: 0.0716552734375|cri_loss: 0.03912353515625|unsuper_loss: 0.0
+average reward score: 5.58984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.38s |Gather latency=0.00s (0.00%) |Generate time=1.61s (67.83%) |Training time=0.45s (19.08%) |Others=0.31 (13.09%)|CurSamplesPerSec=13.47 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4156|ppo_ep: 1|act_loss: -0.0033702850341796875|cri_loss: -0.000652313232421875|unsuper_loss: 0.0
+average reward score: 6.1640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.96%) |Training time=0.46s (21.38%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4157|ppo_ep: 1|act_loss: -0.047393798828125|cri_loss: -0.022369384765625|unsuper_loss: 0.0
+average reward score: 4.6484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.46s (21.34%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4158|ppo_ep: 1|act_loss: -0.033660888671875|cri_loss: -0.0152130126953125|unsuper_loss: 0.0
+average reward score: 5.66796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.25%) |Training time=0.45s (21.10%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
+[2023-04-14 11:20:54,125] [INFO] [logging.py:96:log_dist] [Rank 0] step=4160, skipped=56, lr=[4.966159025783661e-06, 4.966159025783661e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:20:54,143] [INFO] [timer.py:199:stop] epoch=0/micro_step=4160/global_step=4160, RunningAvgSamplesPerSec=105.31034590166539, CurrSamplesPerSec=109.2526283915123, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:20:54,235] [INFO] [logging.py:96:log_dist] [Rank 0] step=4160, skipped=66, lr=[2.5827596833843844e-06, 2.5827596833843844e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4159|ppo_ep: 1|act_loss: -0.035186767578125|cri_loss: -0.016571044921875|unsuper_loss: 0.0
+average reward score: 5.31640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.20%) |Training time=0.46s (21.16%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4160|ppo_ep: 1|act_loss: 0.023956298828125|cri_loss: 0.0150146484375|unsuper_loss: 0.0
+average reward score: 4.5390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.21%) |Training time=0.45s (21.13%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4161|ppo_ep: 1|act_loss: 0.032257080078125|cri_loss: 0.0177764892578125|unsuper_loss: 0.0
+average reward score: 5.26953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.55%) |Training time=0.47s (20.15%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4162|ppo_ep: 1|act_loss: 0.0159454345703125|cri_loss: 0.00836181640625|unsuper_loss: 0.0
+average reward score: 5.22265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.52%) |Training time=0.45s (20.79%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4163|ppo_ep: 1|act_loss: 0.042205810546875|cri_loss: 0.0225067138671875|unsuper_loss: 0.0
+average reward score: 6.37109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.00%) |Training time=0.46s (21.35%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4164|ppo_ep: 1|act_loss: 0.018768310546875|cri_loss: 0.0106201171875|unsuper_loss: 0.0
+average reward score: 5.2109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.17%) |Training time=0.45s (21.15%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4165|ppo_ep: 1|act_loss: 0.022857666015625|cri_loss: 0.01219940185546875|unsuper_loss: 0.0
+average reward score: 4.62109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.74%) |Training time=0.47s (21.61%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4166|ppo_ep: 1|act_loss: -0.009429931640625|cri_loss: -0.003875732421875|unsuper_loss: 0.0
+average reward score: 5.7734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.43%) |Training time=0.45s (20.90%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4167|ppo_ep: 1|act_loss: 0.03271484375|cri_loss: 0.018402099609375|unsuper_loss: 0.0
+average reward score: 5.53515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.67s |Gather latency=0.00s (0.00%) |Generate time=1.59s (59.66%) |Training time=0.44s (16.65%) |Others=0.63 (23.69%)|CurSamplesPerSec=11.98 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4168|ppo_ep: 1|act_loss: -0.02325439453125|cri_loss: -0.01129150390625|unsuper_loss: 0.0
+average reward score: 4.9296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.65%) |Training time=0.44s (20.65%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
+[2023-04-14 11:21:16,351] [INFO] [logging.py:96:log_dist] [Rank 0] step=4170, skipped=56, lr=[4.94758977031817e-06, 4.94758977031817e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:21:16,369] [INFO] [timer.py:199:stop] epoch=0/micro_step=4170/global_step=4170, RunningAvgSamplesPerSec=105.3199038682373, CurrSamplesPerSec=108.86511188022202, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:21:16,461] [INFO] [logging.py:96:log_dist] [Rank 0] step=4170, skipped=66, lr=[2.5731393915977522e-06, 2.5731393915977522e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4169|ppo_ep: 1|act_loss: -0.0391845703125|cri_loss: -0.0189056396484375|unsuper_loss: 0.0
+average reward score: 5.53125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.16%) |Training time=0.46s (21.22%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4170|ppo_ep: 1|act_loss: -0.0283660888671875|cri_loss: -0.0117950439453125|unsuper_loss: 0.0
+average reward score: 6.078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.33%) |Training time=0.45s (20.99%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4171|ppo_ep: 1|act_loss: 0.000972747802734375|cri_loss: 0.0014219284057617188|unsuper_loss: 0.0
+average reward score: 4.6171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.62%) |Training time=0.44s (20.69%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4172|ppo_ep: 1|act_loss: -0.00641632080078125|cri_loss: -0.00286102294921875|unsuper_loss: 0.0
+average reward score: 4.84375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.36%) |Training time=0.45s (20.99%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4173|ppo_ep: 1|act_loss: 0.0224609375|cri_loss: 0.01314544677734375|unsuper_loss: 0.0
+average reward score: 5.78515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.95%) |Training time=0.46s (21.40%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4174|ppo_ep: 1|act_loss: -0.035736083984375|cri_loss: -0.0157012939453125|unsuper_loss: 0.0
+average reward score: 5.8359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.41%) |Training time=0.45s (20.92%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4175|ppo_ep: 1|act_loss: 0.0092315673828125|cri_loss: 0.005367279052734375|unsuper_loss: 0.0
+average reward score: 5.22265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.33%) |Training time=0.45s (21.03%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4176|ppo_ep: 1|act_loss: 0.028106689453125|cri_loss: 0.0146331787109375|unsuper_loss: 0.0
+average reward score: 4.40234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.41%) |Training time=0.48s (20.31%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.66 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4177|ppo_ep: 1|act_loss: 0.022796630859375|cri_loss: 0.0117950439453125|unsuper_loss: 0.0
+average reward score: 6.27734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.42%) |Training time=0.45s (20.89%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4178|ppo_ep: 1|act_loss: 0.0141754150390625|cri_loss: 0.007419586181640625|unsuper_loss: 0.0
+average reward score: 6.25390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.00%) |Training time=0.47s (21.39%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
+[2023-04-14 11:21:38,140] [INFO] [logging.py:96:log_dist] [Rank 0] step=4180, skipped=56, lr=[4.92901869777655e-06, 4.92901869777655e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:21:39,028] [INFO] [timer.py:199:stop] epoch=0/micro_step=4180/global_step=4180, RunningAvgSamplesPerSec=105.2561419271204, CurrSamplesPerSec=27.181817887243163, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:21:39,122] [INFO] [logging.py:96:log_dist] [Rank 0] step=4180, skipped=66, lr=[2.563518015708896e-06, 2.563518015708896e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4179|ppo_ep: 1|act_loss: 0.0024871826171875|cri_loss: 0.0018558502197265625|unsuper_loss: 0.0
+average reward score: 5.4375
+-------------------------------------------------------------------------------------
+|E2E latency=3.13s |Gather latency=0.00s (0.00%) |Generate time=1.69s (54.01%) |Training time=1.34s (42.79%) |Others=0.10 (3.20%)|CurSamplesPerSec=10.21 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4180|ppo_ep: 1|act_loss: 0.01177215576171875|cri_loss: 0.00606536865234375|unsuper_loss: 0.0
+average reward score: 5.9375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.97%) |Training time=0.44s (20.36%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4181|ppo_ep: 1|act_loss: -0.051971435546875|cri_loss: -0.025054931640625|unsuper_loss: 0.0
+average reward score: 4.88671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.38%) |Training time=0.43s (19.87%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4182|ppo_ep: 1|act_loss: -0.03253173828125|cri_loss: -0.015716552734375|unsuper_loss: 0.0
+average reward score: 5.7109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.21%) |Training time=0.44s (20.16%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4183|ppo_ep: 1|act_loss: 0.00038909912109375|cri_loss: 0.0009660720825195312|unsuper_loss: 0.0
+average reward score: 6.40625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.74%) |Training time=0.44s (20.58%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4184|ppo_ep: 1|act_loss: -0.037841796875|cri_loss: -0.0181884765625|unsuper_loss: 0.0
+average reward score: 4.83984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.50%) |Training time=0.45s (20.82%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4185|ppo_ep: 1|act_loss: -0.006397247314453125|cri_loss: -0.002197265625|unsuper_loss: 0.0
+average reward score: 5.51171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.35%) |Training time=0.45s (20.99%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4186|ppo_ep: 1|act_loss: -0.02880859375|cri_loss: -0.01381683349609375|unsuper_loss: 0.0
+average reward score: 4.609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.18%) |Training time=0.45s (21.15%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4187|ppo_ep: 1|act_loss: -0.026763916015625|cri_loss: -0.0127716064453125|unsuper_loss: 0.0
+average reward score: 5.296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.53%) |Training time=0.45s (20.78%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4188|ppo_ep: 1|act_loss: -0.01885986328125|cri_loss: -0.00865936279296875|unsuper_loss: 0.0
+average reward score: 4.49609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.45s (21.09%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
+[2023-04-14 11:22:00,467] [INFO] [logging.py:96:log_dist] [Rank 0] step=4190, skipped=56, lr=[4.910446083426898e-06, 4.910446083426898e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:22:00,728] [INFO] [timer.py:199:stop] epoch=0/micro_step=4190/global_step=4190, RunningAvgSamplesPerSec=105.25487124469956, CurrSamplesPerSec=59.8330459467797, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:22:00,820] [INFO] [logging.py:96:log_dist] [Rank 0] step=4190, skipped=66, lr=[2.5538956983298186e-06, 2.5538956983298186e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4189|ppo_ep: 1|act_loss: -0.0031604766845703125|cri_loss: -0.000560760498046875|unsuper_loss: 0.0
+average reward score: 5.26171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.39s |Gather latency=0.00s (0.00%) |Generate time=1.59s (66.67%) |Training time=0.70s (29.16%) |Others=0.10 (4.17%)|CurSamplesPerSec=13.38 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4190|ppo_ep: 1|act_loss: 0.0625|cri_loss: 0.032318115234375|unsuper_loss: 0.0
+average reward score: 5.0625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.55%) |Training time=0.45s (20.77%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4191|ppo_ep: 1|act_loss: 0.029144287109375|cri_loss: 0.014923095703125|unsuper_loss: 0.0
+average reward score: 4.51171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.74%) |Training time=0.47s (19.96%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4192|ppo_ep: 1|act_loss: 0.008636474609375|cri_loss: 0.0048980712890625|unsuper_loss: 0.0
+average reward score: 5.68359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.19%) |Training time=0.45s (21.16%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4193|ppo_ep: 1|act_loss: -0.030487060546875|cri_loss: -0.01329803466796875|unsuper_loss: 0.0
+average reward score: 6.06640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.44%) |Training time=0.45s (20.90%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4194|ppo_ep: 1|act_loss: 0.003936767578125|cri_loss: 0.0020847320556640625|unsuper_loss: 0.0
+average reward score: 5.24609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.68%) |Training time=0.44s (20.63%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4195|ppo_ep: 1|act_loss: -0.036102294921875|cri_loss: -0.01715087890625|unsuper_loss: 0.0
+average reward score: 6.40625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.57%) |Training time=0.45s (20.77%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4196|ppo_ep: 1|act_loss: 6.031990051269531e-05|cri_loss: 0.00016546249389648438|unsuper_loss: 0.0
+average reward score: 5.21484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.30%) |Training time=0.45s (21.03%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4197|ppo_ep: 1|act_loss: 0.0220947265625|cri_loss: 0.01141357421875|unsuper_loss: 0.0
+average reward score: 5.03125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.87%) |Training time=0.44s (20.42%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4198|ppo_ep: 1|act_loss: 0.03314208984375|cri_loss: 0.017913818359375|unsuper_loss: 0.0
+average reward score: 5.17578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.64%) |Training time=0.44s (20.68%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.46
+[2023-04-14 11:22:22,354] [INFO] [logging.py:96:log_dist] [Rank 0] step=4200, skipped=56, lr=[4.8918722025601635e-06, 4.8918722025601635e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:22:22,373] [INFO] [timer.py:199:stop] epoch=0/micro_step=4200/global_step=4200, RunningAvgSamplesPerSec=105.26922859440923, CurrSamplesPerSec=108.52489355130318, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:22:22,465] [INFO] [logging.py:96:log_dist] [Rank 0] step=4200, skipped=66, lr=[2.5442725820864755e-06, 2.5442725820864755e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4199|ppo_ep: 1|act_loss: 0.003414154052734375|cri_loss: 0.00225067138671875|unsuper_loss: 0.0
+average reward score: 5.47265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.07%) |Training time=0.46s (21.29%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4200|ppo_ep: 1|act_loss: -0.00171661376953125|cri_loss: -0.0006322860717773438|unsuper_loss: 0.0
+average reward score: 5.171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.28%) |Training time=0.45s (21.04%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4201|ppo_ep: 1|act_loss: -0.006732940673828125|cri_loss: -0.0029163360595703125|unsuper_loss: 0.0
+average reward score: 5.734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.19%) |Training time=0.45s (21.16%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4202|ppo_ep: 1|act_loss: 0.0506591796875|cri_loss: 0.0276336669921875|unsuper_loss: 0.0
+average reward score: 6.171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.45s (21.07%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4203|ppo_ep: 1|act_loss: 0.038818359375|cri_loss: 0.0203399658203125|unsuper_loss: 0.0
+average reward score: 5.9375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.00%) |Training time=0.45s (20.84%) |Others=0.11 (5.16%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4204|ppo_ep: 1|act_loss: -0.0139923095703125|cri_loss: -0.00634002685546875|unsuper_loss: 0.0
+average reward score: 4.6484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.08%) |Training time=0.46s (21.26%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4205|ppo_ep: 1|act_loss: 0.00275421142578125|cri_loss: 0.0015354156494140625|unsuper_loss: 0.0
+average reward score: 5.109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.80%) |Training time=0.46s (21.54%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4206|ppo_ep: 1|act_loss: -0.010833740234375|cri_loss: -0.0052947998046875|unsuper_loss: 0.0
+average reward score: 5.41796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.21%) |Training time=0.48s (20.51%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4207|ppo_ep: 1|act_loss: 0.00415802001953125|cri_loss: 0.00371551513671875|unsuper_loss: 0.0
+average reward score: 4.78125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.46s (21.31%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4208|ppo_ep: 1|act_loss: 0.00266265869140625|cri_loss: 0.0019474029541015625|unsuper_loss: 0.0
+average reward score: 4.9140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.45%) |Training time=0.48s (21.17%) |Others=0.10 (4.38%)|CurSamplesPerSec=14.02 |AvgSamplesPerSec=14.46
+[2023-04-14 11:22:44,152] [INFO] [logging.py:96:log_dist] [Rank 0] step=4210, skipped=56, lr=[4.8732973304860655e-06, 4.8732973304860655e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:22:44,170] [INFO] [timer.py:199:stop] epoch=0/micro_step=4210/global_step=4210, RunningAvgSamplesPerSec=105.27366239794104, CurrSamplesPerSec=105.68341682427847, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:22:44,263] [INFO] [logging.py:96:log_dist] [Rank 0] step=4210, skipped=66, lr=[2.5346488096166647e-06, 2.5346488096166647e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4209|ppo_ep: 1|act_loss: 0.004459381103515625|cri_loss: 0.0029392242431640625|unsuper_loss: 0.0
+average reward score: 5.3046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.70%) |Training time=0.47s (21.66%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4210|ppo_ep: 1|act_loss: -0.007663726806640625|cri_loss: -0.0016937255859375|unsuper_loss: 0.0
+average reward score: 5.87890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.40%) |Training time=0.43s (19.88%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4211|ppo_ep: 1|act_loss: 0.0038623809814453125|cri_loss: 0.002246856689453125|unsuper_loss: 0.0
+average reward score: 4.28515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.37%) |Training time=0.48s (22.00%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4212|ppo_ep: 1|act_loss: -0.0183563232421875|cri_loss: -0.00844573974609375|unsuper_loss: 0.0
+average reward score: 4.8671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.41%) |Training time=0.47s (21.94%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4213|ppo_ep: 1|act_loss: -0.01000213623046875|cri_loss: -0.004795074462890625|unsuper_loss: 0.0
+average reward score: 4.96875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.37%) |Training time=0.47s (21.99%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4214|ppo_ep: 1|act_loss: 0.051055908203125|cri_loss: 0.0279693603515625|unsuper_loss: 0.0
+average reward score: 6.16796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.85%) |Training time=0.46s (21.52%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4215|ppo_ep: 1|act_loss: -0.01861572265625|cri_loss: -0.00881195068359375|unsuper_loss: 0.0
+average reward score: 5.0390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.08%) |Training time=0.46s (21.27%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4216|ppo_ep: 1|act_loss: 0.003650665283203125|cri_loss: 0.0027332305908203125|unsuper_loss: 0.0
+average reward score: 5.94140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.01%) |Training time=0.46s (21.32%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4217|ppo_ep: 1|act_loss: -0.02685546875|cri_loss: -0.0128631591796875|unsuper_loss: 0.0
+average reward score: 4.828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.15%) |Training time=0.46s (21.18%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4218|ppo_ep: 1|act_loss: -0.01708984375|cri_loss: -0.007843017578125|unsuper_loss: 0.0
+average reward score: 5.98828125
+-------------------------------------------------------------------------------------
+|E2E latency=3.50s |Gather latency=0.00s (0.00%) |Generate time=1.60s (45.66%) |Training time=0.45s (12.92%) |Others=1.45 (41.42%)|CurSamplesPerSec=9.14 |AvgSamplesPerSec=14.46
+[2023-04-14 11:23:07,071] [INFO] [logging.py:96:log_dist] [Rank 0] step=4220, skipped=56, lr=[4.85472174252902e-06, 4.85472174252902e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:23:07,089] [INFO] [timer.py:199:stop] epoch=0/micro_step=4220/global_step=4220, RunningAvgSamplesPerSec=105.27928430346253, CurrSamplesPerSec=109.89310827053387, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:23:07,182] [INFO] [logging.py:96:log_dist] [Rank 0] step=4220, skipped=66, lr=[2.5250245235679094e-06, 2.5250245235679094e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4219|ppo_ep: 1|act_loss: 0.005771636962890625|cri_loss: 0.0032749176025390625|unsuper_loss: 0.0
+average reward score: 5.828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.26%) |Training time=0.45s (21.08%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4220|ppo_ep: 1|act_loss: -0.00738525390625|cri_loss: -0.003170013427734375|unsuper_loss: 0.0
+average reward score: 5.22265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.93%) |Training time=0.46s (21.40%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4221|ppo_ep: 1|act_loss: -0.004085540771484375|cri_loss: -0.0013275146484375|unsuper_loss: 0.0
+average reward score: 5.140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.23%) |Training time=0.48s (20.52%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.64 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4222|ppo_ep: 1|act_loss: -0.0155792236328125|cri_loss: -0.00717926025390625|unsuper_loss: 0.0
+average reward score: 4.3359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.80%) |Training time=0.46s (21.56%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4223|ppo_ep: 1|act_loss: -0.0067138671875|cri_loss: -0.003093719482421875|unsuper_loss: 0.0
+average reward score: 5.1875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.20%) |Training time=0.45s (21.13%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4224|ppo_ep: 1|act_loss: -0.0264434814453125|cri_loss: -0.012542724609375|unsuper_loss: 0.0
+average reward score: 5.44140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.68%) |Training time=0.44s (20.64%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4225|ppo_ep: 1|act_loss: -0.0164642333984375|cri_loss: -0.005649566650390625|unsuper_loss: 0.0
+average reward score: 5.203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.92%) |Training time=0.44s (20.37%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4226|ppo_ep: 1|act_loss: -0.0190277099609375|cri_loss: -0.0092315673828125|unsuper_loss: 0.0
+average reward score: 6.03125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.59%) |Training time=0.44s (20.70%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4227|ppo_ep: 1|act_loss: 0.0005841255187988281|cri_loss: 0.0010204315185546875|unsuper_loss: 0.0
+average reward score: 5.6015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.05%) |Training time=0.46s (21.32%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4228|ppo_ep: 1|act_loss: 0.0234222412109375|cri_loss: 0.01303863525390625|unsuper_loss: 0.0
+average reward score: 5.36328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.14%) |Training time=0.46s (21.20%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
+[2023-04-14 11:23:28,778] [INFO] [logging.py:96:log_dist] [Rank 0] step=4230, skipped=56, lr=[4.836145714024048e-06, 4.836145714024048e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:23:28,796] [INFO] [timer.py:199:stop] epoch=0/micro_step=4230/global_step=4230, RunningAvgSamplesPerSec=105.28801567814266, CurrSamplesPerSec=109.88582062485825, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:23:28,889] [INFO] [logging.py:96:log_dist] [Rank 0] step=4230, skipped=66, lr=[2.515399866595347e-06, 2.515399866595347e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4229|ppo_ep: 1|act_loss: 0.04559326171875|cri_loss: 0.0240631103515625|unsuper_loss: 0.0
+average reward score: 6.4765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.19%) |Training time=0.45s (21.15%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4230|ppo_ep: 1|act_loss: 0.021453857421875|cri_loss: 0.011810302734375|unsuper_loss: 0.0
+average reward score: 5.09765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.94%) |Training time=0.46s (21.42%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4231|ppo_ep: 1|act_loss: 0.0029163360595703125|cri_loss: 0.001949310302734375|unsuper_loss: 0.0
+average reward score: 4.72265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.57%) |Training time=0.44s (20.74%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
+[2023-04-14 11:23:35,324] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 4232|ppo_ep: 1|act_loss: 0.0218048095703125|cri_loss: 0.011627197265625|unsuper_loss: 0.0
+average reward score: 5.7265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.62%) |Training time=0.45s (21.19%) |Others=0.09 (4.18%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.46
+[2023-04-14 11:23:37,455] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 4233|ppo_ep: 1|act_loss: -0.028045654296875|cri_loss: -0.0132598876953125|unsuper_loss: 0.0
+average reward score: 5.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.71%) |Training time=0.45s (21.10%) |Others=0.09 (4.19%)|CurSamplesPerSec=15.02 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4234|ppo_ep: 1|act_loss: 0.005527496337890625|cri_loss: 0.002880096435546875|unsuper_loss: 0.0
+average reward score: 5.890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.29%) |Training time=0.45s (21.04%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4235|ppo_ep: 1|act_loss: 0.05145263671875|cri_loss: 0.0276641845703125|unsuper_loss: 0.0
+average reward score: 6.6015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.76s (76.17%) |Training time=0.45s (19.49%) |Others=0.10 (4.34%)|CurSamplesPerSec=13.83 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4236|ppo_ep: 1|act_loss: -0.0341796875|cri_loss: -0.015838623046875|unsuper_loss: 0.0
+average reward score: 4.73828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.71%) |Training time=0.44s (20.62%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4237|ppo_ep: 1|act_loss: -0.017791748046875|cri_loss: -0.0085601806640625|unsuper_loss: 0.0
+average reward score: 4.92578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.74s (75.86%) |Training time=0.45s (19.78%) |Others=0.10 (4.36%)|CurSamplesPerSec=13.98 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4238|ppo_ep: 1|act_loss: -0.032806396484375|cri_loss: -0.01560211181640625|unsuper_loss: 0.0
+average reward score: 5.3359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.90%) |Training time=0.44s (20.60%) |Others=0.12 (5.50%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
+[2023-04-14 11:23:50,554] [INFO] [logging.py:96:log_dist] [Rank 0] step=4240, skipped=56, lr=[4.817569520312709e-06, 4.817569520312709e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:23:50,572] [INFO] [timer.py:199:stop] epoch=0/micro_step=4240/global_step=4240, RunningAvgSamplesPerSec=105.30150071014239, CurrSamplesPerSec=111.66534771424102, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:23:50,665] [INFO] [logging.py:96:log_dist] [Rank 0] step=4240, skipped=68, lr=[2.5076999698199706e-06, 2.5076999698199706e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4239|ppo_ep: 1|act_loss: -0.001766204833984375|cri_loss: 0.0019130706787109375|unsuper_loss: 0.0
+average reward score: 6.078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.41%) |Training time=0.45s (20.94%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4240|ppo_ep: 1|act_loss: 0.0226898193359375|cri_loss: 0.0124053955078125|unsuper_loss: 0.0
+average reward score: 4.7734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.84%) |Training time=0.44s (20.49%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4241|ppo_ep: 1|act_loss: 0.002574920654296875|cri_loss: 0.0016956329345703125|unsuper_loss: 0.0
+average reward score: 6.20703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.00%) |Training time=0.49s (22.40%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4242|ppo_ep: 1|act_loss: 0.0186767578125|cri_loss: 0.01000213623046875|unsuper_loss: 0.0
+average reward score: 5.8984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.43%) |Training time=0.50s (22.87%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4243|ppo_ep: 1|act_loss: 0.030120849609375|cri_loss: 0.0162506103515625|unsuper_loss: 0.0
+average reward score: 5.34765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.65%) |Training time=0.47s (21.59%) |Others=0.10 (4.77%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4244|ppo_ep: 1|act_loss: 0.0592041015625|cri_loss: 0.031158447265625|unsuper_loss: 0.0
+average reward score: 5.484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.98%) |Training time=0.42s (19.12%) |Others=0.11 (4.90%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4245|ppo_ep: 1|act_loss: 0.0345458984375|cri_loss: 0.0182037353515625|unsuper_loss: 0.0
+average reward score: 4.79296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.99%) |Training time=0.46s (21.33%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4246|ppo_ep: 1|act_loss: 0.00125885009765625|cri_loss: 0.0011882781982421875|unsuper_loss: 0.0
+average reward score: 4.9765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.47%) |Training time=0.45s (20.88%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4247|ppo_ep: 1|act_loss: -0.0139312744140625|cri_loss: -0.00653076171875|unsuper_loss: 0.0
+average reward score: 6.15234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.18%) |Training time=0.46s (21.17%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4248|ppo_ep: 1|act_loss: -0.0172271728515625|cri_loss: -0.0080413818359375|unsuper_loss: 0.0
+average reward score: 7.1796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.60%) |Training time=0.45s (20.74%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+[2023-04-14 11:24:12,290] [INFO] [logging.py:96:log_dist] [Rank 0] step=4250, skipped=56, lr=[4.798993436739004e-06, 4.798993436739004e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:24:12,308] [INFO] [timer.py:199:stop] epoch=0/micro_step=4250/global_step=4250, RunningAvgSamplesPerSec=105.30792594664827, CurrSamplesPerSec=114.12237540451974, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:24:12,401] [INFO] [logging.py:96:log_dist] [Rank 0] step=4250, skipped=68, lr=[2.4980750046916983e-06, 2.4980750046916983e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4249|ppo_ep: 1|act_loss: -0.02313232421875|cri_loss: -0.01113128662109375|unsuper_loss: 0.0
+average reward score: 6.88671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.95%) |Training time=0.44s (20.41%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4250|ppo_ep: 1|act_loss: -0.0238037109375|cri_loss: -0.01103973388671875|unsuper_loss: 0.0
+average reward score: 5.0234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.39%) |Training time=0.45s (20.96%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4251|ppo_ep: 1|act_loss: -0.03948974609375|cri_loss: -0.0182342529296875|unsuper_loss: 0.0
+average reward score: 4.28125
+-------------------------------------------------------------------------------------
+|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.78s (75.28%) |Training time=0.48s (20.47%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.51 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4252|ppo_ep: 1|act_loss: 0.0126495361328125|cri_loss: 0.006908416748046875|unsuper_loss: 0.0
+average reward score: 4.6328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.88%) |Training time=0.47s (21.50%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4253|ppo_ep: 1|act_loss: 0.024200439453125|cri_loss: 0.0125732421875|unsuper_loss: 0.0
+average reward score: 7.3359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.98%) |Training time=0.46s (21.40%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4254|ppo_ep: 1|act_loss: -0.021636962890625|cri_loss: -0.0095977783203125|unsuper_loss: 0.0
+average reward score: 5.4765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.67%) |Training time=0.47s (21.71%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4255|ppo_ep: 1|act_loss: -0.01812744140625|cri_loss: -0.008758544921875|unsuper_loss: 0.0
+average reward score: 6.01171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.73%) |Training time=0.47s (21.62%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4256|ppo_ep: 1|act_loss: -0.004058837890625|cri_loss: -0.0011882781982421875|unsuper_loss: 0.0
+average reward score: 4.5703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.89%) |Training time=0.47s (21.46%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4257|ppo_ep: 1|act_loss: 0.021209716796875|cri_loss: 0.01186370849609375|unsuper_loss: 0.0
+average reward score: 5.91796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.86s |Gather latency=0.00s (0.00%) |Generate time=1.61s (56.22%) |Training time=0.47s (16.46%) |Others=0.78 (27.32%)|CurSamplesPerSec=11.18 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4258|ppo_ep: 1|act_loss: 0.06622314453125|cri_loss: 0.0384521484375|unsuper_loss: 0.0
+average reward score: 5.46484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.79%) |Training time=0.44s (20.55%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+[2023-04-14 11:24:34,891] [INFO] [logging.py:96:log_dist] [Rank 0] step=4260, skipped=56, lr=[4.7804177386453025e-06, 4.7804177386453025e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:24:34,909] [INFO] [timer.py:199:stop] epoch=0/micro_step=4260/global_step=4260, RunningAvgSamplesPerSec=105.30860582605034, CurrSamplesPerSec=107.67247823369864, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:24:35,002] [INFO] [logging.py:96:log_dist] [Rank 0] step=4260, skipped=68, lr=[2.488450068096499e-06, 2.488450068096499e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4259|ppo_ep: 1|act_loss: 0.04168701171875|cri_loss: 0.021820068359375|unsuper_loss: 0.0
+average reward score: 4.89453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.92%) |Training time=0.46s (21.16%) |Others=0.11 (4.93%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4260|ppo_ep: 1|act_loss: -0.031890869140625|cri_loss: -0.0142669677734375|unsuper_loss: 0.0
+average reward score: 5.421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.96%) |Training time=0.46s (21.40%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4261|ppo_ep: 1|act_loss: 0.018280029296875|cri_loss: 0.00974273681640625|unsuper_loss: 0.0
+average reward score: 6.0546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.62%) |Training time=0.47s (21.76%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4262|ppo_ep: 1|act_loss: -0.0208587646484375|cri_loss: -0.00948333740234375|unsuper_loss: 0.0
+average reward score: 6.2421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.35%) |Training time=0.48s (22.02%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4263|ppo_ep: 1|act_loss: -0.01275634765625|cri_loss: -0.0055389404296875|unsuper_loss: 0.0
+average reward score: 5.6796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.70%) |Training time=0.47s (21.67%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4264|ppo_ep: 1|act_loss: -0.0343017578125|cri_loss: -0.01505279541015625|unsuper_loss: 0.0
+average reward score: 5.1875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.60%) |Training time=0.48s (21.81%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4265|ppo_ep: 1|act_loss: -0.0079498291015625|cri_loss: -0.0037689208984375|unsuper_loss: 0.0
+average reward score: 4.6640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.25%) |Training time=0.48s (22.13%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4266|ppo_ep: 1|act_loss: -0.0089263916015625|cri_loss: -0.0040740966796875|unsuper_loss: 0.0
+average reward score: 5.4609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.50s |Gather latency=0.00s (0.00%) |Generate time=1.79s (71.52%) |Training time=0.61s (24.32%) |Others=0.10 (4.15%)|CurSamplesPerSec=12.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4267|ppo_ep: 1|act_loss: -0.00508880615234375|cri_loss: -0.002323150634765625|unsuper_loss: 0.0
+average reward score: 5.50390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.44%) |Training time=0.48s (21.92%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4268|ppo_ep: 1|act_loss: 0.021148681640625|cri_loss: 0.01126861572265625|unsuper_loss: 0.0
+average reward score: 5.125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.31%) |Training time=0.48s (22.07%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
+[2023-04-14 11:24:56,993] [INFO] [logging.py:96:log_dist] [Rank 0] step=4270, skipped=56, lr=[4.761842701368264e-06, 4.761842701368264e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:24:57,011] [INFO] [timer.py:199:stop] epoch=0/micro_step=4270/global_step=4270, RunningAvgSamplesPerSec=105.29193536234465, CurrSamplesPerSec=100.81037834200097, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:24:57,104] [INFO] [logging.py:96:log_dist] [Rank 0] step=4270, skipped=68, lr=[2.4788253026991545e-06, 2.4788253026991545e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4269|ppo_ep: 1|act_loss: 0.0455322265625|cri_loss: 0.023406982421875|unsuper_loss: 0.0
+average reward score: 6.078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.40%) |Training time=0.48s (22.01%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4270|ppo_ep: 1|act_loss: 0.0024509429931640625|cri_loss: 0.0016841888427734375|unsuper_loss: 0.0
+average reward score: 5.0859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.35%) |Training time=0.46s (21.00%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4271|ppo_ep: 1|act_loss: 0.010040283203125|cri_loss: 0.006195068359375|unsuper_loss: 0.0
+average reward score: 4.34765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.49%) |Training time=0.48s (21.90%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4272|ppo_ep: 1|act_loss: 0.022003173828125|cri_loss: 0.01129150390625|unsuper_loss: 0.0
+average reward score: 5.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.48s (21.93%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4273|ppo_ep: 1|act_loss: 0.018524169921875|cri_loss: 0.0123443603515625|unsuper_loss: 0.0
+average reward score: 6.09375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.55%) |Training time=0.47s (21.78%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4274|ppo_ep: 1|act_loss: -0.00841522216796875|cri_loss: -0.0037631988525390625|unsuper_loss: 0.0
+average reward score: 5.1953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.23%) |Training time=0.48s (22.16%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4275|ppo_ep: 1|act_loss: -0.0285491943359375|cri_loss: -0.0128021240234375|unsuper_loss: 0.0
+average reward score: 6.06640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.48s (21.92%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4276|ppo_ep: 1|act_loss: -0.0015773773193359375|cri_loss: -0.0004177093505859375|unsuper_loss: 0.0
+average reward score: 5.4765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.69%) |Training time=0.47s (21.67%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4277|ppo_ep: 1|act_loss: -0.031494140625|cri_loss: -0.01528167724609375|unsuper_loss: 0.0
+average reward score: 5.55859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.71s |Gather latency=0.00s (0.00%) |Generate time=1.60s (58.98%) |Training time=0.48s (17.68%) |Others=0.63 (23.34%)|CurSamplesPerSec=11.82 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4278|ppo_ep: 1|act_loss: -0.00934600830078125|cri_loss: -0.004230499267578125|unsuper_loss: 0.0
+average reward score: 5.5390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.08s |Gather latency=0.00s (0.00%) |Generate time=1.59s (76.59%) |Training time=0.39s (18.58%) |Others=0.10 (4.82%)|CurSamplesPerSec=15.40 |AvgSamplesPerSec=14.46
+[2023-04-14 11:25:19,142] [INFO] [logging.py:96:log_dist] [Rank 0] step=4280, skipped=56, lr=[4.74326860023475e-06, 4.74326860023475e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:25:19,161] [INFO] [timer.py:199:stop] epoch=0/micro_step=4280/global_step=4280, RunningAvgSamplesPerSec=105.29278448645617, CurrSamplesPerSec=100.23129910087523, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:25:19,253] [INFO] [logging.py:96:log_dist] [Rank 0] step=4280, skipped=68, lr=[2.4692008511619042e-06, 2.4692008511619042e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4279|ppo_ep: 1|act_loss: 0.0067291259765625|cri_loss: 0.0035915374755859375|unsuper_loss: 0.0
+average reward score: 6.64453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.25%) |Training time=0.48s (22.15%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4280|ppo_ep: 1|act_loss: -0.0108489990234375|cri_loss: -0.005157470703125|unsuper_loss: 0.0
+average reward score: 7.859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.08%) |Training time=0.48s (20.30%) |Others=0.11 (4.62%)|CurSamplesPerSec=13.58 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4281|ppo_ep: 1|act_loss: 0.0028228759765625|cri_loss: 0.0021076202392578125|unsuper_loss: 0.0
+average reward score: 4.87890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.37%) |Training time=0.48s (21.99%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4282|ppo_ep: 1|act_loss: -0.0103912353515625|cri_loss: -0.004909515380859375|unsuper_loss: 0.0
+average reward score: 5.7734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.10%) |Training time=0.49s (22.32%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4283|ppo_ep: 1|act_loss: 0.004169464111328125|cri_loss: 0.0023479461669921875|unsuper_loss: 0.0
+average reward score: 5.7109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.29%) |Training time=0.48s (22.07%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4284|ppo_ep: 1|act_loss: 0.01311492919921875|cri_loss: 0.00726318359375|unsuper_loss: 0.0
+average reward score: 5.77734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.35%) |Training time=0.48s (22.04%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4285|ppo_ep: 1|act_loss: 0.0207977294921875|cri_loss: 0.0112457275390625|unsuper_loss: 0.0
+average reward score: 5.87890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.40%) |Training time=0.48s (21.95%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4286|ppo_ep: 1|act_loss: 0.0085296630859375|cri_loss: 0.004547119140625|unsuper_loss: 0.0
+average reward score: 4.7734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.35%) |Training time=0.48s (22.04%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4287|ppo_ep: 1|act_loss: -0.0113525390625|cri_loss: -0.005161285400390625|unsuper_loss: 0.0
+average reward score: 6.25390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.47s (21.90%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4288|ppo_ep: 1|act_loss: 0.027923583984375|cri_loss: 0.0167694091796875|unsuper_loss: 0.0
+average reward score: 4.68359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.83%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+[2023-04-14 11:25:41,026] [INFO] [logging.py:96:log_dist] [Rank 0] step=4290, skipped=56, lr=[4.724695710557746e-06, 4.724695710557746e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:25:41,044] [INFO] [timer.py:199:stop] epoch=0/micro_step=4290/global_step=4290, RunningAvgSamplesPerSec=105.28350097959824, CurrSamplesPerSec=101.11827769046826, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:25:41,137] [INFO] [logging.py:96:log_dist] [Rank 0] step=4290, skipped=68, lr=[2.4595768561423393e-06, 2.4595768561423393e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4289|ppo_ep: 1|act_loss: -0.03314208984375|cri_loss: -0.015716552734375|unsuper_loss: 0.0
+average reward score: 4.859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.48s (22.05%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4290|ppo_ep: 1|act_loss: -0.02056884765625|cri_loss: -0.0088958740234375|unsuper_loss: 0.0
+average reward score: 5.39453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.84%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4291|ppo_ep: 1|act_loss: -0.023284912109375|cri_loss: -0.01110076904296875|unsuper_loss: 0.0
+average reward score: 5.453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.37%) |Training time=0.48s (22.02%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4292|ppo_ep: 1|act_loss: 0.0148468017578125|cri_loss: 0.008026123046875|unsuper_loss: 0.0
+average reward score: 5.296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.37%) |Training time=0.48s (22.01%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4293|ppo_ep: 1|act_loss: 0.01361846923828125|cri_loss: 0.0087890625|unsuper_loss: 0.0
+average reward score: 5.26171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.47%) |Training time=0.48s (21.92%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4294|ppo_ep: 1|act_loss: 0.004306793212890625|cri_loss: 0.002872467041015625|unsuper_loss: 0.0
+average reward score: 4.5390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.29%) |Training time=0.48s (22.14%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4295|ppo_ep: 1|act_loss: 0.0026702880859375|cri_loss: 0.0014410018920898438|unsuper_loss: 0.0
+average reward score: 5.55859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.76%) |Training time=0.48s (21.63%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.54 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4296|ppo_ep: 1|act_loss: 0.00958251953125|cri_loss: 0.005100250244140625|unsuper_loss: 0.0
+average reward score: 4.62109375
+-------------------------------------------------------------------------------------
+|E2E latency=3.13s |Gather latency=0.00s (0.00%) |Generate time=1.78s (56.75%) |Training time=0.48s (15.22%) |Others=0.88 (28.02%)|CurSamplesPerSec=10.23 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4297|ppo_ep: 1|act_loss: 0.03570556640625|cri_loss: 0.018829345703125|unsuper_loss: 0.0
+average reward score: 5.22265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.34%) |Training time=0.48s (22.03%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4298|ppo_ep: 1|act_loss: 0.03399658203125|cri_loss: 0.019012451171875|unsuper_loss: 0.0
+average reward score: 5.3828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.42%) |Training time=0.48s (21.97%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+[2023-04-14 11:26:03,707] [INFO] [logging.py:96:log_dist] [Rank 0] step=4300, skipped=56, lr=[4.706124307632283e-06, 4.706124307632283e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:26:03,726] [INFO] [timer.py:199:stop] epoch=0/micro_step=4300/global_step=4300, RunningAvgSamplesPerSec=105.27549328885391, CurrSamplesPerSec=102.87540000751152, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:26:03,819] [INFO] [logging.py:96:log_dist] [Rank 0] step=4300, skipped=68, lr=[2.4499534602912815e-06, 2.4499534602912815e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4299|ppo_ep: 1|act_loss: 0.0301055908203125|cri_loss: 0.0162200927734375|unsuper_loss: 0.0
+average reward score: 5.390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.53%) |Training time=0.47s (21.86%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4300|ppo_ep: 1|act_loss: -0.0199432373046875|cri_loss: -0.009765625|unsuper_loss: 0.0
+average reward score: 4.8671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.84%) |Training time=0.49s (22.54%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4301|ppo_ep: 1|act_loss: 0.02093505859375|cri_loss: 0.01099395751953125|unsuper_loss: 0.0
+average reward score: 6.29296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.23%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4302|ppo_ep: 1|act_loss: 0.001983642578125|cri_loss: 0.0019588470458984375|unsuper_loss: 0.0
+average reward score: 5.96875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.14%) |Training time=0.48s (22.22%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4303|ppo_ep: 1|act_loss: -0.041778564453125|cri_loss: -0.0200042724609375|unsuper_loss: 0.0
+average reward score: 5.1796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.47s (21.90%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4304|ppo_ep: 1|act_loss: -0.0122833251953125|cri_loss: -0.00597381591796875|unsuper_loss: 0.0
+average reward score: 5.25390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.15%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4305|ppo_ep: 1|act_loss: 0.006443023681640625|cri_loss: 0.0036296844482421875|unsuper_loss: 0.0
+average reward score: 4.6796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.39%) |Training time=0.47s (21.97%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4306|ppo_ep: 1|act_loss: 0.0103607177734375|cri_loss: 0.0058746337890625|unsuper_loss: 0.0
+average reward score: 5.63671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.47%) |Training time=0.47s (21.89%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4307|ppo_ep: 1|act_loss: 0.037078857421875|cri_loss: 0.0198211669921875|unsuper_loss: 0.0
+average reward score: 6.58203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.18%) |Training time=0.48s (22.21%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4308|ppo_ep: 1|act_loss: 0.00476837158203125|cri_loss: 0.0025424957275390625|unsuper_loss: 0.0
+average reward score: 4.515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.24%) |Training time=0.48s (22.12%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+[2023-04-14 11:26:25,365] [INFO] [logging.py:96:log_dist] [Rank 0] step=4310, skipped=56, lr=[4.687554666731353e-06, 4.687554666731353e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:26:25,383] [INFO] [timer.py:199:stop] epoch=0/micro_step=4310/global_step=4310, RunningAvgSamplesPerSec=105.26517600866048, CurrSamplesPerSec=101.96257966198779, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:26:25,476] [INFO] [logging.py:96:log_dist] [Rank 0] step=4310, skipped=68, lr=[2.440330806250673e-06, 2.440330806250673e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4309|ppo_ep: 1|act_loss: 0.052459716796875|cri_loss: 0.0281829833984375|unsuper_loss: 0.0
+average reward score: 6.38671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.48s (21.99%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4310|ppo_ep: 1|act_loss: -0.01421356201171875|cri_loss: -0.00677490234375|unsuper_loss: 0.0
+average reward score: 5.734375
+-------------------------------------------------------------------------------------
+|E2E latency=3.60s |Gather latency=0.00s (0.00%) |Generate time=1.59s (44.04%) |Training time=0.50s (14.00%) |Others=1.51 (41.96%)|CurSamplesPerSec=8.88 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4311|ppo_ep: 1|act_loss: 0.0005640983581542969|cri_loss: 0.000530242919921875|unsuper_loss: 0.0
+average reward score: 5.70703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.16%) |Training time=0.48s (22.21%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4312|ppo_ep: 1|act_loss: -0.001445770263671875|cri_loss: -0.0002770423889160156|unsuper_loss: 0.0
+average reward score: 4.609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.25%) |Training time=0.48s (22.12%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4313|ppo_ep: 1|act_loss: -0.0284423828125|cri_loss: -0.0135040283203125|unsuper_loss: 0.0
+average reward score: 5.65625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.81%) |Training time=0.49s (22.58%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4314|ppo_ep: 1|act_loss: -0.0107269287109375|cri_loss: -0.005100250244140625|unsuper_loss: 0.0
+average reward score: 6.0703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.47%) |Training time=0.47s (21.86%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4315|ppo_ep: 1|act_loss: -0.0107421875|cri_loss: -0.004486083984375|unsuper_loss: 0.0
+average reward score: 5.67578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.90%) |Training time=0.49s (22.50%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4316|ppo_ep: 1|act_loss: -0.00214385986328125|cri_loss: -0.00075531005859375|unsuper_loss: 0.0
+average reward score: 5.27734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.10%) |Training time=0.48s (22.28%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4317|ppo_ep: 1|act_loss: -0.015869140625|cri_loss: -0.007343292236328125|unsuper_loss: 0.0
+average reward score: 6.36328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.08%) |Training time=0.48s (22.30%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4318|ppo_ep: 1|act_loss: 0.00504302978515625|cri_loss: 0.0028839111328125|unsuper_loss: 0.0
+average reward score: 5.78125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.34%) |Training time=0.48s (22.07%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.46
+[2023-04-14 11:26:48,503] [INFO] [logging.py:96:log_dist] [Rank 0] step=4320, skipped=56, lr=[4.66898706310183e-06, 4.66898706310183e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:26:48,521] [INFO] [timer.py:199:stop] epoch=0/micro_step=4320/global_step=4320, RunningAvgSamplesPerSec=105.2507615935386, CurrSamplesPerSec=100.87576558125552, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:26:48,614] [INFO] [logging.py:96:log_dist] [Rank 0] step=4320, skipped=68, lr=[2.4307090366514594e-06, 2.4307090366514594e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4319|ppo_ep: 1|act_loss: 0.004791259765625|cri_loss: 0.002666473388671875|unsuper_loss: 0.0
+average reward score: 6.296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.22%) |Training time=0.48s (22.18%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4320|ppo_ep: 1|act_loss: 0.02197265625|cri_loss: 0.0123138427734375|unsuper_loss: 0.0
+average reward score: 5.37109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.06%) |Training time=0.48s (22.32%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4321|ppo_ep: 1|act_loss: 0.017242431640625|cri_loss: 0.00876617431640625|unsuper_loss: 0.0
+average reward score: 4.6484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.99%) |Training time=0.48s (21.82%) |Others=0.14 (6.20%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4322|ppo_ep: 1|act_loss: -0.002384185791015625|cri_loss: -0.0009288787841796875|unsuper_loss: 0.0
+average reward score: 4.65625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.31%) |Training time=0.48s (22.05%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4323|ppo_ep: 1|act_loss: -0.0127716064453125|cri_loss: -0.00608062744140625|unsuper_loss: 0.0
+average reward score: 4.53125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.13%) |Training time=0.48s (22.26%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4324|ppo_ep: 1|act_loss: -0.003589630126953125|cri_loss: -0.000904083251953125|unsuper_loss: 0.0
+average reward score: 5.62890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.50%) |Training time=0.48s (21.10%) |Others=0.10 (4.40%)|CurSamplesPerSec=14.01 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4325|ppo_ep: 1|act_loss: -0.06109619140625|cri_loss: -0.0297698974609375|unsuper_loss: 0.0
+average reward score: 4.8984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.89%) |Training time=0.49s (20.82%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4326|ppo_ep: 1|act_loss: -0.0284423828125|cri_loss: -0.01348114013671875|unsuper_loss: 0.0
+average reward score: 5.02734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.90%) |Training time=0.49s (22.50%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4327|ppo_ep: 1|act_loss: 0.00945281982421875|cri_loss: 0.00511932373046875|unsuper_loss: 0.0
+average reward score: 5.19921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.12%) |Training time=0.48s (22.26%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4328|ppo_ep: 1|act_loss: -0.01401519775390625|cri_loss: -0.006587982177734375|unsuper_loss: 0.0
+average reward score: 5.3203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.22%) |Training time=0.48s (22.13%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+[2023-04-14 11:27:10,534] [INFO] [logging.py:96:log_dist] [Rank 0] step=4330, skipped=56, lr=[4.650421771960395e-06, 4.650421771960395e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:27:10,551] [INFO] [timer.py:199:stop] epoch=0/micro_step=4330/global_step=4330, RunningAvgSamplesPerSec=105.23655293591679, CurrSamplesPerSec=96.47581163393487, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:27:10,644] [INFO] [logging.py:96:log_dist] [Rank 0] step=4330, skipped=68, lr=[2.421088294111479e-06, 2.421088294111479e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4329|ppo_ep: 1|act_loss: -0.0030517578125|cri_loss: -0.001354217529296875|unsuper_loss: 0.0
+average reward score: 5.578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.79%) |Training time=0.49s (22.62%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4330|ppo_ep: 1|act_loss: 0.00681304931640625|cri_loss: 0.003574371337890625|unsuper_loss: 0.0
+average reward score: 4.86328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.85%) |Training time=0.49s (22.53%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4331|ppo_ep: 1|act_loss: 0.0313720703125|cri_loss: 0.016265869140625|unsuper_loss: 0.0
+average reward score: 4.59765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.17%) |Training time=0.48s (22.19%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4332|ppo_ep: 1|act_loss: 0.050048828125|cri_loss: 0.02886962890625|unsuper_loss: 0.0
+average reward score: 5.9609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.03%) |Training time=0.48s (22.36%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4333|ppo_ep: 1|act_loss: 0.0006165504455566406|cri_loss: 0.0004563331604003906|unsuper_loss: 0.0
+average reward score: 4.64453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.79%) |Training time=0.49s (22.61%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
+[2023-04-14 11:27:21,487] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 4334|ppo_ep: 1|act_loss: 0.007572174072265625|cri_loss: 0.00405120849609375|unsuper_loss: 0.0
+average reward score: 5.3984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.49%) |Training time=0.48s (22.33%) |Others=0.09 (4.18%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+[2023-04-14 11:27:23,649] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 4335|ppo_ep: 1|act_loss: 0.09259033203125|cri_loss: 0.0550537109375|unsuper_loss: 0.0
+average reward score: 5.6171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.49s (22.67%) |Others=0.09 (4.13%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4336|ppo_ep: 1|act_loss: 0.0041046142578125|cri_loss: 0.00220489501953125|unsuper_loss: 0.0
+average reward score: 5.99609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.03%) |Training time=0.48s (22.35%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4337|ppo_ep: 1|act_loss: -0.0007777214050292969|cri_loss: -1.049041748046875e-05|unsuper_loss: 0.0
+average reward score: 4.23046875
+-------------------------------------------------------------------------------------
+|E2E latency=3.05s |Gather latency=0.00s (0.00%) |Generate time=1.58s (51.88%) |Training time=0.49s (15.97%) |Others=0.98 (32.15%)|CurSamplesPerSec=10.49 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4338|ppo_ep: 1|act_loss: 0.0126495361328125|cri_loss: 0.00670623779296875|unsuper_loss: 0.0
+average reward score: 6.1796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.58s (70.00%) |Training time=0.49s (21.83%) |Others=0.19 (8.17%)|CurSamplesPerSec=14.14 |AvgSamplesPerSec=14.46
+[2023-04-14 11:27:33,216] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+[2023-04-14 11:27:33,216] [INFO] [logging.py:96:log_dist] [Rank 0] step=4340, skipped=57, lr=[4.633715214549443e-06, 4.633715214549443e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:27:33,217] [INFO] [timer.py:199:stop] epoch=0/micro_step=4340/global_step=4340, RunningAvgSamplesPerSec=105.22239416119598, CurrSamplesPerSec=108.74004434905967, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:27:33,309] [INFO] [logging.py:96:log_dist] [Rank 0] step=4340, skipped=70, lr=[2.413392535391663e-06, 2.413392535391663e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4339|ppo_ep: 1|act_loss: -0.0102386474609375|cri_loss: -0.00473785400390625|unsuper_loss: 0.0
+average reward score: 6.28125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.45%) |Training time=0.46s (20.93%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46
+[2023-04-14 11:27:35,354] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 4340|ppo_ep: 1|act_loss: -0.00921630859375|cri_loss: -0.00395965576171875|unsuper_loss: 0.0
+average reward score: 5.2265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.07%) |Training time=0.45s (21.25%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4341|ppo_ep: 1|act_loss: -0.0147705078125|cri_loss: -0.0072479248046875|unsuper_loss: 0.0
+average reward score: 6.125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.96%) |Training time=0.49s (22.43%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4342|ppo_ep: 1|act_loss: -0.0070648193359375|cri_loss: -0.003284454345703125|unsuper_loss: 0.0
+average reward score: 6.96484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.02%) |Training time=0.48s (22.35%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4343|ppo_ep: 1|act_loss: -0.040374755859375|cri_loss: -0.0195770263671875|unsuper_loss: 0.0
+average reward score: 5.09765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.98%) |Training time=0.49s (22.39%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4344|ppo_ep: 1|act_loss: -0.0029506683349609375|cri_loss: -0.00118255615234375|unsuper_loss: 0.0
+average reward score: 4.73828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.79%) |Training time=0.49s (22.60%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4345|ppo_ep: 1|act_loss: 0.01107025146484375|cri_loss: 0.00582122802734375|unsuper_loss: 0.0
+average reward score: 5.76171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.64s (73.22%) |Training time=0.50s (22.30%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.32 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4346|ppo_ep: 1|act_loss: 0.037841796875|cri_loss: 0.0195465087890625|unsuper_loss: 0.0
+average reward score: 6.234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.05%) |Training time=0.48s (22.31%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4347|ppo_ep: 1|act_loss: 0.010650634765625|cri_loss: 0.00545501708984375|unsuper_loss: 0.0
+average reward score: 4.9296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.58s |Gather latency=0.00s (0.00%) |Generate time=1.58s (61.26%) |Training time=0.49s (18.85%) |Others=0.51 (19.88%)|CurSamplesPerSec=12.40 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4348|ppo_ep: 1|act_loss: -0.01806640625|cri_loss: -0.00841522216796875|unsuper_loss: 0.0
+average reward score: 5.7890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.95%) |Training time=0.48s (22.41%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+[2023-04-14 11:27:55,334] [INFO] [logging.py:96:log_dist] [Rank 0] step=4350, skipped=58, lr=[4.617010953733625e-06, 4.617010953733625e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:27:55,352] [INFO] [timer.py:199:stop] epoch=0/micro_step=4350/global_step=4350, RunningAvgSamplesPerSec=105.20829371375697, CurrSamplesPerSec=99.3526850550957, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:27:55,445] [INFO] [logging.py:96:log_dist] [Rank 0] step=4350, skipped=70, lr=[2.4037740009053053e-06, 2.4037740009053053e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4349|ppo_ep: 1|act_loss: 0.014923095703125|cri_loss: 0.00814056396484375|unsuper_loss: 0.0
+average reward score: 5.59375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.05%) |Training time=0.48s (22.35%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4350|ppo_ep: 1|act_loss: -0.0151824951171875|cri_loss: -0.00742340087890625|unsuper_loss: 0.0
+average reward score: 4.83984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.96%) |Training time=0.49s (22.42%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4351|ppo_ep: 1|act_loss: -0.00119781494140625|cri_loss: -0.0004775524139404297|unsuper_loss: 0.0
+average reward score: 5.14453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.04%) |Training time=0.48s (22.32%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4352|ppo_ep: 1|act_loss: 0.00487518310546875|cri_loss: 0.0028095245361328125|unsuper_loss: 0.0
+average reward score: 5.4296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.78%) |Training time=0.50s (22.63%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4353|ppo_ep: 1|act_loss: 0.022918701171875|cri_loss: 0.0121917724609375|unsuper_loss: 0.0
+average reward score: 5.7421875
+-------------------------------------------------------------------------------------
+|E2E latency=3.49s |Gather latency=0.00s (0.00%) |Generate time=1.68s (48.11%) |Training time=0.56s (16.00%) |Others=1.25 (35.89%)|CurSamplesPerSec=9.16 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4354|ppo_ep: 1|act_loss: 0.0035247802734375|cri_loss: 0.0018749237060546875|unsuper_loss: 0.0
+average reward score: 5.70703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.83%) |Training time=0.47s (21.51%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4355|ppo_ep: 1|act_loss: -0.0120697021484375|cri_loss: -0.004734039306640625|unsuper_loss: 0.0
+average reward score: 5.90625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.34%) |Training time=0.48s (22.03%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4356|ppo_ep: 1|act_loss: 0.02142333984375|cri_loss: 0.0111846923828125|unsuper_loss: 0.0
+average reward score: 5.4296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.35%) |Training time=0.48s (22.01%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4357|ppo_ep: 1|act_loss: -0.0118560791015625|cri_loss: -0.00458526611328125|unsuper_loss: 0.0
+average reward score: 6.09765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.27%) |Training time=0.48s (22.10%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4358|ppo_ep: 1|act_loss: 0.0582275390625|cri_loss: 0.0309295654296875|unsuper_loss: 0.0
+average reward score: 5.0390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.51s |Gather latency=0.00s (0.00%) |Generate time=1.60s (63.54%) |Training time=0.47s (18.86%) |Others=0.44 (17.60%)|CurSamplesPerSec=12.73 |AvgSamplesPerSec=14.46
+[2023-04-14 11:28:18,696] [INFO] [logging.py:96:log_dist] [Rank 0] step=4360, skipped=58, lr=[4.5984536014041675e-06, 4.5984536014041675e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:28:18,714] [INFO] [timer.py:199:stop] epoch=0/micro_step=4360/global_step=4360, RunningAvgSamplesPerSec=105.19267954139451, CurrSamplesPerSec=105.13316387876034, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:28:18,807] [INFO] [logging.py:96:log_dist] [Rank 0] step=4360, skipped=70, lr=[2.3941568927203878e-06, 2.3941568927203878e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4359|ppo_ep: 1|act_loss: -0.001995086669921875|cri_loss: -0.000576019287109375|unsuper_loss: 0.0
+average reward score: 5.63671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.77%) |Training time=0.47s (21.59%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4360|ppo_ep: 1|act_loss: -0.010589599609375|cri_loss: -0.005023956298828125|unsuper_loss: 0.0
+average reward score: 5.0625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.42%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4361|ppo_ep: 1|act_loss: -0.0277099609375|cri_loss: -0.01325225830078125|unsuper_loss: 0.0
+average reward score: 4.890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.24%) |Training time=0.45s (21.07%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4362|ppo_ep: 1|act_loss: -0.0044097900390625|cri_loss: -0.001445770263671875|unsuper_loss: 0.0
+average reward score: 5.00390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.20%) |Training time=0.45s (21.10%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4363|ppo_ep: 1|act_loss: 0.0423583984375|cri_loss: 0.0220489501953125|unsuper_loss: 0.0
+average reward score: 5.6484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.14%) |Training time=0.45s (21.20%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4364|ppo_ep: 1|act_loss: 0.0306396484375|cri_loss: 0.015960693359375|unsuper_loss: 0.0
+average reward score: 5.4765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.32%) |Training time=0.45s (20.97%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4365|ppo_ep: 1|act_loss: 0.003692626953125|cri_loss: 0.0020427703857421875|unsuper_loss: 0.0
+average reward score: 5.4609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.49%) |Training time=0.44s (20.81%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4366|ppo_ep: 1|act_loss: -0.003925323486328125|cri_loss: -0.0013608932495117188|unsuper_loss: 0.0
+average reward score: 5.90625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.15%) |Training time=0.45s (21.19%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4367|ppo_ep: 1|act_loss: 0.03369140625|cri_loss: 0.0172882080078125|unsuper_loss: 0.0
+average reward score: 5.30859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.46s (21.28%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4368|ppo_ep: 1|act_loss: 0.0003256797790527344|cri_loss: 0.0004138946533203125|unsuper_loss: 0.0
+average reward score: 5.4375
+-------------------------------------------------------------------------------------
+|E2E latency=2.82s |Gather latency=0.00s (0.00%) |Generate time=1.66s (58.85%) |Training time=0.49s (17.54%) |Others=0.67 (23.62%)|CurSamplesPerSec=11.36 |AvgSamplesPerSec=14.46
+[2023-04-14 11:28:40,789] [INFO] [logging.py:96:log_dist] [Rank 0] step=4370, skipped=58, lr=[4.579899607038848e-06, 4.579899607038848e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:28:40,807] [INFO] [timer.py:199:stop] epoch=0/micro_step=4370/global_step=4370, RunningAvgSamplesPerSec=105.20610923662908, CurrSamplesPerSec=110.14616484290242, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:28:40,900] [INFO] [logging.py:96:log_dist] [Rank 0] step=4370, skipped=70, lr=[2.3845413533856517e-06, 2.3845413533856517e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4369|ppo_ep: 1|act_loss: 0.0188140869140625|cri_loss: 0.0099945068359375|unsuper_loss: 0.0
+average reward score: 5.8203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.13%) |Training time=0.45s (21.20%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4370|ppo_ep: 1|act_loss: 0.0085601806640625|cri_loss: 0.0045623779296875|unsuper_loss: 0.0
+average reward score: 4.6640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.40%) |Training time=0.45s (20.89%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4371|ppo_ep: 1|act_loss: -0.01934814453125|cri_loss: -0.00904083251953125|unsuper_loss: 0.0
+average reward score: 5.546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.30%) |Training time=0.45s (21.00%) |Others=0.10 (4.69%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4372|ppo_ep: 1|act_loss: -0.006256103515625|cri_loss: -0.0027332305908203125|unsuper_loss: 0.0
+average reward score: 5.35546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.17%) |Training time=0.46s (21.18%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4373|ppo_ep: 1|act_loss: -0.008270263671875|cri_loss: -0.004001617431640625|unsuper_loss: 0.0
+average reward score: 5.28515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.20%) |Training time=0.45s (21.11%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4374|ppo_ep: 1|act_loss: 0.028411865234375|cri_loss: 0.01493072509765625|unsuper_loss: 0.0
+average reward score: 6.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.15%) |Training time=0.45s (21.13%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4375|ppo_ep: 1|act_loss: -0.01084136962890625|cri_loss: -0.00421905517578125|unsuper_loss: 0.0
+average reward score: 5.91796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.08%) |Training time=0.46s (21.24%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4376|ppo_ep: 1|act_loss: 0.0487060546875|cri_loss: 0.026031494140625|unsuper_loss: 0.0
+average reward score: 4.7734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.25%) |Training time=0.45s (21.07%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4377|ppo_ep: 1|act_loss: 0.0104522705078125|cri_loss: 0.00604248046875|unsuper_loss: 0.0
+average reward score: 5.77734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.46s (21.29%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4378|ppo_ep: 1|act_loss: -0.01346588134765625|cri_loss: -0.006237030029296875|unsuper_loss: 0.0
+average reward score: 6.1484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.14%) |Training time=0.45s (21.20%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
+[2023-04-14 11:29:02,241] [INFO] [logging.py:96:log_dist] [Rank 0] step=4380, skipped=58, lr=[4.561349245652627e-06, 4.561349245652627e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:29:02,259] [INFO] [timer.py:199:stop] epoch=0/micro_step=4380/global_step=4380, RunningAvgSamplesPerSec=105.21642532909777, CurrSamplesPerSec=108.15262171002004, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:29:02,352] [INFO] [logging.py:96:log_dist] [Rank 0] step=4380, skipped=70, lr=[2.374927525426587e-06, 2.374927525426587e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4379|ppo_ep: 1|act_loss: 0.0002435445785522461|cri_loss: 0.0004558563232421875|unsuper_loss: 0.0
+average reward score: 5.640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.01%) |Training time=0.46s (21.34%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4380|ppo_ep: 1|act_loss: -0.0017757415771484375|cri_loss: -0.0005774497985839844|unsuper_loss: 0.0
+average reward score: 4.453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.07%) |Training time=0.46s (21.27%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4381|ppo_ep: 1|act_loss: -0.008941650390625|cri_loss: -0.00415802001953125|unsuper_loss: 0.0
+average reward score: 5.8046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.32%) |Training time=0.46s (21.11%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4382|ppo_ep: 1|act_loss: -0.02496337890625|cri_loss: -0.0115814208984375|unsuper_loss: 0.0
+average reward score: 4.984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.69s (75.22%) |Training time=0.46s (20.31%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.23 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4383|ppo_ep: 1|act_loss: -0.018798828125|cri_loss: -0.0090789794921875|unsuper_loss: 0.0
+average reward score: 5.4765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.96%) |Training time=0.45s (19.61%) |Others=0.10 (4.43%)|CurSamplesPerSec=13.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4384|ppo_ep: 1|act_loss: 0.02191162109375|cri_loss: 0.01122283935546875|unsuper_loss: 0.0
+average reward score: 4.734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.28%) |Training time=0.45s (21.05%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4385|ppo_ep: 1|act_loss: 0.01512908935546875|cri_loss: 0.00812530517578125|unsuper_loss: 0.0
+average reward score: 5.1796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.42%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4386|ppo_ep: 1|act_loss: 0.01531982421875|cri_loss: 0.0079345703125|unsuper_loss: 0.0
+average reward score: 5.35546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.21%) |Training time=0.45s (21.07%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4387|ppo_ep: 1|act_loss: 0.0019092559814453125|cri_loss: 0.00247955322265625|unsuper_loss: 0.0
+average reward score: 6.2421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.16%) |Training time=0.48s (21.93%) |Others=0.11 (4.91%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4388|ppo_ep: 1|act_loss: -0.018646240234375|cri_loss: -0.00868988037109375|unsuper_loss: 0.0
+average reward score: 4.13671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.65%) |Training time=0.47s (21.71%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+[2023-04-14 11:29:24,065] [INFO] [logging.py:96:log_dist] [Rank 0] step=4390, skipped=58, lr=[4.542802792206608e-06, 4.542802792206608e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:29:24,084] [INFO] [timer.py:199:stop] epoch=0/micro_step=4390/global_step=4390, RunningAvgSamplesPerSec=105.22203844777437, CurrSamplesPerSec=107.79432154766418, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:29:24,176] [INFO] [logging.py:96:log_dist] [Rank 0] step=4390, skipped=70, lr=[2.3653155513433175e-06, 2.3653155513433175e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4389|ppo_ep: 1|act_loss: -0.0149383544921875|cri_loss: -0.005939483642578125|unsuper_loss: 0.0
+average reward score: 5.04296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.05%) |Training time=0.46s (21.29%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4390|ppo_ep: 1|act_loss: 0.05224609375|cri_loss: 0.0274200439453125|unsuper_loss: 0.0
+average reward score: 5.609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.76%) |Training time=0.47s (21.61%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4391|ppo_ep: 1|act_loss: -0.0126495361328125|cri_loss: -0.005931854248046875|unsuper_loss: 0.0
+average reward score: 6.5390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.84%) |Training time=0.46s (21.49%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4392|ppo_ep: 1|act_loss: -0.0054779052734375|cri_loss: -0.0024261474609375|unsuper_loss: 0.0
+average reward score: 5.03125
+-------------------------------------------------------------------------------------
+|E2E latency=2.97s |Gather latency=0.00s (0.00%) |Generate time=1.59s (53.63%) |Training time=0.47s (15.71%) |Others=0.91 (30.66%)|CurSamplesPerSec=10.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4393|ppo_ep: 1|act_loss: 0.0209808349609375|cri_loss: 0.01116180419921875|unsuper_loss: 0.0
+average reward score: 4.79296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.62%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4394|ppo_ep: 1|act_loss: -0.013763427734375|cri_loss: -0.006748199462890625|unsuper_loss: 0.0
+average reward score: 6.87109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.60%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4395|ppo_ep: 1|act_loss: -0.0205078125|cri_loss: -0.0098724365234375|unsuper_loss: 0.0
+average reward score: 4.5703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.83%) |Training time=0.46s (21.52%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4396|ppo_ep: 1|act_loss: -0.02789306640625|cri_loss: -0.013275146484375|unsuper_loss: 0.0
+average reward score: 5.46875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.73%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4397|ppo_ep: 1|act_loss: 0.00476837158203125|cri_loss: 0.0038661956787109375|unsuper_loss: 0.0
+average reward score: 4.80078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.81%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4398|ppo_ep: 1|act_loss: 0.0108642578125|cri_loss: 0.00586700439453125|unsuper_loss: 0.0
+average reward score: 4.7578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.88%) |Training time=0.46s (19.70%) |Others=0.10 (4.42%)|CurSamplesPerSec=13.77 |AvgSamplesPerSec=14.46
+[2023-04-14 11:29:46,633] [INFO] [logging.py:96:log_dist] [Rank 0] step=4400, skipped=58, lr=[4.524260521603971e-06, 4.524260521603971e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:29:46,652] [INFO] [timer.py:199:stop] epoch=0/micro_step=4400/global_step=4400, RunningAvgSamplesPerSec=105.2229391910278, CurrSamplesPerSec=105.90347959812492, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:29:46,744] [INFO] [logging.py:96:log_dist] [Rank 0] step=4400, skipped=70, lr=[2.3557055736084847e-06, 2.3557055736084847e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4399|ppo_ep: 1|act_loss: -0.02545166015625|cri_loss: -0.01221466064453125|unsuper_loss: 0.0
+average reward score: 4.72265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.99%) |Training time=0.46s (21.41%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4400|ppo_ep: 1|act_loss: -0.03961181640625|cri_loss: -0.0188751220703125|unsuper_loss: 0.0
+average reward score: 6.0703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.74%) |Training time=0.47s (21.61%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4401|ppo_ep: 1|act_loss: 0.063720703125|cri_loss: 0.034637451171875|unsuper_loss: 0.0
+average reward score: 5.76171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.77%) |Training time=0.47s (21.57%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4402|ppo_ep: 1|act_loss: -0.0141143798828125|cri_loss: -0.006595611572265625|unsuper_loss: 0.0
+average reward score: 5.3828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.66%) |Training time=0.47s (21.69%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4403|ppo_ep: 1|act_loss: 0.03546142578125|cri_loss: 0.01824951171875|unsuper_loss: 0.0
+average reward score: 7.25
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.61%) |Training time=0.47s (21.76%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4404|ppo_ep: 1|act_loss: -0.0003781318664550781|cri_loss: 6.4849853515625e-05|unsuper_loss: 0.0
+average reward score: 5.69921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.09%) |Training time=0.46s (21.24%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4405|ppo_ep: 1|act_loss: -0.0084228515625|cri_loss: -0.00386810302734375|unsuper_loss: 0.0
+average reward score: 6.1015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.00%) |Training time=0.46s (21.32%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4406|ppo_ep: 1|act_loss: 0.03094482421875|cri_loss: 0.0161285400390625|unsuper_loss: 0.0
+average reward score: 4.640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.17%) |Training time=0.45s (21.18%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4407|ppo_ep: 1|act_loss: 0.0733642578125|cri_loss: 0.039581298828125|unsuper_loss: 0.0
+average reward score: 5.56640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.95%) |Training time=0.46s (21.38%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4408|ppo_ep: 1|act_loss: 0.0005645751953125|cri_loss: 0.0008716583251953125|unsuper_loss: 0.0
+average reward score: 4.87890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.21%) |Training time=0.45s (21.09%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
+[2023-04-14 11:30:08,176] [INFO] [logging.py:96:log_dist] [Rank 0] step=4410, skipped=58, lr=[4.505722708685901e-06, 4.505722708685901e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:30:08,194] [INFO] [timer.py:199:stop] epoch=0/micro_step=4410/global_step=4410, RunningAvgSamplesPerSec=105.22675621955904, CurrSamplesPerSec=107.51842912451345, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:30:08,287] [INFO] [logging.py:96:log_dist] [Rank 0] step=4410, skipped=70, lr=[2.346097734665143e-06, 2.346097734665143e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4409|ppo_ep: 1|act_loss: 0.009124755859375|cri_loss: 0.004863739013671875|unsuper_loss: 0.0
+average reward score: 5.6015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.92%) |Training time=0.46s (21.44%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4410|ppo_ep: 1|act_loss: 0.00501251220703125|cri_loss: 0.0026264190673828125|unsuper_loss: 0.0
+average reward score: 4.6171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.91%) |Training time=0.47s (21.47%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4411|ppo_ep: 1|act_loss: 0.004062652587890625|cri_loss: 0.00296783447265625|unsuper_loss: 0.0
+average reward score: 4.765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.56%) |Training time=0.45s (19.98%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.25 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4412|ppo_ep: 1|act_loss: 0.01087188720703125|cri_loss: 0.00595855712890625|unsuper_loss: 0.0
+average reward score: 4.10546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.96%) |Training time=0.46s (21.38%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
+[2023-04-14 11:30:17,048] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
+epoch: 0|step: 4413|ppo_ep: 1|act_loss: 0.022308349609375|cri_loss: 0.0118408203125|unsuper_loss: 0.0
+average reward score: 5.53515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.76s (76.95%) |Training time=0.43s (18.68%) |Others=0.10 (4.37%)|CurSamplesPerSec=13.97 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4414|ppo_ep: 1|act_loss: -0.019378662109375|cri_loss: -0.00940704345703125|unsuper_loss: 0.0
+average reward score: 5.0
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.68%) |Training time=0.47s (21.67%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4415|ppo_ep: 1|act_loss: -0.0246429443359375|cri_loss: -0.0103607177734375|unsuper_loss: 0.0
+average reward score: 4.65234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.75%) |Training time=0.47s (21.65%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4416|ppo_ep: 1|act_loss: -0.0114288330078125|cri_loss: -0.005565643310546875|unsuper_loss: 0.0
+average reward score: 4.99609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.70%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4417|ppo_ep: 1|act_loss: -0.00141143798828125|cri_loss: 0.0003528594970703125|unsuper_loss: 0.0
+average reward score: 5.421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.96%) |Training time=0.48s (22.40%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4418|ppo_ep: 1|act_loss: 0.00626373291015625|cri_loss: 0.003574371337890625|unsuper_loss: 0.0
+average reward score: 5.484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.58%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
+[2023-04-14 11:30:30,012] [INFO] [logging.py:96:log_dist] [Rank 0] step=4420, skipped=59, lr=[4.489042715482996e-06, 4.489042715482996e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:30:30,030] [INFO] [timer.py:199:stop] epoch=0/micro_step=4420/global_step=4420, RunningAvgSamplesPerSec=105.22975983988711, CurrSamplesPerSec=105.75086433215567, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:30:30,123] [INFO] [logging.py:96:log_dist] [Rank 0] step=4420, skipped=70, lr=[2.3364921769246423e-06, 2.3364921769246423e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4419|ppo_ep: 1|act_loss: 0.015380859375|cri_loss: 0.00812530517578125|unsuper_loss: 0.0
+average reward score: 5.80859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.85%) |Training time=0.47s (21.52%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4420|ppo_ep: 1|act_loss: 0.0207672119140625|cri_loss: 0.0114898681640625|unsuper_loss: 0.0
+average reward score: 5.3125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.53%) |Training time=0.47s (21.82%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4421|ppo_ep: 1|act_loss: 0.03228759765625|cri_loss: 0.0165252685546875|unsuper_loss: 0.0
+average reward score: 5.015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.79%) |Training time=0.47s (21.56%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4422|ppo_ep: 1|act_loss: -0.0010213851928710938|cri_loss: -0.00040078163146972656|unsuper_loss: 0.0
+average reward score: 6.07421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.41s |Gather latency=0.00s (0.00%) |Generate time=1.59s (66.06%) |Training time=0.47s (19.46%) |Others=0.35 (14.49%)|CurSamplesPerSec=13.27 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4423|ppo_ep: 1|act_loss: 6.699562072753906e-05|cri_loss: 0.00020170211791992188|unsuper_loss: 0.0
+average reward score: 5.52734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.61%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4424|ppo_ep: 1|act_loss: -0.0081939697265625|cri_loss: -0.00389862060546875|unsuper_loss: 0.0
+average reward score: 5.8203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.47s (21.57%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4425|ppo_ep: 1|act_loss: -0.0167236328125|cri_loss: -0.0081329345703125|unsuper_loss: 0.0
+average reward score: 5.1953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.29%) |Training time=0.48s (22.06%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4426|ppo_ep: 1|act_loss: -0.0308990478515625|cri_loss: -0.014556884765625|unsuper_loss: 0.0
+average reward score: 5.23828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.52%) |Training time=0.47s (21.82%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4427|ppo_ep: 1|act_loss: 0.00275421142578125|cri_loss: 0.0015478134155273438|unsuper_loss: 0.0
+average reward score: 6.15625
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.60s (70.08%) |Training time=0.58s (25.53%) |Others=0.10 (4.39%)|CurSamplesPerSec=14.01 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4428|ppo_ep: 1|act_loss: 0.0010356903076171875|cri_loss: 0.001148223876953125|unsuper_loss: 0.0
+average reward score: 4.86328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.13%) |Training time=0.48s (22.23%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+[2023-04-14 11:30:52,008] [INFO] [logging.py:96:log_dist] [Rank 0] step=4430, skipped=59, lr=[4.470514129113125e-06, 4.470514129113125e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:30:52,026] [INFO] [timer.py:199:stop] epoch=0/micro_step=4430/global_step=4430, RunningAvgSamplesPerSec=105.21941202542125, CurrSamplesPerSec=96.38726764544512, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:30:52,119] [INFO] [logging.py:96:log_dist] [Rank 0] step=4430, skipped=70, lr=[2.3268890427645213e-06, 2.3268890427645213e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4429|ppo_ep: 1|act_loss: 0.0251617431640625|cri_loss: 0.013397216796875|unsuper_loss: 0.0
+average reward score: 5.58203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.70%) |Training time=0.49s (22.71%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4430|ppo_ep: 1|act_loss: -0.0066070556640625|cri_loss: -0.003173828125|unsuper_loss: 0.0
+average reward score: 5.51953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.00%) |Training time=0.49s (22.38%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4431|ppo_ep: 1|act_loss: -0.024017333984375|cri_loss: -0.011444091796875|unsuper_loss: 0.0
+average reward score: 5.078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.07%) |Training time=0.48s (22.29%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4432|ppo_ep: 1|act_loss: -0.0283203125|cri_loss: -0.01357269287109375|unsuper_loss: 0.0
+average reward score: 5.72265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.91%) |Training time=0.49s (22.47%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4433|ppo_ep: 1|act_loss: -0.0015773773193359375|cri_loss: -0.0005788803100585938|unsuper_loss: 0.0
+average reward score: 5.55078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.47s (21.91%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4434|ppo_ep: 1|act_loss: 0.0058746337890625|cri_loss: 0.00351715087890625|unsuper_loss: 0.0
+average reward score: 5.4921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.25%) |Training time=0.48s (22.14%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4435|ppo_ep: 1|act_loss: -0.0010538101196289062|cri_loss: 8.96453857421875e-05|unsuper_loss: 0.0
+average reward score: 4.7421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.05%) |Training time=0.48s (22.31%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+[2023-04-14 11:31:07,251] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 4436|ppo_ep: 1|act_loss: -0.00556182861328125|cri_loss: -0.002140045166015625|unsuper_loss: 0.0
+average reward score: 5.87890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.79%) |Training time=0.47s (22.02%) |Others=0.09 (4.19%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
+[2023-04-14 11:31:09,405] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 4437|ppo_ep: 1|act_loss: -0.0242919921875|cri_loss: -0.01165008544921875|unsuper_loss: 0.0
+average reward score: 5.0
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.19%) |Training time=0.49s (22.66%) |Others=0.09 (4.15%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4438|ppo_ep: 1|act_loss: 0.00684356689453125|cri_loss: 0.0038604736328125|unsuper_loss: 0.0
+average reward score: 4.28515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.05%) |Training time=0.48s (22.32%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+[2023-04-14 11:31:13,619] [INFO] [logging.py:96:log_dist] [Rank 0] step=4440, skipped=59, lr=[4.451990797079012e-06, 4.451990797079012e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:31:13,637] [INFO] [timer.py:199:stop] epoch=0/micro_step=4440/global_step=4440, RunningAvgSamplesPerSec=105.20787125582682, CurrSamplesPerSec=100.67366538753491, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:31:13,729] [INFO] [logging.py:96:log_dist] [Rank 0] step=4440, skipped=72, lr=[2.3192083760691226e-06, 2.3192083760691226e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4439|ppo_ep: 1|act_loss: -0.00165557861328125|cri_loss: -0.0004649162292480469|unsuper_loss: 0.0
+average reward score: 4.8671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.23%) |Training time=0.48s (22.17%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4440|ppo_ep: 1|act_loss: 0.0289306640625|cri_loss: 0.01500701904296875|unsuper_loss: 0.0
+average reward score: 6.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.31%) |Training time=0.48s (21.83%) |Others=0.11 (4.85%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4441|ppo_ep: 1|act_loss: 0.002044677734375|cri_loss: 0.001621246337890625|unsuper_loss: 0.0
+average reward score: 5.67578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.67s (74.29%) |Training time=0.48s (21.24%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.25 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4442|ppo_ep: 1|act_loss: 0.021697998046875|cri_loss: 0.011474609375|unsuper_loss: 0.0
+average reward score: 5.6328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.91%) |Training time=0.49s (22.45%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4443|ppo_ep: 1|act_loss: 0.0056304931640625|cri_loss: 0.003143310546875|unsuper_loss: 0.0
+average reward score: 5.25
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.18%) |Training time=0.48s (22.19%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4444|ppo_ep: 1|act_loss: 0.027313232421875|cri_loss: 0.01392364501953125|unsuper_loss: 0.0
+average reward score: 4.96484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.74s (74.68%) |Training time=0.48s (20.67%) |Others=0.11 (4.65%)|CurSamplesPerSec=13.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4445|ppo_ep: 1|act_loss: 0.03533935546875|cri_loss: 0.01910400390625|unsuper_loss: 0.0
+average reward score: 4.9375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.88%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4446|ppo_ep: 1|act_loss: 0.003787994384765625|cri_loss: 0.0020809173583984375|unsuper_loss: 0.0
+average reward score: 6.28125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.70%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4447|ppo_ep: 1|act_loss: 0.0007276535034179688|cri_loss: 0.0010051727294921875|unsuper_loss: 0.0
+average reward score: 5.796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.10%) |Training time=0.48s (22.28%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4448|ppo_ep: 1|act_loss: -0.0034160614013671875|cri_loss: -0.0006732940673828125|unsuper_loss: 0.0
+average reward score: 4.84375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.46s (21.49%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
+[2023-04-14 11:31:35,512] [INFO] [logging.py:96:log_dist] [Rank 0] step=4450, skipped=59, lr=[4.433472993941121e-06, 4.433472993941121e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:31:35,530] [INFO] [timer.py:199:stop] epoch=0/micro_step=4450/global_step=4450, RunningAvgSamplesPerSec=105.20223601286291, CurrSamplesPerSec=111.97585245167195, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:31:35,623] [INFO] [logging.py:96:log_dist] [Rank 0] step=4450, skipped=72, lr=[2.309609963029065e-06, 2.309609963029065e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4449|ppo_ep: 1|act_loss: -0.01202392578125|cri_loss: -0.005767822265625|unsuper_loss: 0.0
+average reward score: 6.08203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.59%) |Training time=0.45s (20.74%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4450|ppo_ep: 1|act_loss: -0.024658203125|cri_loss: -0.01102447509765625|unsuper_loss: 0.0
+average reward score: 5.53125
+-------------------------------------------------------------------------------------
+|E2E latency=2.52s |Gather latency=0.00s (0.00%) |Generate time=1.60s (63.36%) |Training time=0.46s (18.39%) |Others=0.46 (18.25%)|CurSamplesPerSec=12.71 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4451|ppo_ep: 1|act_loss: -0.0023059844970703125|cri_loss: -0.0007138252258300781|unsuper_loss: 0.0
+average reward score: 4.796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.65%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4452|ppo_ep: 1|act_loss: -0.006069183349609375|cri_loss: -0.0021514892578125|unsuper_loss: 0.0
+average reward score: 5.28125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.91%) |Training time=0.46s (21.42%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4453|ppo_ep: 1|act_loss: -0.029815673828125|cri_loss: -0.01385498046875|unsuper_loss: 0.0
+average reward score: 6.00390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.96%) |Training time=0.46s (21.36%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4454|ppo_ep: 1|act_loss: -0.016937255859375|cri_loss: -0.007965087890625|unsuper_loss: 0.0
+average reward score: 5.71484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.40%) |Training time=0.48s (21.96%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4455|ppo_ep: 1|act_loss: -0.036224365234375|cri_loss: -0.0174713134765625|unsuper_loss: 0.0
+average reward score: 5.5703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.86%) |Training time=0.46s (21.49%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4456|ppo_ep: 1|act_loss: 0.03314208984375|cri_loss: 0.0172271728515625|unsuper_loss: 0.0
+average reward score: 5.59765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.80%) |Training time=0.46s (21.56%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4457|ppo_ep: 1|act_loss: 0.0027675628662109375|cri_loss: 0.0017824172973632812|unsuper_loss: 0.0
+average reward score: 5.87890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.59%) |Training time=0.47s (21.76%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4458|ppo_ep: 1|act_loss: 0.026031494140625|cri_loss: 0.01338958740234375|unsuper_loss: 0.0
+average reward score: 4.76953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.39%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
+[2023-04-14 11:31:57,598] [INFO] [logging.py:96:log_dist] [Rank 0] step=4460, skipped=59, lr=[4.41496099417797e-06, 4.41496099417797e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:31:57,616] [INFO] [timer.py:199:stop] epoch=0/micro_step=4460/global_step=4460, RunningAvgSamplesPerSec=105.20444311444572, CurrSamplesPerSec=108.08268602342073, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:31:57,709] [INFO] [logging.py:96:log_dist] [Rank 0] step=4460, skipped=72, lr=[2.3000143720286463e-06, 2.3000143720286463e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4459|ppo_ep: 1|act_loss: 0.06365966796875|cri_loss: 0.03680419921875|unsuper_loss: 0.0
+average reward score: 5.4375
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.96%) |Training time=0.46s (19.74%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4460|ppo_ep: 1|act_loss: 0.01898193359375|cri_loss: 0.00991058349609375|unsuper_loss: 0.0
+average reward score: 6.82421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.76%) |Training time=0.47s (21.60%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4461|ppo_ep: 1|act_loss: -0.0066986083984375|cri_loss: -0.00315093994140625|unsuper_loss: 0.0
+average reward score: 4.9140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.46s (21.53%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4462|ppo_ep: 1|act_loss: 0.0011196136474609375|cri_loss: 0.0011081695556640625|unsuper_loss: 0.0
+average reward score: 5.98046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.61%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4463|ppo_ep: 1|act_loss: -0.0222320556640625|cri_loss: -0.00909423828125|unsuper_loss: 0.0
+average reward score: 6.11328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.27%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4464|ppo_ep: 1|act_loss: -0.00608062744140625|cri_loss: 0.001983642578125|unsuper_loss: 0.0
+average reward score: 6.4453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.27%) |Training time=0.45s (21.00%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4465|ppo_ep: 1|act_loss: -0.028961181640625|cri_loss: -0.0133514404296875|unsuper_loss: 0.0
+average reward score: 5.4765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.13%) |Training time=0.46s (21.17%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4466|ppo_ep: 1|act_loss: 0.029052734375|cri_loss: 0.01490020751953125|unsuper_loss: 0.0
+average reward score: 4.953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.31%) |Training time=0.41s (18.96%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4467|ppo_ep: 1|act_loss: -0.0139923095703125|cri_loss: -0.005023956298828125|unsuper_loss: 0.0
+average reward score: 5.91015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.65%) |Training time=0.40s (18.63%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4468|ppo_ep: 1|act_loss: 0.006591796875|cri_loss: 0.0034809112548828125|unsuper_loss: 0.0
+average reward score: 6.4375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.92%) |Training time=0.46s (21.40%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
+[2023-04-14 11:32:19,212] [INFO] [logging.py:96:log_dist] [Rank 0] step=4470, skipped=59, lr=[4.3964550721820475e-06, 4.3964550721820475e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:32:19,230] [INFO] [timer.py:199:stop] epoch=0/micro_step=4470/global_step=4470, RunningAvgSamplesPerSec=105.21677106152741, CurrSamplesPerSec=103.61686894506175, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:32:19,323] [INFO] [logging.py:96:log_dist] [Rank 0] step=4470, skipped=72, lr=[2.2904217452976725e-06, 2.2904217452976725e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4469|ppo_ep: 1|act_loss: -0.00719451904296875|cri_loss: -0.002063751220703125|unsuper_loss: 0.0
+average reward score: 5.703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.72%) |Training time=0.47s (21.67%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4470|ppo_ep: 1|act_loss: 0.01214599609375|cri_loss: 0.00646209716796875|unsuper_loss: 0.0
+average reward score: 6.7265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.99%) |Training time=0.46s (20.54%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.17 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4471|ppo_ep: 1|act_loss: 0.0025005340576171875|cri_loss: 0.00406646728515625|unsuper_loss: 0.0
+average reward score: 7.140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.09%) |Training time=0.46s (21.23%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4472|ppo_ep: 1|act_loss: 0.004077911376953125|cri_loss: 0.002239227294921875|unsuper_loss: 0.0
+average reward score: 5.1171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.67%) |Training time=0.45s (20.63%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4473|ppo_ep: 1|act_loss: -0.0126495361328125|cri_loss: -0.005893707275390625|unsuper_loss: 0.0
+average reward score: 5.78515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.45%) |Training time=0.45s (20.88%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4474|ppo_ep: 1|act_loss: -0.0308990478515625|cri_loss: -0.0146636962890625|unsuper_loss: 0.0
+average reward score: 6.41015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.33%) |Training time=0.47s (20.34%) |Others=0.10 (4.33%)|CurSamplesPerSec=13.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4475|ppo_ep: 1|act_loss: -0.0252532958984375|cri_loss: -0.01204681396484375|unsuper_loss: 0.0
+average reward score: 4.92578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.86%) |Training time=0.46s (21.47%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4476|ppo_ep: 1|act_loss: -0.0056610107421875|cri_loss: -0.002590179443359375|unsuper_loss: 0.0
+average reward score: 5.1171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.85%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4477|ppo_ep: 1|act_loss: 0.0009708404541015625|cri_loss: 0.0006566047668457031|unsuper_loss: 0.0
+average reward score: 5.7109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.64%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4478|ppo_ep: 1|act_loss: 0.04119873046875|cri_loss: 0.02197265625|unsuper_loss: 0.0
+average reward score: 5.94140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.09%) |Training time=0.46s (21.23%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+[2023-04-14 11:32:41,073] [INFO] [logging.py:96:log_dist] [Rank 0] step=4480, skipped=59, lr=[4.377955502255766e-06, 4.377955502255766e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:32:41,092] [INFO] [timer.py:199:stop] epoch=0/micro_step=4480/global_step=4480, RunningAvgSamplesPerSec=105.22041393282225, CurrSamplesPerSec=104.97159258882692, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:32:41,184] [INFO] [logging.py:96:log_dist] [Rank 0] step=4480, skipped=72, lr=[2.2808322250220145e-06, 2.2808322250220145e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4479|ppo_ep: 1|act_loss: -0.024658203125|cri_loss: -0.01195526123046875|unsuper_loss: 0.0
+average reward score: 5.55859375
+-------------------------------------------------------------------------------------
+|E2E latency=3.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (50.62%) |Training time=0.47s (14.91%) |Others=1.08 (34.47%)|CurSamplesPerSec=10.19 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4480|ppo_ep: 1|act_loss: 0.01971435546875|cri_loss: 0.0106048583984375|unsuper_loss: 0.0
+average reward score: 5.31640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.39%) |Training time=0.45s (20.92%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4481|ppo_ep: 1|act_loss: -0.017547607421875|cri_loss: -0.00855255126953125|unsuper_loss: 0.0
+average reward score: 5.8671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.67%) |Training time=0.44s (20.64%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4482|ppo_ep: 1|act_loss: -0.00038242340087890625|cri_loss: 0.0005755424499511719|unsuper_loss: 0.0
+average reward score: 6.4375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.98%) |Training time=0.44s (20.38%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4483|ppo_ep: 1|act_loss: -0.0160369873046875|cri_loss: -0.007762908935546875|unsuper_loss: 0.0
+average reward score: 5.08984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.64%) |Training time=0.44s (20.68%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4484|ppo_ep: 1|act_loss: 0.00464630126953125|cri_loss: 0.002918243408203125|unsuper_loss: 0.0
+average reward score: 5.56640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.43%) |Training time=0.45s (20.91%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4485|ppo_ep: 1|act_loss: -0.023712158203125|cri_loss: -0.0107879638671875|unsuper_loss: 0.0
+average reward score: 5.23046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.32%) |Training time=0.45s (20.99%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4486|ppo_ep: 1|act_loss: 0.007293701171875|cri_loss: 0.004192352294921875|unsuper_loss: 0.0
+average reward score: 5.14453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.82%) |Training time=0.47s (21.54%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4487|ppo_ep: 1|act_loss: -0.0009126663208007812|cri_loss: -7.581710815429688e-05|unsuper_loss: 0.0
+average reward score: 6.16796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.17%) |Training time=0.46s (21.17%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4488|ppo_ep: 1|act_loss: 0.013427734375|cri_loss: 0.007648468017578125|unsuper_loss: 0.0
+average reward score: 5.72265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.45%) |Training time=0.45s (20.86%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
+[2023-04-14 11:33:03,613] [INFO] [logging.py:96:log_dist] [Rank 0] step=4490, skipped=59, lr=[4.3594625586073746e-06, 4.3594625586073746e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:33:03,632] [INFO] [timer.py:199:stop] epoch=0/micro_step=4490/global_step=4490, RunningAvgSamplesPerSec=105.23280805486438, CurrSamplesPerSec=110.25764020668524, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:33:03,725] [INFO] [logging.py:96:log_dist] [Rank 0] step=4490, skipped=72, lr=[2.271245953341494e-06, 2.271245953341494e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4489|ppo_ep: 1|act_loss: 0.0073394775390625|cri_loss: 0.0040130615234375|unsuper_loss: 0.0
+average reward score: 4.859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.31%) |Training time=0.45s (21.05%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4490|ppo_ep: 1|act_loss: -0.017333984375|cri_loss: -0.0079803466796875|unsuper_loss: 0.0
+average reward score: 5.0859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.33%) |Training time=0.45s (21.01%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4491|ppo_ep: 1|act_loss: -0.015625|cri_loss: -0.0067596435546875|unsuper_loss: 0.0
+average reward score: 5.7265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.40%) |Training time=0.45s (19.27%) |Others=0.10 (4.33%)|CurSamplesPerSec=13.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4492|ppo_ep: 1|act_loss: -0.046142578125|cri_loss: -0.0220184326171875|unsuper_loss: 0.0
+average reward score: 5.125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.45s (21.08%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4493|ppo_ep: 1|act_loss: -0.0009975433349609375|cri_loss: -0.000133514404296875|unsuper_loss: 0.0
+average reward score: 5.25390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.25%) |Training time=0.46s (21.10%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4494|ppo_ep: 1|act_loss: -0.0071868896484375|cri_loss: -0.0030841827392578125|unsuper_loss: 0.0
+average reward score: 5.46484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.56%) |Training time=0.45s (20.77%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4495|ppo_ep: 1|act_loss: -0.0185546875|cri_loss: -0.00876617431640625|unsuper_loss: 0.0
+average reward score: 5.984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.60%) |Training time=0.45s (20.73%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4496|ppo_ep: 1|act_loss: -0.00152587890625|cri_loss: 0.002399444580078125|unsuper_loss: 0.0
+average reward score: 5.3359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.33%) |Training time=0.45s (21.01%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4497|ppo_ep: 1|act_loss: -0.002407073974609375|cri_loss: -0.0009021759033203125|unsuper_loss: 0.0
+average reward score: 5.28515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.64%) |Training time=0.44s (20.70%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4498|ppo_ep: 1|act_loss: 0.01605224609375|cri_loss: 0.00878143310546875|unsuper_loss: 0.0
+average reward score: 5.14453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.60%) |Training time=0.44s (20.18%) |Others=0.11 (5.22%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
+[2023-04-14 11:33:25,426] [INFO] [logging.py:96:log_dist] [Rank 0] step=4500, skipped=59, lr=[4.340976515346915e-06, 4.340976515346915e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:33:25,444] [INFO] [timer.py:199:stop] epoch=0/micro_step=4500/global_step=4500, RunningAvgSamplesPerSec=105.24786998748107, CurrSamplesPerSec=114.88214409214008, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:33:25,537] [INFO] [logging.py:96:log_dist] [Rank 0] step=4500, skipped=72, lr=[2.261663072347785e-06, 2.261663072347785e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4499|ppo_ep: 1|act_loss: 0.0015306472778320312|cri_loss: 0.0017213821411132812|unsuper_loss: 0.0
+average reward score: 5.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.93%) |Training time=0.44s (19.62%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.22 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4500|ppo_ep: 1|act_loss: -0.002105712890625|cri_loss: -6.4849853515625e-05|unsuper_loss: 0.0
+average reward score: 5.05078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.59%) |Training time=0.45s (20.74%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4501|ppo_ep: 1|act_loss: 0.0303955078125|cri_loss: 0.0157470703125|unsuper_loss: 0.0
+average reward score: 6.1015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.45%) |Training time=0.45s (20.91%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4502|ppo_ep: 1|act_loss: 0.02105712890625|cri_loss: 0.01132965087890625|unsuper_loss: 0.0
+average reward score: 4.98828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.54%) |Training time=0.45s (20.79%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4503|ppo_ep: 1|act_loss: -0.03997802734375|cri_loss: -0.0192108154296875|unsuper_loss: 0.0
+average reward score: 5.0390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.77%) |Training time=0.44s (20.55%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4504|ppo_ep: 1|act_loss: 0.013336181640625|cri_loss: 0.007686614990234375|unsuper_loss: 0.0
+average reward score: 5.51171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.46%) |Training time=0.45s (20.86%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4505|ppo_ep: 1|act_loss: -0.0149078369140625|cri_loss: -0.0070953369140625|unsuper_loss: 0.0
+average reward score: 6.26171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.42%) |Training time=0.45s (20.92%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4506|ppo_ep: 1|act_loss: 0.00888824462890625|cri_loss: 0.004749298095703125|unsuper_loss: 0.0
+average reward score: 4.53125
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.64%) |Training time=0.46s (19.69%) |Others=0.11 (4.66%)|CurSamplesPerSec=13.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4507|ppo_ep: 1|act_loss: 0.0194854736328125|cri_loss: 0.01007843017578125|unsuper_loss: 0.0
+average reward score: 5.5703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.20%) |Training time=0.45s (21.12%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4508|ppo_ep: 1|act_loss: 0.06427001953125|cri_loss: 0.034271240234375|unsuper_loss: 0.0
+average reward score: 5.046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.32%) |Training time=0.45s (21.01%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
+[2023-04-14 11:33:47,136] [INFO] [logging.py:96:log_dist] [Rank 0] step=4510, skipped=59, lr=[4.32249764648214e-06, 4.32249764648214e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:33:48,297] [INFO] [timer.py:199:stop] epoch=0/micro_step=4510/global_step=4510, RunningAvgSamplesPerSec=105.17105176081999, CurrSamplesPerSec=21.96928961487455, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:33:48,390] [INFO] [logging.py:96:log_dist] [Rank 0] step=4510, skipped=72, lr=[2.2520837240822997e-06, 2.2520837240822997e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4509|ppo_ep: 1|act_loss: -0.017822265625|cri_loss: -0.00855255126953125|unsuper_loss: 0.0
+average reward score: 5.32421875
+-------------------------------------------------------------------------------------
+|E2E latency=3.32s |Gather latency=0.00s (0.00%) |Generate time=1.60s (48.26%) |Training time=1.62s (48.71%) |Others=0.10 (3.03%)|CurSamplesPerSec=9.63 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4510|ppo_ep: 1|act_loss: 0.010223388671875|cri_loss: 0.005870819091796875|unsuper_loss: 0.0
+average reward score: 6.625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.11%) |Training time=0.48s (22.28%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4511|ppo_ep: 1|act_loss: 0.033660888671875|cri_loss: 0.017913818359375|unsuper_loss: 0.0
+average reward score: 4.37109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.62%) |Training time=0.47s (21.72%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4512|ppo_ep: 1|act_loss: 0.00102996826171875|cri_loss: 0.00168609619140625|unsuper_loss: 0.0
+average reward score: 4.7890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.32%) |Training time=0.48s (22.06%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4513|ppo_ep: 1|act_loss: 0.00452423095703125|cri_loss: 0.0026302337646484375|unsuper_loss: 0.0
+average reward score: 5.8515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.55%) |Training time=0.47s (21.80%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4514|ppo_ep: 1|act_loss: 0.01007080078125|cri_loss: 0.005413055419921875|unsuper_loss: 0.0
+average reward score: 5.81640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.48s (21.95%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4515|ppo_ep: 1|act_loss: 0.0038013458251953125|cri_loss: 0.00213623046875|unsuper_loss: 0.0
+average reward score: 6.515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.90%) |Training time=0.49s (22.49%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4516|ppo_ep: 1|act_loss: -0.00154876708984375|cri_loss: -0.00052642822265625|unsuper_loss: 0.0
+average reward score: 5.73046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.18%) |Training time=0.48s (22.19%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4517|ppo_ep: 1|act_loss: 0.007537841796875|cri_loss: 0.003902435302734375|unsuper_loss: 0.0
+average reward score: 5.9765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.12%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4518|ppo_ep: 1|act_loss: -0.0220947265625|cri_loss: -0.0106658935546875|unsuper_loss: 0.0
+average reward score: 5.01171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.18%) |Training time=0.48s (22.20%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+[2023-04-14 11:34:09,955] [INFO] [logging.py:96:log_dist] [Rank 0] step=4520, skipped=59, lr=[4.304026225914465e-06, 4.304026225914465e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:34:09,973] [INFO] [timer.py:199:stop] epoch=0/micro_step=4520/global_step=4520, RunningAvgSamplesPerSec=105.16232246066636, CurrSamplesPerSec=103.59375617466904, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:34:10,066] [INFO] [logging.py:96:log_dist] [Rank 0] step=4520, skipped=72, lr=[2.2425080505340897e-06, 2.2425080505340897e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4519|ppo_ep: 1|act_loss: -0.02069091796875|cri_loss: -0.00975799560546875|unsuper_loss: 0.0
+average reward score: 6.0703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.67%) |Training time=0.47s (21.72%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4520|ppo_ep: 1|act_loss: 0.0130767822265625|cri_loss: 0.006988525390625|unsuper_loss: 0.0
+average reward score: 6.4375
+-------------------------------------------------------------------------------------
+|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.39%) |Training time=0.50s (21.33%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.57 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4521|ppo_ep: 1|act_loss: 0.042938232421875|cri_loss: 0.0226898193359375|unsuper_loss: 0.0
+average reward score: 4.76171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.94%) |Training time=0.46s (21.41%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4522|ppo_ep: 1|act_loss: -0.0004248619079589844|cri_loss: 0.0001125335693359375|unsuper_loss: 0.0
+average reward score: 6.875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.65%) |Training time=0.47s (21.72%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4523|ppo_ep: 1|act_loss: 0.03631591796875|cri_loss: 0.01885986328125|unsuper_loss: 0.0
+average reward score: 5.453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.86%) |Training time=0.49s (22.51%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4524|ppo_ep: 1|act_loss: -0.006473541259765625|cri_loss: -0.003021240234375|unsuper_loss: 0.0
+average reward score: 5.91015625
+-------------------------------------------------------------------------------------
+|E2E latency=3.23s |Gather latency=0.00s (0.00%) |Generate time=1.60s (49.51%) |Training time=0.47s (14.71%) |Others=1.16 (35.78%)|CurSamplesPerSec=9.91 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4525|ppo_ep: 1|act_loss: -0.0032958984375|cri_loss: -0.0013866424560546875|unsuper_loss: 0.0
+average reward score: 5.44921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.08s |Gather latency=0.00s (0.00%) |Generate time=1.59s (76.58%) |Training time=0.39s (18.58%) |Others=0.10 (4.84%)|CurSamplesPerSec=15.38 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4526|ppo_ep: 1|act_loss: 0.0804443359375|cri_loss: 0.042755126953125|unsuper_loss: 0.0
+average reward score: 5.18359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.12%) |Training time=0.48s (22.10%) |Others=0.10 (4.78%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4527|ppo_ep: 1|act_loss: 0.0029087066650390625|cri_loss: 0.002635955810546875|unsuper_loss: 0.0
+average reward score: 6.203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.66%) |Training time=0.48s (21.78%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4528|ppo_ep: 1|act_loss: -0.0214691162109375|cri_loss: -0.0103302001953125|unsuper_loss: 0.0
+average reward score: 6.0390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.68s (73.67%) |Training time=0.49s (21.59%) |Others=0.11 (4.74%)|CurSamplesPerSec=14.01 |AvgSamplesPerSec=14.46
+[2023-04-14 11:34:33,000] [INFO] [logging.py:96:log_dist] [Rank 0] step=4530, skipped=59, lr=[4.285562527434903e-06, 4.285562527434903e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:34:33,018] [INFO] [timer.py:199:stop] epoch=0/micro_step=4530/global_step=4530, RunningAvgSamplesPerSec=105.15677733092694, CurrSamplesPerSec=97.51551941225235, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:34:33,111] [INFO] [logging.py:96:log_dist] [Rank 0] step=4530, skipped=72, lr=[2.2329361936377355e-06, 2.2329361936377355e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4529|ppo_ep: 1|act_loss: -0.037109375|cri_loss: -0.0172882080078125|unsuper_loss: 0.0
+average reward score: 4.9453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.89%) |Training time=0.49s (22.53%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4530|ppo_ep: 1|act_loss: 0.021636962890625|cri_loss: 0.011260986328125|unsuper_loss: 0.0
+average reward score: 4.91796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.95%) |Training time=0.49s (22.43%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4531|ppo_ep: 1|act_loss: -0.01393890380859375|cri_loss: -0.0066986083984375|unsuper_loss: 0.0
+average reward score: 4.734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.17%) |Training time=0.48s (22.08%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4532|ppo_ep: 1|act_loss: 0.01395416259765625|cri_loss: 0.007312774658203125|unsuper_loss: 0.0
+average reward score: 5.16796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.00%) |Training time=0.49s (22.38%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4533|ppo_ep: 1|act_loss: 0.015045166015625|cri_loss: 0.0084991455078125|unsuper_loss: 0.0
+average reward score: 5.73046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.47s (21.86%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4534|ppo_ep: 1|act_loss: 0.0009570121765136719|cri_loss: 0.0007576942443847656|unsuper_loss: 0.0
+average reward score: 5.8203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.10%) |Training time=0.48s (22.28%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4535|ppo_ep: 1|act_loss: -0.05126953125|cri_loss: -0.0240936279296875|unsuper_loss: 0.0
+average reward score: 5.32421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.69%) |Training time=0.48s (21.86%) |Others=0.12 (5.45%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4536|ppo_ep: 1|act_loss: 0.00452423095703125|cri_loss: 0.0024509429931640625|unsuper_loss: 0.0
+average reward score: 5.0390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.73s (74.96%) |Training time=0.47s (20.37%) |Others=0.11 (4.67%)|CurSamplesPerSec=13.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4537|ppo_ep: 1|act_loss: -0.0042266845703125|cri_loss: -0.0018224716186523438|unsuper_loss: 0.0
+average reward score: 5.3671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.19%) |Training time=0.48s (22.18%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+[2023-04-14 11:34:52,784] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 4538|ppo_ep: 1|act_loss: 0.03857421875|cri_loss: 0.019989013671875|unsuper_loss: 0.0
+average reward score: 5.02734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.59%) |Training time=0.48s (22.22%) |Others=0.09 (4.19%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+[2023-04-14 11:34:54,843] [INFO] [logging.py:96:log_dist] [Rank 0] step=4540, skipped=59, lr=[4.267106824720006e-06, 4.267106824720006e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:34:54,862] [INFO] [timer.py:199:stop] epoch=0/micro_step=4540/global_step=4540, RunningAvgSamplesPerSec=105.14597895413151, CurrSamplesPerSec=97.63909587372902, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:34:54,945] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+[2023-04-14 11:34:54,946] [INFO] [logging.py:96:log_dist] [Rank 0] step=4540, skipped=74, lr=[2.2252815514539662e-06, 2.2252815514539662e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4539|ppo_ep: 1|act_loss: -0.02557373046875|cri_loss: -0.0123748779296875|unsuper_loss: 0.0
+average reward score: 5.3984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.18%) |Training time=0.49s (22.68%) |Others=0.09 (4.14%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4540|ppo_ep: 1|act_loss: -0.005657196044921875|cri_loss: -0.0024356842041015625|unsuper_loss: 0.0
+average reward score: 5.46875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.71%) |Training time=0.49s (22.44%) |Others=0.11 (4.85%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4541|ppo_ep: 1|act_loss: 0.0125885009765625|cri_loss: 0.006679534912109375|unsuper_loss: 0.0
+average reward score: 5.93359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.06%) |Training time=0.48s (22.31%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4542|ppo_ep: 1|act_loss: -0.0191192626953125|cri_loss: -0.0088653564453125|unsuper_loss: 0.0
+average reward score: 5.3515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.99%) |Training time=0.48s (22.17%) |Others=0.10 (4.84%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4543|ppo_ep: 1|act_loss: -0.011566162109375|cri_loss: -0.005367279052734375|unsuper_loss: 0.0
+average reward score: 5.796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.13%) |Training time=0.48s (22.24%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4544|ppo_ep: 1|act_loss: 0.010528564453125|cri_loss: 0.00630950927734375|unsuper_loss: 0.0
+average reward score: 4.859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.27%) |Training time=0.48s (22.11%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4545|ppo_ep: 1|act_loss: -0.0128631591796875|cri_loss: -0.005611419677734375|unsuper_loss: 0.0
+average reward score: 5.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.16%) |Training time=0.48s (22.21%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4546|ppo_ep: 1|act_loss: 0.047515869140625|cri_loss: 0.02423095703125|unsuper_loss: 0.0
+average reward score: 5.83203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.14%) |Training time=0.48s (22.23%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4547|ppo_ep: 1|act_loss: 0.0244140625|cri_loss: 0.01294708251953125|unsuper_loss: 0.0
+average reward score: 4.8515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.12%) |Training time=0.48s (22.26%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4548|ppo_ep: 1|act_loss: 0.0213470458984375|cri_loss: 0.0111083984375|unsuper_loss: 0.0
+average reward score: 4.62890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.84s |Gather latency=0.00s (0.00%) |Generate time=1.58s (55.63%) |Training time=0.48s (16.89%) |Others=0.78 (27.49%)|CurSamplesPerSec=11.25 |AvgSamplesPerSec=14.46
+[2023-04-14 11:35:17,159] [INFO] [logging.py:96:log_dist] [Rank 0] step=4550, skipped=59, lr=[4.248659391327812e-06, 4.248659391327812e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:35:17,177] [INFO] [timer.py:199:stop] epoch=0/micro_step=4550/global_step=4550, RunningAvgSamplesPerSec=105.13486238929258, CurrSamplesPerSec=100.88547102857268, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:35:17,270] [INFO] [logging.py:96:log_dist] [Rank 0] step=4550, skipped=74, lr=[2.2157169220241807e-06, 2.2157169220241807e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4549|ppo_ep: 1|act_loss: -0.029541015625|cri_loss: -0.01416015625|unsuper_loss: 0.0
+average reward score: 4.4453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.14%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4550|ppo_ep: 1|act_loss: 0.0008153915405273438|cri_loss: 0.000988006591796875|unsuper_loss: 0.0
+average reward score: 5.0234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.12%) |Training time=0.48s (20.57%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4551|ppo_ep: 1|act_loss: 0.0382080078125|cri_loss: 0.0198822021484375|unsuper_loss: 0.0
+average reward score: 4.80859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.52%) |Training time=0.48s (21.87%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4552|ppo_ep: 1|act_loss: 0.005889892578125|cri_loss: 0.00328826904296875|unsuper_loss: 0.0
+average reward score: 5.94921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.40%) |Training time=0.48s (21.94%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4553|ppo_ep: 1|act_loss: 0.007633209228515625|cri_loss: 0.00445556640625|unsuper_loss: 0.0
+average reward score: 5.875
+-------------------------------------------------------------------------------------
+|E2E latency=2.51s |Gather latency=0.00s (0.00%) |Generate time=1.60s (63.76%) |Training time=0.46s (18.25%) |Others=0.45 (17.99%)|CurSamplesPerSec=12.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4554|ppo_ep: 1|act_loss: -0.04290771484375|cri_loss: -0.01824951171875|unsuper_loss: 0.0
+average reward score: 4.640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.18%) |Training time=0.48s (22.17%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4555|ppo_ep: 1|act_loss: -0.011505126953125|cri_loss: -0.00542449951171875|unsuper_loss: 0.0
+average reward score: 4.640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.93%) |Training time=0.49s (22.49%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4556|ppo_ep: 1|act_loss: 0.023284912109375|cri_loss: 0.01212310791015625|unsuper_loss: 0.0
+average reward score: 4.88671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.62s (71.91%) |Training time=0.49s (21.71%) |Others=0.14 (6.38%)|CurSamplesPerSec=14.20 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4557|ppo_ep: 1|act_loss: 0.02960205078125|cri_loss: 0.016326904296875|unsuper_loss: 0.0
+average reward score: 5.91015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.21%) |Training time=0.50s (22.29%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.37 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4558|ppo_ep: 1|act_loss: 0.0092010498046875|cri_loss: 0.005279541015625|unsuper_loss: 0.0
+average reward score: 5.0625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.32%) |Training time=0.48s (22.02%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+[2023-04-14 11:35:39,518] [INFO] [logging.py:96:log_dist] [Rank 0] step=4560, skipped=59, lr=[4.230220500693783e-06, 4.230220500693783e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:35:39,838] [INFO] [timer.py:199:stop] epoch=0/micro_step=4560/global_step=4560, RunningAvgSamplesPerSec=105.10160046034554, CurrSamplesPerSec=51.63946257422192, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:35:39,931] [INFO] [logging.py:96:log_dist] [Rank 0] step=4560, skipped=74, lr=[2.2061565063554063e-06, 2.2061565063554063e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4559|ppo_ep: 1|act_loss: -0.020111083984375|cri_loss: -0.00907135009765625|unsuper_loss: 0.0
+average reward score: 4.53125
+-------------------------------------------------------------------------------------
+|E2E latency=2.47s |Gather latency=0.00s (0.00%) |Generate time=1.59s (64.27%) |Training time=0.78s (31.69%) |Others=0.10 (4.04%)|CurSamplesPerSec=12.95 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4560|ppo_ep: 1|act_loss: -0.028472900390625|cri_loss: -0.01371002197265625|unsuper_loss: 0.0
+average reward score: 5.25
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.82%) |Training time=0.49s (22.56%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4561|ppo_ep: 1|act_loss: -0.017730712890625|cri_loss: -0.00830078125|unsuper_loss: 0.0
+average reward score: 4.1875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.17%) |Training time=0.48s (22.20%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4562|ppo_ep: 1|act_loss: 0.00510406494140625|cri_loss: 0.0028438568115234375|unsuper_loss: 0.0
+average reward score: 4.01171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.96%) |Training time=0.49s (22.42%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4563|ppo_ep: 1|act_loss: -0.00669097900390625|cri_loss: -0.003200531005859375|unsuper_loss: 0.0
+average reward score: 5.5625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.18%) |Training time=0.49s (22.25%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4564|ppo_ep: 1|act_loss: 0.0183563232421875|cri_loss: 0.009552001953125|unsuper_loss: 0.0
+average reward score: 4.5859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.62s |Gather latency=0.00s (0.00%) |Generate time=1.75s (66.91%) |Training time=0.51s (19.64%) |Others=0.35 (13.45%)|CurSamplesPerSec=12.22 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4565|ppo_ep: 1|act_loss: 0.029815673828125|cri_loss: 0.0155181884765625|unsuper_loss: 0.0
+average reward score: 6.4609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.13%) |Training time=0.48s (22.24%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4566|ppo_ep: 1|act_loss: 0.0841064453125|cri_loss: 0.044769287109375|unsuper_loss: 0.0
+average reward score: 4.203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.97%) |Training time=0.49s (22.42%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4567|ppo_ep: 1|act_loss: 0.030364990234375|cri_loss: 0.01641845703125|unsuper_loss: 0.0
+average reward score: 5.890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.51%) |Training time=0.50s (22.93%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4568|ppo_ep: 1|act_loss: -0.068603515625|cri_loss: -0.032928466796875|unsuper_loss: 0.0
+average reward score: 4.90625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.60%) |Training time=0.50s (22.81%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.46
+[2023-04-14 11:36:02,033] [INFO] [logging.py:96:log_dist] [Rank 0] step=4570, skipped=59, lr=[4.2117904261267626e-06, 4.2117904261267626e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:36:02,051] [INFO] [timer.py:199:stop] epoch=0/micro_step=4570/global_step=4570, RunningAvgSamplesPerSec=105.08257705936522, CurrSamplesPerSec=98.051808641592, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:36:02,144] [INFO] [logging.py:96:log_dist] [Rank 0] step=4570, skipped=74, lr=[2.1966004461560667e-06, 2.1966004461560667e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4569|ppo_ep: 1|act_loss: -0.0516357421875|cri_loss: -0.0239410400390625|unsuper_loss: 0.0
+average reward score: 4.5859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.89%) |Training time=0.49s (22.52%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4570|ppo_ep: 1|act_loss: 0.005802154541015625|cri_loss: 0.0033092498779296875|unsuper_loss: 0.0
+average reward score: 6.0078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.93%) |Training time=0.49s (22.46%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4571|ppo_ep: 1|act_loss: 0.0352783203125|cri_loss: 0.0202178955078125|unsuper_loss: 0.0
+average reward score: 4.08984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.21%) |Training time=0.48s (22.15%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4572|ppo_ep: 1|act_loss: 0.0212554931640625|cri_loss: 0.01184844970703125|unsuper_loss: 0.0
+average reward score: 4.58203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.10%) |Training time=0.48s (22.30%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4573|ppo_ep: 1|act_loss: 0.04534912109375|cri_loss: 0.0251312255859375|unsuper_loss: 0.0
+average reward score: 4.2265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.18%) |Training time=0.48s (22.20%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4574|ppo_ep: 1|act_loss: -0.0034160614013671875|cri_loss: -0.0014438629150390625|unsuper_loss: 0.0
+average reward score: 6.7734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.01%) |Training time=0.48s (22.35%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4575|ppo_ep: 1|act_loss: -0.0118560791015625|cri_loss: -0.002506256103515625|unsuper_loss: 0.0
+average reward score: 4.6484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.02%) |Training time=0.49s (22.38%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4576|ppo_ep: 1|act_loss: -0.0187225341796875|cri_loss: -0.007053375244140625|unsuper_loss: 0.0
+average reward score: 5.078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.75%) |Training time=0.48s (22.23%) |Others=0.11 (5.02%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4577|ppo_ep: 1|act_loss: 0.0162353515625|cri_loss: 0.0099639892578125|unsuper_loss: 0.0
+average reward score: 4.25
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.14%) |Training time=0.48s (22.24%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4578|ppo_ep: 1|act_loss: -0.002105712890625|cri_loss: -0.0005311965942382812|unsuper_loss: 0.0
+average reward score: 5.73046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.26%) |Training time=0.48s (22.13%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+[2023-04-14 11:36:23,920] [INFO] [logging.py:96:log_dist] [Rank 0] step=4580, skipped=59, lr=[4.193369440804912e-06, 4.193369440804912e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:36:23,938] [INFO] [timer.py:199:stop] epoch=0/micro_step=4580/global_step=4580, RunningAvgSamplesPerSec=105.06683239626007, CurrSamplesPerSec=86.84876269801265, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:36:24,031] [INFO] [logging.py:96:log_dist] [Rank 0] step=4580, skipped=74, lr=[2.1870488830700263e-06, 2.1870488830700263e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4579|ppo_ep: 1|act_loss: 0.02239990234375|cri_loss: 0.011810302734375|unsuper_loss: 0.0
+average reward score: 5.25390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.72s (72.70%) |Training time=0.54s (23.00%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.55 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4580|ppo_ep: 1|act_loss: -0.0028533935546875|cri_loss: -0.0010128021240234375|unsuper_loss: 0.0
+average reward score: 5.0625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.18%) |Training time=0.48s (22.18%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4581|ppo_ep: 1|act_loss: -0.03582763671875|cri_loss: -0.017059326171875|unsuper_loss: 0.0
+average reward score: 4.87109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.42%) |Training time=0.48s (21.94%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4582|ppo_ep: 1|act_loss: -0.0109100341796875|cri_loss: -0.0051116943359375|unsuper_loss: 0.0
+average reward score: 4.60546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.93%) |Training time=0.49s (22.44%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4583|ppo_ep: 1|act_loss: -0.0345458984375|cri_loss: -0.015655517578125|unsuper_loss: 0.0
+average reward score: 5.1875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.24%) |Training time=0.46s (21.11%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4584|ppo_ep: 1|act_loss: 0.0039215087890625|cri_loss: 0.002899169921875|unsuper_loss: 0.0
+average reward score: 5.97265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.88%) |Training time=0.47s (21.47%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4585|ppo_ep: 1|act_loss: 0.016510009765625|cri_loss: 0.008544921875|unsuper_loss: 0.0
+average reward score: 5.85546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.28%) |Training time=0.46s (21.07%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4586|ppo_ep: 1|act_loss: 0.052734375|cri_loss: 0.0289154052734375|unsuper_loss: 0.0
+average reward score: 5.7890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.90%) |Training time=0.47s (20.66%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.16 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4587|ppo_ep: 1|act_loss: 0.012481689453125|cri_loss: 0.006565093994140625|unsuper_loss: 0.0
+average reward score: 5.6953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.69%) |Training time=0.47s (21.69%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4588|ppo_ep: 1|act_loss: 0.00595855712890625|cri_loss: 0.00421905517578125|unsuper_loss: 0.0
+average reward score: 5.51953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.59%) |Training time=0.47s (21.78%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
+[2023-04-14 11:36:45,723] [INFO] [logging.py:96:log_dist] [Rank 0] step=4590, skipped=59, lr=[4.174957817771677e-06, 4.174957817771677e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:36:45,741] [INFO] [timer.py:199:stop] epoch=0/micro_step=4590/global_step=4590, RunningAvgSamplesPerSec=105.06351756370773, CurrSamplesPerSec=102.14213537144157, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:36:45,834] [INFO] [logging.py:96:log_dist] [Rank 0] step=4590, skipped=74, lr=[2.1775019586744924e-06, 2.1775019586744924e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4589|ppo_ep: 1|act_loss: -0.00743865966796875|cri_loss: -0.003204345703125|unsuper_loss: 0.0
+average reward score: 4.12890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.48s (21.98%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4590|ppo_ep: 1|act_loss: 0.003932952880859375|cri_loss: 0.0020923614501953125|unsuper_loss: 0.0
+average reward score: 5.2578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.87%) |Training time=0.47s (21.52%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4591|ppo_ep: 1|act_loss: -0.0245361328125|cri_loss: -0.01166534423828125|unsuper_loss: 0.0
+average reward score: 6.515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.87%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4592|ppo_ep: 1|act_loss: -0.0102691650390625|cri_loss: -0.0049285888671875|unsuper_loss: 0.0
+average reward score: 4.7734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.40%) |Training time=0.48s (21.97%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4593|ppo_ep: 1|act_loss: -0.0169525146484375|cri_loss: -0.0081939697265625|unsuper_loss: 0.0
+average reward score: 5.5
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.67%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4594|ppo_ep: 1|act_loss: -0.0111541748046875|cri_loss: -0.0048980712890625|unsuper_loss: 0.0
+average reward score: 5.796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.48%) |Training time=0.47s (20.22%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4595|ppo_ep: 1|act_loss: -0.0110931396484375|cri_loss: -0.00519561767578125|unsuper_loss: 0.0
+average reward score: 5.21875
+-------------------------------------------------------------------------------------
+|E2E latency=2.45s |Gather latency=0.00s (0.00%) |Generate time=1.59s (65.04%) |Training time=0.47s (19.26%) |Others=0.38 (15.71%)|CurSamplesPerSec=13.08 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4596|ppo_ep: 1|act_loss: 0.0110931396484375|cri_loss: 0.005840301513671875|unsuper_loss: 0.0
+average reward score: 6.8671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.07s |Gather latency=0.00s (0.00%) |Generate time=1.59s (76.53%) |Training time=0.39s (18.65%) |Others=0.10 (4.82%)|CurSamplesPerSec=15.44 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4597|ppo_ep: 1|act_loss: 0.009765625|cri_loss: 0.005664825439453125|unsuper_loss: 0.0
+average reward score: 5.7421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.62%) |Training time=0.47s (21.73%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4598|ppo_ep: 1|act_loss: 0.00322723388671875|cri_loss: 0.0019207000732421875|unsuper_loss: 0.0
+average reward score: 5.421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.90%) |Training time=0.47s (21.47%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
+[2023-04-14 11:37:07,765] [INFO] [logging.py:96:log_dist] [Rank 0] step=4600, skipped=59, lr=[4.156555829931723e-06, 4.156555829931723e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:37:07,783] [INFO] [timer.py:199:stop] epoch=0/micro_step=4600/global_step=4600, RunningAvgSamplesPerSec=105.06512917590997, CurrSamplesPerSec=95.71679557963769, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:37:07,876] [INFO] [logging.py:96:log_dist] [Rank 0] step=4600, skipped=74, lr=[2.167959814477915e-06, 2.167959814477915e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4599|ppo_ep: 1|act_loss: 0.0211944580078125|cri_loss: 0.011505126953125|unsuper_loss: 0.0
+average reward score: 5.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.69%) |Training time=0.50s (22.75%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4600|ppo_ep: 1|act_loss: 0.0211334228515625|cri_loss: 0.010833740234375|unsuper_loss: 0.0
+average reward score: 5.8984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.96s |Gather latency=0.00s (0.00%) |Generate time=1.58s (53.31%) |Training time=0.50s (16.89%) |Others=0.88 (29.80%)|CurSamplesPerSec=10.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4601|ppo_ep: 1|act_loss: -0.0022735595703125|cri_loss: -0.0004930496215820312|unsuper_loss: 0.0
+average reward score: 5.046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.67%) |Training time=0.49s (22.71%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4602|ppo_ep: 1|act_loss: 0.00644683837890625|cri_loss: 0.0035648345947265625|unsuper_loss: 0.0
+average reward score: 5.68359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.65%) |Training time=0.49s (22.73%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4603|ppo_ep: 1|act_loss: 0.0165252685546875|cri_loss: 0.0088958740234375|unsuper_loss: 0.0
+average reward score: 5.25390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.97%) |Training time=0.49s (22.35%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4604|ppo_ep: 1|act_loss: -0.01346588134765625|cri_loss: -0.00623321533203125|unsuper_loss: 0.0
+average reward score: 5.3984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.75%) |Training time=0.49s (22.65%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4605|ppo_ep: 1|act_loss: 0.021270751953125|cri_loss: 0.01096343994140625|unsuper_loss: 0.0
+average reward score: 5.01171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.56%) |Training time=0.50s (22.83%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4606|ppo_ep: 1|act_loss: -0.000988006591796875|cri_loss: -0.00018930435180664062|unsuper_loss: 0.0
+average reward score: 4.8046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.19%) |Training time=0.48s (22.17%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4607|ppo_ep: 1|act_loss: -0.0102691650390625|cri_loss: -0.0048065185546875|unsuper_loss: 0.0
+average reward score: 6.5546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.78%) |Training time=0.49s (22.63%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4608|ppo_ep: 1|act_loss: -0.01401519775390625|cri_loss: -0.00673675537109375|unsuper_loss: 0.0
+average reward score: 5.94921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.58s (69.12%) |Training time=0.52s (22.89%) |Others=0.18 (7.99%)|CurSamplesPerSec=13.99 |AvgSamplesPerSec=14.46
+[2023-04-14 11:37:30,415] [INFO] [logging.py:96:log_dist] [Rank 0] step=4610, skipped=59, lr=[4.138163750046903e-06, 4.138163750046903e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:37:30,433] [INFO] [timer.py:199:stop] epoch=0/micro_step=4610/global_step=4610, RunningAvgSamplesPerSec=105.04432744786926, CurrSamplesPerSec=99.1981883570604, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:37:30,526] [INFO] [logging.py:96:log_dist] [Rank 0] step=4610, skipped=74, lr=[2.158422591917891e-06, 2.158422591917891e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4609|ppo_ep: 1|act_loss: -0.00725555419921875|cri_loss: -0.0033588409423828125|unsuper_loss: 0.0
+average reward score: 5.1484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.07%) |Training time=0.49s (22.31%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4610|ppo_ep: 1|act_loss: -0.04510498046875|cri_loss: -0.021087646484375|unsuper_loss: 0.0
+average reward score: 5.828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.76%) |Training time=0.49s (22.64%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4611|ppo_ep: 1|act_loss: 0.01213836669921875|cri_loss: 0.0068511962890625|unsuper_loss: 0.0
+average reward score: 6.0078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.93%) |Training time=0.49s (22.43%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4612|ppo_ep: 1|act_loss: 0.027618408203125|cri_loss: 0.0142364501953125|unsuper_loss: 0.0
+average reward score: 4.88671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.07%) |Training time=0.49s (22.30%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4613|ppo_ep: 1|act_loss: 0.021514892578125|cri_loss: 0.01171875|unsuper_loss: 0.0
+average reward score: 5.97265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.97%) |Training time=0.49s (22.42%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46
+[2023-04-14 11:37:41,322] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 4614|ppo_ep: 1|act_loss: 0.03173828125|cri_loss: 0.0161895751953125|unsuper_loss: 0.0
+average reward score: 5.9921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.21%) |Training time=0.46s (20.90%) |Others=0.11 (4.88%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.46
+[2023-04-14 11:37:43,569] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 4615|ppo_ep: 1|act_loss: -0.00678253173828125|cri_loss: -0.0028896331787109375|unsuper_loss: 0.0
+average reward score: 4.5859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.68s (75.08%) |Training time=0.46s (20.46%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.29 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4616|ppo_ep: 1|act_loss: -0.0020618438720703125|cri_loss: -0.0008721351623535156|unsuper_loss: 0.0
+average reward score: 5.4765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.32%) |Training time=0.48s (22.07%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4617|ppo_ep: 1|act_loss: 0.01251220703125|cri_loss: 0.006763458251953125|unsuper_loss: 0.0
+average reward score: 5.57421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.78%) |Training time=0.49s (22.64%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4618|ppo_ep: 1|act_loss: 0.0031261444091796875|cri_loss: 0.0018663406372070312|unsuper_loss: 0.0
+average reward score: 4.828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.48s (21.94%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+[2023-04-14 11:37:52,245] [INFO] [logging.py:96:log_dist] [Rank 0] step=4620, skipped=61, lr=[4.123457403069056e-06, 4.123457403069056e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:37:52,264] [INFO] [timer.py:199:stop] epoch=0/micro_step=4620/global_step=4620, RunningAvgSamplesPerSec=105.03470887489762, CurrSamplesPerSec=100.67064495708186, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:37:52,356] [INFO] [logging.py:96:log_dist] [Rank 0] step=4620, skipped=74, lr=[2.1488904323590644e-06, 2.1488904323590644e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4619|ppo_ep: 1|act_loss: -0.00931549072265625|cri_loss: -0.00424957275390625|unsuper_loss: 0.0
+average reward score: 5.91796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.28%) |Training time=0.48s (22.14%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4620|ppo_ep: 1|act_loss: -0.026458740234375|cri_loss: -0.011993408203125|unsuper_loss: 0.0
+average reward score: 5.3515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.18%) |Training time=0.48s (22.21%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4621|ppo_ep: 1|act_loss: -0.027191162109375|cri_loss: -0.01263427734375|unsuper_loss: 0.0
+average reward score: 5.39453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.01%) |Training time=0.48s (22.38%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4622|ppo_ep: 1|act_loss: 0.015960693359375|cri_loss: 0.00853729248046875|unsuper_loss: 0.0
+average reward score: 5.46484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.68%) |Training time=0.49s (22.72%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4623|ppo_ep: 1|act_loss: 0.032012939453125|cri_loss: 0.0174407958984375|unsuper_loss: 0.0
+average reward score: 6.78125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.01%) |Training time=0.48s (22.37%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4624|ppo_ep: 1|act_loss: 0.031524658203125|cri_loss: 0.0169219970703125|unsuper_loss: 0.0
+average reward score: 3.92578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.88%) |Training time=0.49s (20.82%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.68 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4625|ppo_ep: 1|act_loss: 0.0192718505859375|cri_loss: 0.0108184814453125|unsuper_loss: 0.0
+average reward score: 6.53125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.08%) |Training time=0.48s (22.30%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4626|ppo_ep: 1|act_loss: 0.014068603515625|cri_loss: 0.0085296630859375|unsuper_loss: 0.0
+average reward score: 6.578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.86%) |Training time=0.49s (22.52%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4627|ppo_ep: 1|act_loss: 0.0254974365234375|cri_loss: 0.01367950439453125|unsuper_loss: 0.0
+average reward score: 5.01953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.86%) |Training time=0.49s (22.53%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4628|ppo_ep: 1|act_loss: 0.06097412109375|cri_loss: 0.032196044921875|unsuper_loss: 0.0
+average reward score: 6.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.01%) |Training time=0.46s (21.36%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
+[2023-04-14 11:38:14,122] [INFO] [logging.py:96:log_dist] [Rank 0] step=4630, skipped=61, lr=[4.1050838443919875e-06, 4.1050838443919875e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:38:14,140] [INFO] [timer.py:199:stop] epoch=0/micro_step=4630/global_step=4630, RunningAvgSamplesPerSec=105.02180375590667, CurrSamplesPerSec=99.11819206302553, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:38:14,233] [INFO] [logging.py:96:log_dist] [Rank 0] step=4630, skipped=74, lr=[2.1393634770910363e-06, 2.1393634770910363e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4629|ppo_ep: 1|act_loss: 0.0362548828125|cri_loss: 0.0199737548828125|unsuper_loss: 0.0
+average reward score: 4.56640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.10%) |Training time=0.49s (22.32%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4630|ppo_ep: 1|act_loss: 0.004764556884765625|cri_loss: 0.003452301025390625|unsuper_loss: 0.0
+average reward score: 4.5546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.59s (75.43%) |Training time=0.42s (19.79%) |Others=0.10 (4.77%)|CurSamplesPerSec=15.19 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4631|ppo_ep: 1|act_loss: 0.002178192138671875|cri_loss: 0.0012903213500976562|unsuper_loss: 0.0
+average reward score: 5.15234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.31%) |Training time=0.48s (22.08%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4632|ppo_ep: 1|act_loss: -0.0230865478515625|cri_loss: -0.01128387451171875|unsuper_loss: 0.0
+average reward score: 5.0703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.25%) |Training time=0.48s (22.14%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4633|ppo_ep: 1|act_loss: -0.037933349609375|cri_loss: -0.0178375244140625|unsuper_loss: 0.0
+average reward score: 4.98828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.42%) |Training time=0.48s (21.97%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4634|ppo_ep: 1|act_loss: 0.00516510009765625|cri_loss: 0.0031719207763671875|unsuper_loss: 0.0
+average reward score: 4.69140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.52%) |Training time=0.47s (21.85%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4635|ppo_ep: 1|act_loss: -0.004116058349609375|cri_loss: -0.0011539459228515625|unsuper_loss: 0.0
+average reward score: 5.58984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.42%) |Training time=0.48s (21.97%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4636|ppo_ep: 1|act_loss: -0.0214996337890625|cri_loss: -0.01045989990234375|unsuper_loss: 0.0
+average reward score: 5.44140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.19%) |Training time=0.48s (22.20%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4637|ppo_ep: 1|act_loss: -0.0006070137023925781|cri_loss: -5.626678466796875e-05|unsuper_loss: 0.0
+average reward score: 5.19921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.84%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4638|ppo_ep: 1|act_loss: -0.007564544677734375|cri_loss: -0.00339508056640625|unsuper_loss: 0.0
+average reward score: 6.296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.34%) |Training time=0.48s (22.05%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+[2023-04-14 11:38:36,000] [INFO] [logging.py:96:log_dist] [Rank 0] step=4640, skipped=61, lr=[4.086720956609049e-06, 4.086720956609049e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:38:36,018] [INFO] [timer.py:199:stop] epoch=0/micro_step=4640/global_step=4640, RunningAvgSamplesPerSec=105.01773699493148, CurrSamplesPerSec=98.99807414433666, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:38:36,111] [INFO] [logging.py:96:log_dist] [Rank 0] step=4640, skipped=74, lr=[2.1298418673262655e-06, 2.1298418673262655e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4639|ppo_ep: 1|act_loss: 0.024658203125|cri_loss: 0.01282501220703125|unsuper_loss: 0.0
+average reward score: 4.68359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.98%) |Training time=0.49s (20.77%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.46
+[2023-04-14 11:38:38,285] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 4640|ppo_ep: 1|act_loss: 0.0004324913024902344|cri_loss: 0.0012264251708984375|unsuper_loss: 0.0
+average reward score: 4.83203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.39%) |Training time=0.49s (22.46%) |Others=0.09 (4.16%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
+[2023-04-14 11:38:40,445] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 4641|ppo_ep: 1|act_loss: 0.0021190643310546875|cri_loss: 0.0013751983642578125|unsuper_loss: 0.0
+average reward score: 5.32421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.70%) |Training time=0.48s (22.16%) |Others=0.09 (4.14%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4642|ppo_ep: 1|act_loss: 0.04876708984375|cri_loss: 0.0262603759765625|unsuper_loss: 0.0
+average reward score: 5.46875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.14%) |Training time=0.46s (21.21%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4643|ppo_ep: 1|act_loss: 0.0234375|cri_loss: 0.011993408203125|unsuper_loss: 0.0
+average reward score: 6.7265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.67%) |Training time=0.43s (19.68%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4644|ppo_ep: 1|act_loss: -0.04034423828125|cri_loss: -0.018829345703125|unsuper_loss: 0.0
+average reward score: 6.83203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.79%) |Training time=0.47s (20.79%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.12 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4645|ppo_ep: 1|act_loss: -0.008087158203125|cri_loss: -0.0038051605224609375|unsuper_loss: 0.0
+average reward score: 4.734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.84%) |Training time=0.47s (21.57%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4646|ppo_ep: 1|act_loss: 0.0191802978515625|cri_loss: 0.009796142578125|unsuper_loss: 0.0
+average reward score: 5.59375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.15%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4647|ppo_ep: 1|act_loss: 0.001789093017578125|cri_loss: 0.0009784698486328125|unsuper_loss: 0.0
+average reward score: 5.765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.61%) |Training time=0.47s (21.75%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4648|ppo_ep: 1|act_loss: 0.019317626953125|cri_loss: 0.010040283203125|unsuper_loss: 0.0
+average reward score: 5.015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.47s (21.93%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+[2023-04-14 11:38:57,764] [INFO] [logging.py:96:log_dist] [Rank 0] step=4650, skipped=61, lr=[4.068369011902537e-06, 4.068369011902537e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:38:57,782] [INFO] [timer.py:199:stop] epoch=0/micro_step=4650/global_step=4650, RunningAvgSamplesPerSec=105.01619755698859, CurrSamplesPerSec=101.73921361882245, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:38:57,875] [INFO] [logging.py:96:log_dist] [Rank 0] step=4650, skipped=76, lr=[2.122228523121039e-06, 2.122228523121039e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4649|ppo_ep: 1|act_loss: 0.0022907257080078125|cri_loss: 0.00200653076171875|unsuper_loss: 0.0
+average reward score: 5.609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.48s (22.02%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4650|ppo_ep: 1|act_loss: -0.032958984375|cri_loss: -0.0160064697265625|unsuper_loss: 0.0
+average reward score: 5.38671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.75%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4651|ppo_ep: 1|act_loss: -0.0117034912109375|cri_loss: -0.00518798828125|unsuper_loss: 0.0
+average reward score: 5.9296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.47s (21.89%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4652|ppo_ep: 1|act_loss: 0.003528594970703125|cri_loss: 0.002811431884765625|unsuper_loss: 0.0
+average reward score: 5.703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.33%) |Training time=0.48s (22.04%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4653|ppo_ep: 1|act_loss: -0.0008487701416015625|cri_loss: -0.0002982616424560547|unsuper_loss: 0.0
+average reward score: 5.48046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.81%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4654|ppo_ep: 1|act_loss: 0.020538330078125|cri_loss: 0.010650634765625|unsuper_loss: 0.0
+average reward score: 4.80078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.59s (69.68%) |Training time=0.59s (25.94%) |Others=0.10 (4.38%)|CurSamplesPerSec=14.03 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4655|ppo_ep: 1|act_loss: 0.006198883056640625|cri_loss: 0.003208160400390625|unsuper_loss: 0.0
+average reward score: 5.26171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.51%) |Training time=0.47s (21.86%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4656|ppo_ep: 1|act_loss: 0.007274627685546875|cri_loss: 0.003936767578125|unsuper_loss: 0.0
+average reward score: 4.94921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.13%) |Training time=0.46s (21.21%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4657|ppo_ep: 1|act_loss: 0.002826690673828125|cri_loss: 0.0015611648559570312|unsuper_loss: 0.0
+average reward score: 5.0625
+-------------------------------------------------------------------------------------
+|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.60s (67.47%) |Training time=0.47s (19.75%) |Others=0.30 (12.77%)|CurSamplesPerSec=13.54 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4658|ppo_ep: 1|act_loss: 0.003002166748046875|cri_loss: 0.0018138885498046875|unsuper_loss: 0.0
+average reward score: 5.171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.78%) |Training time=0.44s (20.49%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
+[2023-04-14 11:39:19,693] [INFO] [logging.py:96:log_dist] [Rank 0] step=4660, skipped=61, lr=[4.050028282292539e-06, 4.050028282292539e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:39:19,711] [INFO] [timer.py:199:stop] epoch=0/micro_step=4660/global_step=4660, RunningAvgSamplesPerSec=105.00860834725098, CurrSamplesPerSec=103.96174217485263, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:39:19,804] [INFO] [logging.py:96:log_dist] [Rank 0] step=4660, skipped=76, lr=[2.112716890863282e-06, 2.112716890863282e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4659|ppo_ep: 1|act_loss: 0.00225830078125|cri_loss: 0.001560211181640625|unsuper_loss: 0.0
+average reward score: 5.0859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.66%) |Training time=0.47s (21.72%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4660|ppo_ep: 1|act_loss: 0.019073486328125|cri_loss: 0.010650634765625|unsuper_loss: 0.0
+average reward score: 6.0
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.47s (21.56%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4661|ppo_ep: 1|act_loss: -0.0088043212890625|cri_loss: -0.004238128662109375|unsuper_loss: 0.0
+average reward score: 5.2421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.65%) |Training time=0.47s (21.73%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4662|ppo_ep: 1|act_loss: -0.019439697265625|cri_loss: -0.00907135009765625|unsuper_loss: 0.0
+average reward score: 5.453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.73%) |Training time=0.46s (21.46%) |Others=0.10 (4.81%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4663|ppo_ep: 1|act_loss: 0.0100860595703125|cri_loss: 0.0054473876953125|unsuper_loss: 0.0
+average reward score: 5.2265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.66%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4664|ppo_ep: 1|act_loss: -0.0088348388671875|cri_loss: -0.00360870361328125|unsuper_loss: 0.0
+average reward score: 4.9765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.68%) |Training time=0.47s (21.69%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4665|ppo_ep: 1|act_loss: -0.02349853515625|cri_loss: -0.01110076904296875|unsuper_loss: 0.0
+average reward score: 6.4375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.66%) |Training time=0.47s (21.70%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4666|ppo_ep: 1|act_loss: 0.011749267578125|cri_loss: 0.0064697265625|unsuper_loss: 0.0
+average reward score: 5.3984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.54%) |Training time=0.47s (21.82%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4667|ppo_ep: 1|act_loss: 0.03271484375|cri_loss: 0.0168304443359375|unsuper_loss: 0.0
+average reward score: 4.91796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.61%) |Training time=0.47s (21.76%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4668|ppo_ep: 1|act_loss: 0.068115234375|cri_loss: 0.036285400390625|unsuper_loss: 0.0
+average reward score: 5.5859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.88%) |Training time=0.46s (21.45%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
+[2023-04-14 11:39:41,490] [INFO] [logging.py:96:log_dist] [Rank 0] step=4670, skipped=61, lr=[4.031699039632916e-06, 4.031699039632916e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:39:41,509] [INFO] [timer.py:199:stop] epoch=0/micro_step=4670/global_step=4670, RunningAvgSamplesPerSec=105.0078285540475, CurrSamplesPerSec=105.11653158130927, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:39:41,603] [INFO] [logging.py:96:log_dist] [Rank 0] step=4670, skipped=76, lr=[2.1032109990754734e-06, 2.1032109990754734e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4669|ppo_ep: 1|act_loss: 0.0181427001953125|cri_loss: 0.00955963134765625|unsuper_loss: 0.0
+average reward score: 4.52734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.52%) |Training time=0.47s (20.09%) |Others=0.10 (4.39%)|CurSamplesPerSec=13.74 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4670|ppo_ep: 1|act_loss: 0.024658203125|cri_loss: 0.0130615234375|unsuper_loss: 0.0
+average reward score: 6.765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.32%) |Training time=0.48s (22.07%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4671|ppo_ep: 1|act_loss: -0.014862060546875|cri_loss: -0.006984710693359375|unsuper_loss: 0.0
+average reward score: 5.3203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.48s (21.93%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4672|ppo_ep: 1|act_loss: -0.0279693603515625|cri_loss: -0.01361083984375|unsuper_loss: 0.0
+average reward score: 5.20703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.96%) |Training time=0.48s (21.79%) |Others=0.12 (5.25%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4673|ppo_ep: 1|act_loss: -0.02764892578125|cri_loss: -0.01336669921875|unsuper_loss: 0.0
+average reward score: 4.77734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.64s (72.68%) |Training time=0.52s (22.83%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.18 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4674|ppo_ep: 1|act_loss: -0.03143310546875|cri_loss: -0.0149383544921875|unsuper_loss: 0.0
+average reward score: 5.03515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.65%) |Training time=0.47s (21.69%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4675|ppo_ep: 1|act_loss: -0.027587890625|cri_loss: -0.01340484619140625|unsuper_loss: 0.0
+average reward score: 5.640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.46s (21.28%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4676|ppo_ep: 1|act_loss: -0.04046630859375|cri_loss: -0.01983642578125|unsuper_loss: 0.0
+average reward score: 4.7109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.84%) |Training time=0.46s (21.53%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4677|ppo_ep: 1|act_loss: -0.0153961181640625|cri_loss: -0.007396697998046875|unsuper_loss: 0.0
+average reward score: 5.234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.46s (21.49%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4678|ppo_ep: 1|act_loss: -0.02301025390625|cri_loss: -0.0111541748046875|unsuper_loss: 0.0
+average reward score: 6.125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.76%) |Training time=0.46s (21.59%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
+[2023-04-14 11:40:03,192] [INFO] [logging.py:96:log_dist] [Rank 0] step=4680, skipped=61, lr=[4.01338155560726e-06, 4.01338155560726e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:40:03,210] [INFO] [timer.py:199:stop] epoch=0/micro_step=4680/global_step=4680, RunningAvgSamplesPerSec=105.00428085351663, CurrSamplesPerSec=107.38758437052944, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:40:03,303] [INFO] [logging.py:96:log_dist] [Rank 0] step=4680, skipped=76, lr=[2.093710988657859e-06, 2.093710988657859e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4679|ppo_ep: 1|act_loss: -0.0083160400390625|cri_loss: -0.003635406494140625|unsuper_loss: 0.0
+average reward score: 6.09375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.42%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4680|ppo_ep: 1|act_loss: -0.017486572265625|cri_loss: -0.00780487060546875|unsuper_loss: 0.0
+average reward score: 6.421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.97%) |Training time=0.46s (21.37%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4681|ppo_ep: 1|act_loss: 0.015594482421875|cri_loss: 0.00809478759765625|unsuper_loss: 0.0
+average reward score: 5.171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.94%) |Training time=0.46s (21.41%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4682|ppo_ep: 1|act_loss: 0.006862640380859375|cri_loss: 0.003787994384765625|unsuper_loss: 0.0
+average reward score: 4.71875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.07%) |Training time=0.46s (21.27%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4683|ppo_ep: 1|act_loss: 0.016693115234375|cri_loss: 0.00894927978515625|unsuper_loss: 0.0
+average reward score: 4.73046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.91%) |Training time=0.46s (21.44%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4684|ppo_ep: 1|act_loss: 0.0419921875|cri_loss: 0.021636962890625|unsuper_loss: 0.0
+average reward score: 5.06640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.20%) |Training time=0.48s (20.51%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.68 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4685|ppo_ep: 1|act_loss: 0.02496337890625|cri_loss: 0.01329803466796875|unsuper_loss: 0.0
+average reward score: 5.35546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.23%) |Training time=0.45s (21.10%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4686|ppo_ep: 1|act_loss: -0.03363037109375|cri_loss: -0.01654052734375|unsuper_loss: 0.0
+average reward score: 5.23828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.99%) |Training time=0.46s (21.37%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4687|ppo_ep: 1|act_loss: -0.0013904571533203125|cri_loss: -0.0002593994140625|unsuper_loss: 0.0
+average reward score: 5.140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.61s (70.13%) |Training time=0.46s (20.03%) |Others=0.23 (9.84%)|CurSamplesPerSec=13.97 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4688|ppo_ep: 1|act_loss: -0.010955810546875|cri_loss: -0.0045623779296875|unsuper_loss: 0.0
+average reward score: 6.203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.19%) |Training time=0.48s (22.17%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46
+[2023-04-14 11:40:25,038] [INFO] [logging.py:96:log_dist] [Rank 0] step=4690, skipped=61, lr=[3.995076101724873e-06, 3.995076101724873e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:40:25,057] [INFO] [timer.py:199:stop] epoch=0/micro_step=4690/global_step=4690, RunningAvgSamplesPerSec=105.00716011355068, CurrSamplesPerSec=106.80988441863944, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:40:25,149] [INFO] [logging.py:96:log_dist] [Rank 0] step=4690, skipped=76, lr=[2.084217000423512e-06, 2.084217000423512e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4689|ppo_ep: 1|act_loss: -0.0246734619140625|cri_loss: -0.01197052001953125|unsuper_loss: 0.0
+average reward score: 6.93359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.89%) |Training time=0.46s (21.49%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4690|ppo_ep: 1|act_loss: -0.03106689453125|cri_loss: -0.014862060546875|unsuper_loss: 0.0
+average reward score: 5.22265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.22%) |Training time=0.45s (21.10%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4691|ppo_ep: 1|act_loss: 0.003841400146484375|cri_loss: 0.002197265625|unsuper_loss: 0.0
+average reward score: 6.0078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.97%) |Training time=0.46s (21.39%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4692|ppo_ep: 1|act_loss: 0.023040771484375|cri_loss: 0.01206207275390625|unsuper_loss: 0.0
+average reward score: 6.5234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.46s (21.31%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4693|ppo_ep: 1|act_loss: 0.033447265625|cri_loss: 0.0173492431640625|unsuper_loss: 0.0
+average reward score: 5.86328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.46s (21.31%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4694|ppo_ep: 1|act_loss: 0.0143585205078125|cri_loss: 0.0074462890625|unsuper_loss: 0.0
+average reward score: 5.1484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.29%) |Training time=0.45s (21.02%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4695|ppo_ep: 1|act_loss: 0.026763916015625|cri_loss: 0.01410675048828125|unsuper_loss: 0.0
+average reward score: 4.625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.05%) |Training time=0.46s (21.30%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4696|ppo_ep: 1|act_loss: -0.000705718994140625|cri_loss: -0.0001049041748046875|unsuper_loss: 0.0
+average reward score: 5.35546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.95%) |Training time=0.46s (21.40%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4697|ppo_ep: 1|act_loss: -0.001068115234375|cri_loss: -0.0003914833068847656|unsuper_loss: 0.0
+average reward score: 5.453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.12%) |Training time=0.46s (21.21%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4698|ppo_ep: 1|act_loss: 0.015380859375|cri_loss: 0.00798797607421875|unsuper_loss: 0.0
+average reward score: 4.73828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.99%) |Training time=0.46s (20.87%) |Others=0.16 (7.14%)|CurSamplesPerSec=14.49 |AvgSamplesPerSec=14.46
+[2023-04-14 11:40:46,698] [INFO] [logging.py:96:log_dist] [Rank 0] step=4700, skipped=61, lr=[3.97678294931674e-06, 3.97678294931674e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:40:46,716] [INFO] [timer.py:199:stop] epoch=0/micro_step=4700/global_step=4700, RunningAvgSamplesPerSec=105.01293390159111, CurrSamplesPerSec=100.83158391355171, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:40:46,810] [INFO] [logging.py:96:log_dist] [Rank 0] step=4700, skipped=76, lr=[2.07472917509624e-06, 2.07472917509624e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4699|ppo_ep: 1|act_loss: -0.00714874267578125|cri_loss: -0.0033435821533203125|unsuper_loss: 0.0
+average reward score: 4.5546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.68s (74.13%) |Training time=0.48s (21.13%) |Others=0.11 (4.73%)|CurSamplesPerSec=14.09 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4700|ppo_ep: 1|act_loss: -0.027618408203125|cri_loss: -0.01334381103515625|unsuper_loss: 0.0
+average reward score: 5.33984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.81%) |Training time=0.47s (21.58%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4701|ppo_ep: 1|act_loss: -0.0284271240234375|cri_loss: -0.0135498046875|unsuper_loss: 0.0
+average reward score: 5.75390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.81%) |Training time=0.46s (21.52%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4702|ppo_ep: 1|act_loss: 0.0157012939453125|cri_loss: 0.00835418701171875|unsuper_loss: 0.0
+average reward score: 5.9296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.70%) |Training time=0.47s (21.69%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4703|ppo_ep: 1|act_loss: 0.0203857421875|cri_loss: 0.010467529296875|unsuper_loss: 0.0
+average reward score: 5.60546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.69s (73.58%) |Training time=0.51s (22.04%) |Others=0.10 (4.38%)|CurSamplesPerSec=13.95 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4704|ppo_ep: 1|act_loss: -0.0079803466796875|cri_loss: -0.003173828125|unsuper_loss: 0.0
+average reward score: 7.0
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.47s (21.87%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4705|ppo_ep: 1|act_loss: 0.034271240234375|cri_loss: 0.0179901123046875|unsuper_loss: 0.0
+average reward score: 5.671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.10%) |Training time=0.46s (21.24%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4706|ppo_ep: 1|act_loss: 0.0245819091796875|cri_loss: 0.01285552978515625|unsuper_loss: 0.0
+average reward score: 6.3359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.98%) |Training time=0.46s (21.37%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4707|ppo_ep: 1|act_loss: 0.0191192626953125|cri_loss: 0.010101318359375|unsuper_loss: 0.0
+average reward score: 6.9375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.20%) |Training time=0.45s (21.12%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4708|ppo_ep: 1|act_loss: -0.004428863525390625|cri_loss: -0.0017147064208984375|unsuper_loss: 0.0
+average reward score: 4.609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.11%) |Training time=0.46s (21.23%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
+[2023-04-14 11:41:08,383] [INFO] [logging.py:96:log_dist] [Rank 0] step=4710, skipped=61, lr=[3.9585023695315105e-06, 3.9585023695315105e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:41:08,402] [INFO] [timer.py:199:stop] epoch=0/micro_step=4710/global_step=4710, RunningAvgSamplesPerSec=105.01362133774731, CurrSamplesPerSec=109.07168687720736, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:41:08,494] [INFO] [logging.py:96:log_dist] [Rank 0] step=4710, skipped=76, lr=[2.0652476533085043e-06, 2.0652476533085043e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4709|ppo_ep: 1|act_loss: 0.0088348388671875|cri_loss: 0.00482940673828125|unsuper_loss: 0.0
+average reward score: 6.0703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.09%) |Training time=0.46s (21.25%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4710|ppo_ep: 1|act_loss: -0.0226593017578125|cri_loss: -0.0111083984375|unsuper_loss: 0.0
+average reward score: 5.72265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.81%) |Training time=0.46s (21.53%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4711|ppo_ep: 1|act_loss: -0.02349853515625|cri_loss: -0.01152801513671875|unsuper_loss: 0.0
+average reward score: 5.12109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.79%) |Training time=0.46s (21.55%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4712|ppo_ep: 1|act_loss: 0.0088348388671875|cri_loss: 0.005279541015625|unsuper_loss: 0.0
+average reward score: 6.04296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.98%) |Training time=0.46s (21.36%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4713|ppo_ep: 1|act_loss: -0.007274627685546875|cri_loss: -0.00310516357421875|unsuper_loss: 0.0
+average reward score: 4.375
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.71%) |Training time=0.49s (20.99%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.66 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4714|ppo_ep: 1|act_loss: 0.004566192626953125|cri_loss: 0.002834320068359375|unsuper_loss: 0.0
+average reward score: 4.8984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.07%) |Training time=0.46s (21.24%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4715|ppo_ep: 1|act_loss: 0.0113983154296875|cri_loss: 0.005950927734375|unsuper_loss: 0.0
+average reward score: 5.3203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.91%) |Training time=0.46s (21.44%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
+[2023-04-14 11:41:23,609] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 4716|ppo_ep: 1|act_loss: 0.078857421875|cri_loss: 0.044708251953125|unsuper_loss: 0.0
+average reward score: 5.390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.92s |Gather latency=0.00s (0.00%) |Generate time=1.59s (54.54%) |Training time=1.23s (42.03%) |Others=0.10 (3.43%)|CurSamplesPerSec=10.98 |AvgSamplesPerSec=14.46
+[2023-04-14 11:41:26,520] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 4717|ppo_ep: 1|act_loss: 0.02716064453125|cri_loss: 0.01409912109375|unsuper_loss: 0.0
+average reward score: 5.77734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.59s (75.21%) |Training time=0.42s (20.05%) |Others=0.10 (4.74%)|CurSamplesPerSec=15.11 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4718|ppo_ep: 1|act_loss: 0.00347137451171875|cri_loss: 0.0022602081298828125|unsuper_loss: 0.0
+average reward score: 5.9296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.47s (21.93%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+[2023-04-14 11:41:30,826] [INFO] [logging.py:96:log_dist] [Rank 0] step=4720, skipped=63, lr=[3.943887140084842e-06, 3.943887140084842e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:41:30,845] [INFO] [timer.py:199:stop] epoch=0/micro_step=4720/global_step=4720, RunningAvgSamplesPerSec=104.96272361672763, CurrSamplesPerSec=110.75861112853234, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:41:30,938] [INFO] [logging.py:96:log_dist] [Rank 0] step=4720, skipped=76, lr=[2.0557725755993283e-06, 2.0557725755993283e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4719|ppo_ep: 1|act_loss: -0.0146484375|cri_loss: -0.005100250244140625|unsuper_loss: 0.0
+average reward score: 5.58203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.46%) |Training time=0.45s (20.90%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4720|ppo_ep: 1|act_loss: 0.005779266357421875|cri_loss: 0.003086090087890625|unsuper_loss: 0.0
+average reward score: 5.26171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.33%) |Training time=0.43s (19.98%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4721|ppo_ep: 1|act_loss: -0.016845703125|cri_loss: -0.00811004638671875|unsuper_loss: 0.0
+average reward score: 4.7265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.84%) |Training time=0.44s (20.49%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4722|ppo_ep: 1|act_loss: -0.03753662109375|cri_loss: -0.01436614990234375|unsuper_loss: 0.0
+average reward score: 5.58203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.03%) |Training time=0.43s (20.29%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4723|ppo_ep: 1|act_loss: -0.008636474609375|cri_loss: -0.0040435791015625|unsuper_loss: 0.0
+average reward score: 5.83984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.61%) |Training time=0.45s (20.72%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4724|ppo_ep: 1|act_loss: 0.004852294921875|cri_loss: 0.002864837646484375|unsuper_loss: 0.0
+average reward score: 4.96875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.82%) |Training time=0.44s (20.51%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4725|ppo_ep: 1|act_loss: 0.0152740478515625|cri_loss: 0.008697509765625|unsuper_loss: 0.0
+average reward score: 4.87890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.87%) |Training time=0.44s (20.43%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4726|ppo_ep: 1|act_loss: -0.005199432373046875|cri_loss: -0.000682830810546875|unsuper_loss: 0.0
+average reward score: 5.92578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.68%) |Training time=0.44s (20.65%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4727|ppo_ep: 1|act_loss: 0.023590087890625|cri_loss: 0.0131072998046875|unsuper_loss: 0.0
+average reward score: 4.58984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.56%) |Training time=0.45s (20.78%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4728|ppo_ep: 1|act_loss: 0.003345489501953125|cri_loss: 0.0030059814453125|unsuper_loss: 0.0
+average reward score: 5.6484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.79s (75.61%) |Training time=0.48s (20.16%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.52 |AvgSamplesPerSec=14.46
+[2023-04-14 11:41:52,512] [INFO] [logging.py:96:log_dist] [Rank 0] step=4730, skipped=63, lr=[3.925629873718018e-06, 3.925629873718018e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:41:52,531] [INFO] [timer.py:199:stop] epoch=0/micro_step=4730/global_step=4730, RunningAvgSamplesPerSec=104.9805842763996, CurrSamplesPerSec=115.75283565613357, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:41:52,623] [INFO] [logging.py:96:log_dist] [Rank 0] step=4730, skipped=76, lr=[2.046304082412223e-06, 2.046304082412223e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4729|ppo_ep: 1|act_loss: 0.015625|cri_loss: 0.0095672607421875|unsuper_loss: 0.0
+average reward score: 7.2734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.89%) |Training time=0.44s (20.46%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4730|ppo_ep: 1|act_loss: 0.0770263671875|cri_loss: 0.041961669921875|unsuper_loss: 0.0
+average reward score: 6.16796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.61%) |Training time=0.45s (20.75%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4731|ppo_ep: 1|act_loss: -0.009735107421875|cri_loss: -0.003570556640625|unsuper_loss: 0.0
+average reward score: 4.68359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.58%) |Training time=0.45s (20.79%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4732|ppo_ep: 1|act_loss: -0.0060272216796875|cri_loss: -0.002765655517578125|unsuper_loss: 0.0
+average reward score: 5.0
+-------------------------------------------------------------------------------------
+|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.64s (71.88%) |Training time=0.54s (23.74%) |Others=0.10 (4.38%)|CurSamplesPerSec=13.99 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4733|ppo_ep: 1|act_loss: -0.005260467529296875|cri_loss: -0.00251007080078125|unsuper_loss: 0.0
+average reward score: 6.58203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.28%) |Training time=0.49s (22.16%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4734|ppo_ep: 1|act_loss: 0.019622802734375|cri_loss: 0.0103607177734375|unsuper_loss: 0.0
+average reward score: 5.94140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.71%) |Training time=0.44s (20.64%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4735|ppo_ep: 1|act_loss: -0.00572967529296875|cri_loss: -0.002567291259765625|unsuper_loss: 0.0
+average reward score: 5.63671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.85%) |Training time=0.47s (21.48%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4736|ppo_ep: 1|act_loss: -0.02288818359375|cri_loss: -0.010772705078125|unsuper_loss: 0.0
+average reward score: 6.9375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.48%) |Training time=0.45s (20.87%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4737|ppo_ep: 1|act_loss: 0.0115203857421875|cri_loss: 0.006496429443359375|unsuper_loss: 0.0
+average reward score: 6.45703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.45%) |Training time=0.45s (20.90%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4738|ppo_ep: 1|act_loss: -0.00241851806640625|cri_loss: -0.0006160736083984375|unsuper_loss: 0.0
+average reward score: 6.13671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.18%) |Training time=0.46s (21.20%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46
+[2023-04-14 11:42:14,294] [INFO] [logging.py:96:log_dist] [Rank 0] step=4740, skipped=63, lr=[3.907385938186079e-06, 3.907385938186079e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:42:14,312] [INFO] [timer.py:199:stop] epoch=0/micro_step=4740/global_step=4740, RunningAvgSamplesPerSec=104.98486849818003, CurrSamplesPerSec=111.83245012364935, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:42:14,405] [INFO] [logging.py:96:log_dist] [Rank 0] step=4740, skipped=76, lr=[2.0368423140930975e-06, 2.0368423140930975e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4739|ppo_ep: 1|act_loss: -0.01392364501953125|cri_loss: -0.006786346435546875|unsuper_loss: 0.0
+average reward score: 4.703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.57%) |Training time=0.45s (20.80%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46
+[2023-04-14 11:42:16,448] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
+epoch: 0|step: 4740|ppo_ep: 1|act_loss: -0.0205078125|cri_loss: -0.00921630859375|unsuper_loss: 0.0
+average reward score: 5.4296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.29%) |Training time=0.43s (20.02%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4741|ppo_ep: 1|act_loss: -0.02386474609375|cri_loss: -0.01117706298828125|unsuper_loss: 0.0
+average reward score: 5.06640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.73%) |Training time=0.44s (20.62%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.47
+[2023-04-14 11:42:20,842] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 4742|ppo_ep: 1|act_loss: -0.0082855224609375|cri_loss: -0.004001617431640625|unsuper_loss: 0.0
+average reward score: 4.22265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.95%) |Training time=0.45s (20.86%) |Others=0.09 (4.19%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.47
+[2023-04-14 11:42:23,156] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 4743|ppo_ep: 1|act_loss: -0.0103607177734375|cri_loss: -0.0032501220703125|unsuper_loss: 0.0
+average reward score: 4.859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.93%) |Training time=0.44s (19.19%) |Others=0.09 (3.88%)|CurSamplesPerSec=13.83 |AvgSamplesPerSec=14.47
+[2023-04-14 11:42:25,190] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192
+epoch: 0|step: 4744|ppo_ep: 1|act_loss: -0.0399169921875|cri_loss: -0.0182647705078125|unsuper_loss: 0.0
+average reward score: 6.47265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.48%) |Training time=0.42s (19.83%) |Others=0.10 (4.69%)|CurSamplesPerSec=15.03 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4745|ppo_ep: 1|act_loss: -0.00701141357421875|cri_loss: -0.003337860107421875|unsuper_loss: 0.0
+average reward score: 5.31640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.59%) |Training time=0.45s (20.78%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4746|ppo_ep: 1|act_loss: 0.006175994873046875|cri_loss: 0.00450897216796875|unsuper_loss: 0.0
+average reward score: 6.01953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.86s |Gather latency=0.00s (0.00%) |Generate time=1.61s (56.27%) |Training time=0.44s (15.55%) |Others=0.81 (28.18%)|CurSamplesPerSec=11.20 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4747|ppo_ep: 1|act_loss: -0.00533294677734375|cri_loss: -0.002384185791015625|unsuper_loss: 0.0
+average reward score: 4.984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.99%) |Training time=0.44s (20.32%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4748|ppo_ep: 1|act_loss: 0.005939483642578125|cri_loss: 0.003276824951171875|unsuper_loss: 0.0
+average reward score: 5.9140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.11%) |Training time=0.46s (21.27%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46
+[2023-04-14 11:42:36,660] [INFO] [logging.py:96:log_dist] [Rank 0] step=4750, skipped=65, lr=[3.892800569690045e-06, 3.892800569690045e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:42:36,679] [INFO] [timer.py:199:stop] epoch=0/micro_step=4750/global_step=4750, RunningAvgSamplesPerSec=105.00306105783682, CurrSamplesPerSec=112.30368528329925, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:42:36,771] [INFO] [logging.py:96:log_dist] [Rank 0] step=4750, skipped=78, lr=[2.0292778355916135e-06, 2.0292778355916135e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4749|ppo_ep: 1|act_loss: -0.00264739990234375|cri_loss: -0.0008673667907714844|unsuper_loss: 0.0
+average reward score: 6.3046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.62%) |Training time=0.45s (20.73%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4750|ppo_ep: 1|act_loss: 0.000370025634765625|cri_loss: 0.0034427642822265625|unsuper_loss: 0.0
+average reward score: 5.9921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.83%) |Training time=0.44s (20.48%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4751|ppo_ep: 1|act_loss: 0.01898193359375|cri_loss: 0.00974273681640625|unsuper_loss: 0.0
+average reward score: 6.25
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.66%) |Training time=0.44s (20.67%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4752|ppo_ep: 1|act_loss: -0.0025463104248046875|cri_loss: -0.0009636878967285156|unsuper_loss: 0.0
+average reward score: 5.4453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.69%) |Training time=0.44s (20.63%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46
+epoch: 0|step: 4753|ppo_ep: 1|act_loss: 0.02203369140625|cri_loss: 0.011383056640625|unsuper_loss: 0.0
+average reward score: 5.05859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.61%) |Training time=0.45s (20.72%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4754|ppo_ep: 1|act_loss: 0.0150146484375|cri_loss: 0.00759124755859375|unsuper_loss: 0.0
+average reward score: 6.05078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.87%) |Training time=0.44s (20.46%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4755|ppo_ep: 1|act_loss: 0.00328826904296875|cri_loss: 0.002582550048828125|unsuper_loss: 0.0
+average reward score: 5.7890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.83%) |Training time=0.44s (20.54%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4756|ppo_ep: 1|act_loss: -0.01629638671875|cri_loss: -0.00687408447265625|unsuper_loss: 0.0
+average reward score: 6.4765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.61%) |Training time=0.45s (20.73%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4757|ppo_ep: 1|act_loss: -0.0019683837890625|cri_loss: -0.0006279945373535156|unsuper_loss: 0.0
+average reward score: 4.71484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.31%) |Training time=0.45s (19.34%) |Others=0.10 (4.35%)|CurSamplesPerSec=13.78 |AvgSamplesPerSec=14.47
+[2023-04-14 11:42:56,174] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, reducing to 4096
+epoch: 0|step: 4758|ppo_ep: 1|act_loss: -0.021881103515625|cri_loss: -0.009552001953125|unsuper_loss: 0.0
+average reward score: 4.94921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.61%) |Training time=0.42s (19.67%) |Others=0.10 (4.72%)|CurSamplesPerSec=15.09 |AvgSamplesPerSec=14.47
+[2023-04-14 11:42:58,305] [INFO] [logging.py:96:log_dist] [Rank 0] step=4760, skipped=66, lr=[3.876402607369461e-06, 3.876402607369461e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:42:58,323] [INFO] [timer.py:199:stop] epoch=0/micro_step=4760/global_step=4760, RunningAvgSamplesPerSec=105.02179852561878, CurrSamplesPerSec=113.14903772794369, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:42:58,416] [INFO] [logging.py:96:log_dist] [Rank 0] step=4760, skipped=78, lr=[2.0198285253870464e-06, 2.0198285253870464e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4759|ppo_ep: 1|act_loss: -0.00461578369140625|cri_loss: -0.002063751220703125|unsuper_loss: 0.0
+average reward score: 4.8203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.63%) |Training time=0.45s (20.73%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4760|ppo_ep: 1|act_loss: 0.00736236572265625|cri_loss: 0.003910064697265625|unsuper_loss: 0.0
+average reward score: 5.21875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.52%) |Training time=0.45s (20.83%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4761|ppo_ep: 1|act_loss: -0.047882080078125|cri_loss: -0.023040771484375|unsuper_loss: 0.0
+average reward score: 6.04296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.20%) |Training time=0.44s (20.21%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4762|ppo_ep: 1|act_loss: -0.001644134521484375|cri_loss: -0.0004086494445800781|unsuper_loss: 0.0
+average reward score: 4.51171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.71s (76.02%) |Training time=0.44s (19.52%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.22 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4763|ppo_ep: 1|act_loss: 0.0257568359375|cri_loss: 0.01320648193359375|unsuper_loss: 0.0
+average reward score: 5.1015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.28%) |Training time=0.45s (21.05%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4764|ppo_ep: 1|act_loss: -0.0240020751953125|cri_loss: -0.0116119384765625|unsuper_loss: 0.0
+average reward score: 4.6171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.62%) |Training time=0.45s (20.70%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4765|ppo_ep: 1|act_loss: 0.0045318603515625|cri_loss: 0.002452850341796875|unsuper_loss: 0.0
+average reward score: 5.484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.65%) |Training time=0.44s (20.68%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4766|ppo_ep: 1|act_loss: -0.0030364990234375|cri_loss: -0.0010528564453125|unsuper_loss: 0.0
+average reward score: 6.16796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.95%) |Training time=0.44s (20.35%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4767|ppo_ep: 1|act_loss: 0.06121826171875|cri_loss: 0.03350830078125|unsuper_loss: 0.0
+average reward score: 5.7421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.81%) |Training time=0.44s (20.49%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4768|ppo_ep: 1|act_loss: -0.02301025390625|cri_loss: -0.0109710693359375|unsuper_loss: 0.0
+average reward score: 4.9375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.02%) |Training time=0.43s (20.25%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.47
+[2023-04-14 11:43:19,934] [INFO] [logging.py:96:log_dist] [Rank 0] step=4770, skipped=66, lr=[3.8581960152626685e-06, 3.8581960152626685e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:43:19,953] [INFO] [timer.py:199:stop] epoch=0/micro_step=4770/global_step=4770, RunningAvgSamplesPerSec=105.03973265983572, CurrSamplesPerSec=115.99823346444069, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:43:20,046] [INFO] [logging.py:96:log_dist] [Rank 0] step=4770, skipped=78, lr=[2.010386332482083e-06, 2.010386332482083e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4769|ppo_ep: 1|act_loss: -0.02618408203125|cri_loss: -0.0127410888671875|unsuper_loss: 0.0
+average reward score: 5.09375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.86%) |Training time=0.44s (20.46%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4770|ppo_ep: 1|act_loss: -0.01104736328125|cri_loss: -0.00519561767578125|unsuper_loss: 0.0
+average reward score: 4.78515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.92%) |Training time=0.44s (20.38%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4771|ppo_ep: 1|act_loss: 0.024688720703125|cri_loss: 0.01568603515625|unsuper_loss: 0.0
+average reward score: 5.25390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.57%) |Training time=0.45s (20.77%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4772|ppo_ep: 1|act_loss: -0.039794921875|cri_loss: -0.0181884765625|unsuper_loss: 0.0
+average reward score: 5.48046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.60s (71.94%) |Training time=0.46s (20.59%) |Others=0.17 (7.47%)|CurSamplesPerSec=14.35 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4773|ppo_ep: 1|act_loss: -0.0227508544921875|cri_loss: -0.01081085205078125|unsuper_loss: 0.0
+average reward score: 5.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.54%) |Training time=0.43s (19.84%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4774|ppo_ep: 1|act_loss: -0.018585205078125|cri_loss: -0.00902557373046875|unsuper_loss: 0.0
+average reward score: 6.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.70%) |Training time=0.44s (20.64%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4775|ppo_ep: 1|act_loss: -0.01849365234375|cri_loss: -0.008697509765625|unsuper_loss: 0.0
+average reward score: 5.1171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.96s |Gather latency=0.00s (0.00%) |Generate time=1.61s (54.33%) |Training time=0.44s (14.90%) |Others=0.91 (30.77%)|CurSamplesPerSec=10.83 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4776|ppo_ep: 1|act_loss: -0.00054931640625|cri_loss: -2.384185791015625e-05|unsuper_loss: 0.0
+average reward score: 4.90234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.81%) |Training time=0.44s (20.50%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4777|ppo_ep: 1|act_loss: 0.01239013671875|cri_loss: 0.006465911865234375|unsuper_loss: 0.0
+average reward score: 6.9921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.88%) |Training time=0.44s (20.43%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4778|ppo_ep: 1|act_loss: 0.029632568359375|cri_loss: 0.0154571533203125|unsuper_loss: 0.0
+average reward score: 4.79296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.75%) |Training time=0.44s (20.56%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.47
+[2023-04-14 11:43:42,352] [INFO] [logging.py:96:log_dist] [Rank 0] step=4780, skipped=66, lr=[3.840003753523217e-06, 3.840003753523217e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:43:42,371] [INFO] [timer.py:199:stop] epoch=0/micro_step=4780/global_step=4780, RunningAvgSamplesPerSec=105.0575480387665, CurrSamplesPerSec=113.63751879597393, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:43:42,463] [INFO] [logging.py:96:log_dist] [Rank 0] step=4780, skipped=78, lr=[2.000951396832801e-06, 2.000951396832801e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4779|ppo_ep: 1|act_loss: -0.0135650634765625|cri_loss: -0.005779266357421875|unsuper_loss: 0.0
+average reward score: 5.31640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.73%) |Training time=0.45s (20.62%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4780|ppo_ep: 1|act_loss: 0.04937744140625|cri_loss: 0.0261383056640625|unsuper_loss: 0.0
+average reward score: 5.17578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.55%) |Training time=0.45s (20.79%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4781|ppo_ep: 1|act_loss: 0.011749267578125|cri_loss: 0.007293701171875|unsuper_loss: 0.0
+average reward score: 4.75
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.86%) |Training time=0.44s (20.47%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4782|ppo_ep: 1|act_loss: 0.03631591796875|cri_loss: 0.019195556640625|unsuper_loss: 0.0
+average reward score: 4.640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.88%) |Training time=0.44s (20.44%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4783|ppo_ep: 1|act_loss: 0.034027099609375|cri_loss: 0.01751708984375|unsuper_loss: 0.0
+average reward score: 5.6171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.16%) |Training time=0.44s (20.18%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4784|ppo_ep: 1|act_loss: 0.0131072998046875|cri_loss: 0.007442474365234375|unsuper_loss: 0.0
+average reward score: 5.18359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.83%) |Training time=0.44s (20.48%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4785|ppo_ep: 1|act_loss: 0.00457000732421875|cri_loss: 0.002666473388671875|unsuper_loss: 0.0
+average reward score: 4.93359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.70%) |Training time=0.44s (20.63%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4786|ppo_ep: 1|act_loss: -0.018707275390625|cri_loss: -0.0084686279296875|unsuper_loss: 0.0
+average reward score: 5.0390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.93%) |Training time=0.44s (20.39%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4787|ppo_ep: 1|act_loss: -0.018280029296875|cri_loss: -0.00768280029296875|unsuper_loss: 0.0
+average reward score: 5.20703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.48%) |Training time=0.44s (19.18%) |Others=0.10 (4.33%)|CurSamplesPerSec=13.86 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4788|ppo_ep: 1|act_loss: 0.0078887939453125|cri_loss: 0.0048980712890625|unsuper_loss: 0.0
+average reward score: 5.4921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.85%) |Training time=0.44s (20.47%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.47
+[2023-04-14 11:44:03,971] [INFO] [logging.py:96:log_dist] [Rank 0] step=4790, skipped=66, lr=[3.821826091804312e-06, 3.821826091804312e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:44:03,990] [INFO] [timer.py:199:stop] epoch=0/micro_step=4790/global_step=4790, RunningAvgSamplesPerSec=105.07718399184161, CurrSamplesPerSec=115.61762668881086, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:44:04,082] [INFO] [logging.py:96:log_dist] [Rank 0] step=4790, skipped=78, lr=[1.9915238582877077e-06, 1.9915238582877077e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4789|ppo_ep: 1|act_loss: -0.032135009765625|cri_loss: -0.006683349609375|unsuper_loss: 0.0
+average reward score: 5.58984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.82%) |Training time=0.44s (20.50%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.47
+[2023-04-14 11:44:06,141] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 4096, reducing to 2048
+epoch: 0|step: 4790|ppo_ep: 1|act_loss: -0.0193939208984375|cri_loss: -0.0087890625|unsuper_loss: 0.0
+average reward score: 5.7890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.49%) |Training time=0.43s (19.85%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4791|ppo_ep: 1|act_loss: -0.03594970703125|cri_loss: -0.017120361328125|unsuper_loss: 0.0
+average reward score: 4.75390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.98%) |Training time=0.44s (19.55%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.28 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4792|ppo_ep: 1|act_loss: -0.00423431396484375|cri_loss: -0.0019321441650390625|unsuper_loss: 0.0
+average reward score: 5.50390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.77%) |Training time=0.44s (20.55%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4793|ppo_ep: 1|act_loss: -0.0269622802734375|cri_loss: -0.0128021240234375|unsuper_loss: 0.0
+average reward score: 5.71875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.58%) |Training time=0.45s (20.73%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4794|ppo_ep: 1|act_loss: 0.04144287109375|cri_loss: 0.023712158203125|unsuper_loss: 0.0
+average reward score: 3.994140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.43%) |Training time=0.45s (20.90%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4795|ppo_ep: 1|act_loss: 0.0204315185546875|cri_loss: 0.0131378173828125|unsuper_loss: 0.0
+average reward score: 5.4921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.75%) |Training time=0.44s (20.57%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4796|ppo_ep: 1|act_loss: 0.0016412734985351562|cri_loss: 0.000988006591796875|unsuper_loss: 0.0
+average reward score: 4.734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.02%) |Training time=0.44s (20.29%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4797|ppo_ep: 1|act_loss: 0.01119232177734375|cri_loss: 0.0069122314453125|unsuper_loss: 0.0
+average reward score: 5.45703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.59%) |Training time=0.45s (20.76%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4798|ppo_ep: 1|act_loss: -0.0167083740234375|cri_loss: -0.00778961181640625|unsuper_loss: 0.0
+average reward score: 5.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.75%) |Training time=0.44s (20.58%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.47
+[2023-04-14 11:44:25,543] [INFO] [logging.py:96:log_dist] [Rank 0] step=4800, skipped=67, lr=[3.8054789019689153e-06, 3.8054789019689153e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:44:25,562] [INFO] [timer.py:199:stop] epoch=0/micro_step=4800/global_step=4800, RunningAvgSamplesPerSec=105.09690332358787, CurrSamplesPerSec=114.88991285099068, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:44:25,655] [INFO] [logging.py:96:log_dist] [Rank 0] step=4800, skipped=78, lr=[1.9821038565856653e-06, 1.9821038565856653e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4799|ppo_ep: 1|act_loss: -0.030059814453125|cri_loss: -0.01418304443359375|unsuper_loss: 0.0
+average reward score: 7.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.81%) |Training time=0.44s (20.54%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4800|ppo_ep: 1|act_loss: 0.0014495849609375|cri_loss: 0.0008115768432617188|unsuper_loss: 0.0
+average reward score: 6.1640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.13%) |Training time=0.43s (20.16%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4801|ppo_ep: 1|act_loss: 0.007587432861328125|cri_loss: 0.004497528076171875|unsuper_loss: 0.0
+average reward score: 5.3203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.86%) |Training time=0.44s (20.48%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4802|ppo_ep: 1|act_loss: 0.025054931640625|cri_loss: 0.017242431640625|unsuper_loss: 0.0
+average reward score: 4.765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.44%) |Training time=0.45s (19.22%) |Others=0.10 (4.34%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4803|ppo_ep: 1|act_loss: 0.03131103515625|cri_loss: 0.016448974609375|unsuper_loss: 0.0
+average reward score: 4.046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.67%) |Training time=0.44s (20.67%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4804|ppo_ep: 1|act_loss: 0.006938934326171875|cri_loss: 0.00620269775390625|unsuper_loss: 0.0
+average reward score: 5.4609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.56%) |Training time=0.45s (20.79%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4805|ppo_ep: 1|act_loss: -0.022186279296875|cri_loss: -0.00853729248046875|unsuper_loss: 0.0
+average reward score: 5.29296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.54s |Gather latency=0.00s (0.00%) |Generate time=1.60s (63.07%) |Training time=0.44s (17.42%) |Others=0.50 (19.51%)|CurSamplesPerSec=12.59 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4806|ppo_ep: 1|act_loss: -0.0245361328125|cri_loss: -0.01068115234375|unsuper_loss: 0.0
+average reward score: 5.84375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.79%) |Training time=0.44s (20.51%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4807|ppo_ep: 1|act_loss: -0.003475189208984375|cri_loss: 0.0003528594970703125|unsuper_loss: 0.0
+average reward score: 4.68359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.62%) |Training time=0.45s (20.72%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4808|ppo_ep: 1|act_loss: -0.023895263671875|cri_loss: -0.011199951171875|unsuper_loss: 0.0
+average reward score: 5.62109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.68%) |Training time=0.44s (20.63%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.47
+[2023-04-14 11:44:47,592] [INFO] [logging.py:96:log_dist] [Rank 0] step=4810, skipped=67, lr=[3.7873297224053474e-06, 3.7873297224053474e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:44:47,610] [INFO] [timer.py:199:stop] epoch=0/micro_step=4810/global_step=4810, RunningAvgSamplesPerSec=105.11458737215244, CurrSamplesPerSec=111.97529193575703, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:44:47,702] [INFO] [logging.py:96:log_dist] [Rank 0] step=4810, skipped=78, lr=[1.972691531353826e-06, 1.972691531353826e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4809|ppo_ep: 1|act_loss: -0.017120361328125|cri_loss: -0.008392333984375|unsuper_loss: 0.0
+average reward score: 5.8984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.55%) |Training time=0.45s (20.80%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4810|ppo_ep: 1|act_loss: -0.052734375|cri_loss: -0.025604248046875|unsuper_loss: 0.0
+average reward score: 4.78515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.85%) |Training time=0.44s (20.53%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4811|ppo_ep: 1|act_loss: -0.004974365234375|cri_loss: -0.0013408660888671875|unsuper_loss: 0.0
+average reward score: 4.6953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.79%) |Training time=0.44s (20.53%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4812|ppo_ep: 1|act_loss: 0.007965087890625|cri_loss: 0.0068359375|unsuper_loss: 0.0
+average reward score: 5.62890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.59%) |Training time=0.45s (20.74%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4813|ppo_ep: 1|act_loss: 0.013336181640625|cri_loss: 0.007358551025390625|unsuper_loss: 0.0
+average reward score: 6.703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.60%) |Training time=0.45s (20.73%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4814|ppo_ep: 1|act_loss: 0.03265380859375|cri_loss: 0.0167083740234375|unsuper_loss: 0.0
+average reward score: 5.16015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.54%) |Training time=0.45s (20.73%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4815|ppo_ep: 1|act_loss: 0.0244293212890625|cri_loss: 0.01416015625|unsuper_loss: 0.0
+average reward score: 4.9375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.67%) |Training time=0.44s (20.65%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4816|ppo_ep: 1|act_loss: 0.01470947265625|cri_loss: 0.0077667236328125|unsuper_loss: 0.0
+average reward score: 7.00390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.76%) |Training time=0.44s (20.58%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4817|ppo_ep: 1|act_loss: 0.013702392578125|cri_loss: 0.007625579833984375|unsuper_loss: 0.0
+average reward score: 5.1015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.60s (71.99%) |Training time=0.46s (20.71%) |Others=0.16 (7.30%)|CurSamplesPerSec=14.40 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4818|ppo_ep: 1|act_loss: 0.027618408203125|cri_loss: 0.01415252685546875|unsuper_loss: 0.0
+average reward score: 5.9140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.79%) |Training time=0.44s (20.42%) |Others=0.10 (4.79%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.47
+[2023-04-14 11:45:09,187] [INFO] [logging.py:96:log_dist] [Rank 0] step=4820, skipped=67, lr=[3.769195923618539e-06, 3.769195923618539e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:45:09,206] [INFO] [timer.py:199:stop] epoch=0/micro_step=4820/global_step=4820, RunningAvgSamplesPerSec=105.12987027841997, CurrSamplesPerSec=109.2328892433651, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:45:09,298] [INFO] [logging.py:96:log_dist] [Rank 0] step=4820, skipped=78, lr=[1.9632870221055535e-06, 1.9632870221055535e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4819|ppo_ep: 1|act_loss: 0.0164794921875|cri_loss: 0.00868988037109375|unsuper_loss: 0.0
+average reward score: 4.78125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.33%) |Training time=0.46s (21.06%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4820|ppo_ep: 1|act_loss: 0.02825927734375|cri_loss: 0.01436614990234375|unsuper_loss: 0.0
+average reward score: 4.9609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.82%) |Training time=0.44s (20.27%) |Others=0.11 (4.91%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4821|ppo_ep: 1|act_loss: -0.001445770263671875|cri_loss: -0.0004239082336425781|unsuper_loss: 0.0
+average reward score: 5.0859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.69s (75.59%) |Training time=0.45s (19.93%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.30 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4822|ppo_ep: 1|act_loss: -0.024169921875|cri_loss: -0.01146697998046875|unsuper_loss: 0.0
+average reward score: 4.38671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.60%) |Training time=0.45s (20.74%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4823|ppo_ep: 1|act_loss: -0.0151824951171875|cri_loss: -0.00701141357421875|unsuper_loss: 0.0
+average reward score: 4.98046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.56%) |Training time=0.45s (20.77%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4824|ppo_ep: 1|act_loss: -0.048370361328125|cri_loss: -0.0208587646484375|unsuper_loss: 0.0
+average reward score: 5.0
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.33%) |Training time=0.45s (21.02%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4825|ppo_ep: 1|act_loss: -0.0100555419921875|cri_loss: -0.004581451416015625|unsuper_loss: 0.0
+average reward score: 5.5390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.90%) |Training time=0.44s (20.44%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4826|ppo_ep: 1|act_loss: 0.054901123046875|cri_loss: 0.029449462890625|unsuper_loss: 0.0
+average reward score: 4.85546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.62%) |Training time=0.44s (20.06%) |Others=0.16 (7.32%)|CurSamplesPerSec=14.49 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4827|ppo_ep: 1|act_loss: -0.01763916015625|cri_loss: -0.008514404296875|unsuper_loss: 0.0
+average reward score: 4.390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.68%) |Training time=0.44s (20.63%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4828|ppo_ep: 1|act_loss: 0.0816650390625|cri_loss: 0.044952392578125|unsuper_loss: 0.0
+average reward score: 4.6953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.89%) |Training time=0.44s (20.43%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.47
+[2023-04-14 11:45:30,886] [INFO] [logging.py:96:log_dist] [Rank 0] step=4830, skipped=67, lr=[3.7510777743951295e-06, 3.7510777743951295e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:45:30,905] [INFO] [timer.py:199:stop] epoch=0/micro_step=4830/global_step=4830, RunningAvgSamplesPerSec=105.14553518088816, CurrSamplesPerSec=112.7394056175783, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:45:30,998] [INFO] [logging.py:96:log_dist] [Rank 0] step=4830, skipped=78, lr=[1.9538904682383637e-06, 1.9538904682383637e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4829|ppo_ep: 1|act_loss: 0.00909423828125|cri_loss: 0.0047149658203125|unsuper_loss: 0.0
+average reward score: 5.10546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.60%) |Training time=0.45s (20.77%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4830|ppo_ep: 1|act_loss: -0.000301361083984375|cri_loss: 0.00032329559326171875|unsuper_loss: 0.0
+average reward score: 6.6796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.69%) |Training time=0.44s (20.64%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4831|ppo_ep: 1|act_loss: -0.0018453598022460938|cri_loss: -0.0007162094116210938|unsuper_loss: 0.0
+average reward score: 4.7421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.69%) |Training time=0.44s (20.65%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4832|ppo_ep: 1|act_loss: 0.0035800933837890625|cri_loss: 0.00217437744140625|unsuper_loss: 0.0
+average reward score: 5.5703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.78s (78.49%) |Training time=0.39s (17.07%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.13 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4833|ppo_ep: 1|act_loss: 0.0008764266967773438|cri_loss: 0.0006198883056640625|unsuper_loss: 0.0
+average reward score: 3.99609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.77%) |Training time=0.44s (20.58%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4834|ppo_ep: 1|act_loss: -0.00731658935546875|cri_loss: -0.0033473968505859375|unsuper_loss: 0.0
+average reward score: 4.3125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.56%) |Training time=0.45s (20.78%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4835|ppo_ep: 1|act_loss: -0.0066375732421875|cri_loss: -0.003192901611328125|unsuper_loss: 0.0
+average reward score: 4.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.96%) |Training time=0.44s (20.35%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4836|ppo_ep: 1|act_loss: 0.0096588134765625|cri_loss: 0.005191802978515625|unsuper_loss: 0.0
+average reward score: 5.4296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.68%) |Training time=0.44s (20.67%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4837|ppo_ep: 1|act_loss: -0.018707275390625|cri_loss: -0.00823974609375|unsuper_loss: 0.0
+average reward score: 5.78125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.53%) |Training time=0.45s (20.80%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4838|ppo_ep: 1|act_loss: 0.005157470703125|cri_loss: 0.005462646484375|unsuper_loss: 0.0
+average reward score: 5.0390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.62s (69.41%) |Training time=0.44s (18.96%) |Others=0.27 (11.63%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.47
+[2023-04-14 11:45:52,873] [INFO] [logging.py:96:log_dist] [Rank 0] step=4840, skipped=67, lr=[3.7329755432898005e-06, 3.7329755432898005e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:45:52,891] [INFO] [timer.py:199:stop] epoch=0/micro_step=4840/global_step=4840, RunningAvgSamplesPerSec=105.16561029558453, CurrSamplesPerSec=110.12817981835353, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:45:52,984] [INFO] [logging.py:96:log_dist] [Rank 0] step=4840, skipped=78, lr=[1.944502009031851e-06, 1.944502009031851e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4839|ppo_ep: 1|act_loss: 0.04376220703125|cri_loss: 0.02435302734375|unsuper_loss: 0.0
+average reward score: 5.25
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.36%) |Training time=0.45s (21.01%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4840|ppo_ep: 1|act_loss: 0.02081298828125|cri_loss: 0.01064300537109375|unsuper_loss: 0.0
+average reward score: 5.03515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.56%) |Training time=0.45s (20.79%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4841|ppo_ep: 1|act_loss: -0.0098876953125|cri_loss: -0.0048065185546875|unsuper_loss: 0.0
+average reward score: 5.546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.52%) |Training time=0.45s (20.81%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4842|ppo_ep: 1|act_loss: 0.0190887451171875|cri_loss: 0.0098876953125|unsuper_loss: 0.0
+average reward score: 6.19140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.65%) |Training time=0.44s (20.69%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.47
+[2023-04-14 11:46:01,459] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 2048, reducing to 1024
+epoch: 0|step: 4843|ppo_ep: 1|act_loss: -0.08251953125|cri_loss: 0.013916015625|unsuper_loss: 0.0
+average reward score: 5.1640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.57%) |Training time=0.42s (19.72%) |Others=0.10 (4.71%)|CurSamplesPerSec=15.13 |AvgSamplesPerSec=14.47
+[2023-04-14 11:46:03,690] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 4844|ppo_ep: 1|act_loss: -0.0209197998046875|cri_loss: -0.01010894775390625|unsuper_loss: 0.0
+average reward score: 5.5703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.60s (69.56%) |Training time=0.45s (19.41%) |Others=0.25 (11.03%)|CurSamplesPerSec=13.90 |AvgSamplesPerSec=14.47
+[2023-04-14 11:46:05,989] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 4845|ppo_ep: 1|act_loss: -0.024261474609375|cri_loss: -0.0117340087890625|unsuper_loss: 0.0
+average reward score: 5.56640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.92%) |Training time=0.45s (20.90%) |Others=0.09 (4.17%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4846|ppo_ep: 1|act_loss: 0.01186370849609375|cri_loss: 0.00653839111328125|unsuper_loss: 0.0
+average reward score: 4.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.66%) |Training time=0.44s (20.62%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4847|ppo_ep: 1|act_loss: -0.0236663818359375|cri_loss: -0.0113372802734375|unsuper_loss: 0.0
+average reward score: 4.375
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.49%) |Training time=0.44s (19.16%) |Others=0.10 (4.35%)|CurSamplesPerSec=13.86 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4848|ppo_ep: 1|act_loss: -0.0061187744140625|cri_loss: -0.00244903564453125|unsuper_loss: 0.0
+average reward score: 4.9765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.57%) |Training time=0.44s (20.54%) |Others=0.11 (4.89%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.47
+[2023-04-14 11:46:14,645] [INFO] [logging.py:96:log_dist] [Rank 0] step=4850, skipped=68, lr=[3.7166973670563355e-06, 3.7166973670563355e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:46:14,664] [INFO] [timer.py:199:stop] epoch=0/micro_step=4850/global_step=4850, RunningAvgSamplesPerSec=105.18298878559892, CurrSamplesPerSec=112.45244484735046, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:46:14,757] [INFO] [logging.py:96:log_dist] [Rank 0] step=4850, skipped=80, lr=[1.9369971633416747e-06, 1.9369971633416747e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4849|ppo_ep: 1|act_loss: 0.0143890380859375|cri_loss: 0.008087158203125|unsuper_loss: 0.0
+average reward score: 5.74609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.77%) |Training time=0.45s (20.64%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4850|ppo_ep: 1|act_loss: -0.0021076202392578125|cri_loss: -0.0008134841918945312|unsuper_loss: 0.0
+average reward score: 5.41015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.78%) |Training time=0.44s (19.76%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.25 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4851|ppo_ep: 1|act_loss: -0.00928497314453125|cri_loss: -0.004505157470703125|unsuper_loss: 0.0
+average reward score: 5.62109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.40%) |Training time=0.45s (20.76%) |Others=0.10 (4.83%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4852|ppo_ep: 1|act_loss: 0.008697509765625|cri_loss: 0.00479888916015625|unsuper_loss: 0.0
+average reward score: 5.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.75%) |Training time=0.44s (20.56%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4853|ppo_ep: 1|act_loss: 0.0016317367553710938|cri_loss: 0.00292205810546875|unsuper_loss: 0.0
+average reward score: 6.00390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.69%) |Training time=0.44s (20.63%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4854|ppo_ep: 1|act_loss: -0.00138092041015625|cri_loss: -0.00043582916259765625|unsuper_loss: 0.0
+average reward score: 5.734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.57%) |Training time=0.45s (20.77%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4855|ppo_ep: 1|act_loss: 0.1221923828125|cri_loss: 0.06793212890625|unsuper_loss: 0.0
+average reward score: 4.80078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.97%) |Training time=0.44s (20.36%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4856|ppo_ep: 1|act_loss: -0.00634002685546875|cri_loss: -0.002719879150390625|unsuper_loss: 0.0
+average reward score: 5.53515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.87s |Gather latency=0.00s (0.00%) |Generate time=1.61s (56.00%) |Training time=0.44s (15.37%) |Others=0.82 (28.63%)|CurSamplesPerSec=11.16 |AvgSamplesPerSec=14.47
+[2023-04-14 11:46:32,648] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 1024, reducing to 512
+[2023-04-14 11:46:32,734] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
+epoch: 0|step: 4857|ppo_ep: 1|act_loss: -0.033935546875|cri_loss: 0.1702880859375|unsuper_loss: 0.0
+average reward score: 4.53125
+-------------------------------------------------------------------------------------
+|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.84%) |Training time=0.42s (19.84%) |Others=0.09 (4.32%)|CurSamplesPerSec=15.14 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4858|ppo_ep: 1|act_loss: 0.02703857421875|cri_loss: 0.016845703125|unsuper_loss: 0.0
+average reward score: 4.40625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.84%) |Training time=0.44s (20.47%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.47
+[2023-04-14 11:46:36,918] [INFO] [logging.py:96:log_dist] [Rank 0] step=4860, skipped=69, lr=[3.700432497278104e-06, 3.700432497278104e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:46:36,937] [INFO] [timer.py:199:stop] epoch=0/micro_step=4860/global_step=4860, RunningAvgSamplesPerSec=105.2016877522836, CurrSamplesPerSec=113.58462629500683, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:46:37,029] [INFO] [logging.py:96:log_dist] [Rank 0] step=4860, skipped=81, lr=[1.9285605994579322e-06, 1.9285605994579322e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4859|ppo_ep: 1|act_loss: 0.02972412109375|cri_loss: 0.0167388916015625|unsuper_loss: 0.0
+average reward score: 5.84375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.66%) |Training time=0.44s (20.69%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4860|ppo_ep: 1|act_loss: 0.006023406982421875|cri_loss: 0.003147125244140625|unsuper_loss: 0.0
+average reward score: 5.91015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.73%) |Training time=0.44s (20.60%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4861|ppo_ep: 1|act_loss: -0.0489501953125|cri_loss: -0.0228729248046875|unsuper_loss: 0.0
+average reward score: 5.640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.27%) |Training time=0.46s (21.01%) |Others=0.13 (5.72%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4862|ppo_ep: 1|act_loss: -0.029449462890625|cri_loss: -0.01416778564453125|unsuper_loss: 0.0
+average reward score: 4.23046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.80%) |Training time=0.44s (20.23%) |Others=0.11 (4.96%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4863|ppo_ep: 1|act_loss: 0.02264404296875|cri_loss: 0.01192474365234375|unsuper_loss: 0.0
+average reward score: 5.73828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.60%) |Training time=0.45s (20.72%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4864|ppo_ep: 1|act_loss: 0.0120697021484375|cri_loss: 0.006404876708984375|unsuper_loss: 0.0
+average reward score: 6.19921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.64%) |Training time=0.44s (20.68%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4865|ppo_ep: 1|act_loss: 0.007213592529296875|cri_loss: 0.0038242340087890625|unsuper_loss: 0.0
+average reward score: 4.9921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.85%) |Training time=0.44s (20.49%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4866|ppo_ep: 1|act_loss: 0.0120849609375|cri_loss: 0.006259918212890625|unsuper_loss: 0.0
+average reward score: 6.2265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.67%) |Training time=0.44s (20.66%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4867|ppo_ep: 1|act_loss: 0.024078369140625|cri_loss: 0.012603759765625|unsuper_loss: 0.0
+average reward score: 4.25
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.70%) |Training time=0.44s (20.63%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.47
+[2023-04-14 11:46:56,311] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 512, reducing to 256
+[2023-04-14 11:46:56,397] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192
+epoch: 0|step: 4868|ppo_ep: 1|act_loss: 0.000885009765625|cri_loss: 0.0006785392761230469|unsuper_loss: 0.0
+average reward score: 7.36328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.60s (76.11%) |Training time=0.41s (19.55%) |Others=0.09 (4.34%)|CurSamplesPerSec=15.19 |AvgSamplesPerSec=14.47
+[2023-04-14 11:46:58,434] [INFO] [logging.py:96:log_dist] [Rank 0] step=4870, skipped=70, lr=[3.6841811292336835e-06, 3.6841811292336835e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:46:58,453] [INFO] [timer.py:199:stop] epoch=0/micro_step=4870/global_step=4870, RunningAvgSamplesPerSec=105.2193610408015, CurrSamplesPerSec=112.97113388004588, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:46:58,545] [INFO] [logging.py:96:log_dist] [Rank 0] step=4870, skipped=82, lr=[1.9201308963652553e-06, 1.9201308963652553e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4869|ppo_ep: 1|act_loss: 0.039154052734375|cri_loss: 0.0216522216796875|unsuper_loss: 0.0
+average reward score: 5.81640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.59%) |Training time=0.45s (20.76%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4870|ppo_ep: 1|act_loss: 0.03594970703125|cri_loss: 0.01934814453125|unsuper_loss: 0.0
+average reward score: 5.26953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.36%) |Training time=0.45s (20.99%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4871|ppo_ep: 1|act_loss: -0.0149078369140625|cri_loss: -0.00624847412109375|unsuper_loss: 0.0
+average reward score: 5.1484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.65%) |Training time=0.44s (20.67%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4872|ppo_ep: 1|act_loss: -0.0032520294189453125|cri_loss: -0.0011539459228515625|unsuper_loss: 0.0
+average reward score: 5.171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.66%) |Training time=0.44s (20.68%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4873|ppo_ep: 1|act_loss: -0.00739288330078125|cri_loss: -0.00341796875|unsuper_loss: 0.0
+average reward score: 4.87890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.51s |Gather latency=0.00s (0.00%) |Generate time=1.60s (63.72%) |Training time=0.44s (17.57%) |Others=0.47 (18.71%)|CurSamplesPerSec=12.74 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4874|ppo_ep: 1|act_loss: 0.0113525390625|cri_loss: 0.005992889404296875|unsuper_loss: 0.0
+average reward score: 6.44140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.33%) |Training time=0.45s (21.00%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4875|ppo_ep: 1|act_loss: -0.00506591796875|cri_loss: -0.001888275146484375|unsuper_loss: 0.0
+average reward score: 6.421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.54%) |Training time=0.45s (20.81%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4876|ppo_ep: 1|act_loss: -0.01038360595703125|cri_loss: -0.0050811767578125|unsuper_loss: 0.0
+average reward score: 4.63671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.60s (71.42%) |Training time=0.47s (20.88%) |Others=0.17 (7.70%)|CurSamplesPerSec=14.29 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4877|ppo_ep: 1|act_loss: -0.023468017578125|cri_loss: -0.01141357421875|unsuper_loss: 0.0
+average reward score: 5.8828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.72%) |Training time=0.44s (20.41%) |Others=0.10 (4.87%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4878|ppo_ep: 1|act_loss: 0.00634002685546875|cri_loss: 0.003414154052734375|unsuper_loss: 0.0
+average reward score: 5.0390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.27%) |Training time=0.45s (20.81%) |Others=0.11 (4.93%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.47
+[2023-04-14 11:47:20,508] [INFO] [logging.py:96:log_dist] [Rank 0] step=4880, skipped=70, lr=[3.6661401263059195e-06, 3.6661401263059195e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:47:20,682] [INFO] [timer.py:199:stop] epoch=0/micro_step=4880/global_step=4880, RunningAvgSamplesPerSec=105.21670595000673, CurrSamplesPerSec=63.53029762995129, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:47:20,777] [INFO] [logging.py:96:log_dist] [Rank 0] step=4880, skipped=82, lr=[1.9107727292970774e-06, 1.9107727292970774e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4879|ppo_ep: 1|act_loss: 0.030609130859375|cri_loss: 0.0157623291015625|unsuper_loss: 0.0
+average reward score: 5.63671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.41s |Gather latency=0.00s (0.00%) |Generate time=1.63s (67.36%) |Training time=0.69s (28.47%) |Others=0.10 (4.16%)|CurSamplesPerSec=13.26 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4880|ppo_ep: 1|act_loss: -0.0046234130859375|cri_loss: -0.0019855499267578125|unsuper_loss: 0.0
+average reward score: 6.265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.67s (76.48%) |Training time=0.41s (18.75%) |Others=0.10 (4.76%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4881|ppo_ep: 1|act_loss: 0.01279449462890625|cri_loss: 0.0069427490234375|unsuper_loss: 0.0
+average reward score: 5.1015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.03%) |Training time=0.47s (21.30%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4882|ppo_ep: 1|act_loss: -0.03192138671875|cri_loss: -0.0152435302734375|unsuper_loss: 0.0
+average reward score: 4.90625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.37%) |Training time=0.46s (20.94%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4883|ppo_ep: 1|act_loss: -0.0025081634521484375|cri_loss: -0.00106048583984375|unsuper_loss: 0.0
+average reward score: 4.70703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.17%) |Training time=0.46s (21.15%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4884|ppo_ep: 1|act_loss: 0.020965576171875|cri_loss: 0.01074981689453125|unsuper_loss: 0.0
+average reward score: 6.15625
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.62%) |Training time=0.48s (21.72%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4885|ppo_ep: 1|act_loss: -0.0005536079406738281|cri_loss: 0.00018978118896484375|unsuper_loss: 0.0
+average reward score: 5.95703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.65s |Gather latency=0.00s (0.00%) |Generate time=1.63s (61.59%) |Training time=0.46s (17.21%) |Others=0.56 (21.21%)|CurSamplesPerSec=12.07 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4886|ppo_ep: 1|act_loss: 0.026092529296875|cri_loss: 0.01351165771484375|unsuper_loss: 0.0
+average reward score: 5.44140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.18%) |Training time=0.46s (21.20%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4887|ppo_ep: 1|act_loss: 0.021514892578125|cri_loss: 0.01136016845703125|unsuper_loss: 0.0
+average reward score: 4.515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.42%) |Training time=0.48s (21.97%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4888|ppo_ep: 1|act_loss: 0.00982666015625|cri_loss: 0.005466461181640625|unsuper_loss: 0.0
+average reward score: 5.39453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.69%) |Training time=0.47s (21.70%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.47
+[2023-04-14 11:47:42,965] [INFO] [logging.py:96:log_dist] [Rank 0] step=4890, skipped=70, lr=[3.6481163004770804e-06, 3.6481163004770804e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:47:42,983] [INFO] [timer.py:199:stop] epoch=0/micro_step=4890/global_step=4890, RunningAvgSamplesPerSec=105.22021353634193, CurrSamplesPerSec=99.59072874905486, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:47:43,076] [INFO] [logging.py:96:log_dist] [Rank 0] step=4890, skipped=82, lr=[1.9014232959984846e-06, 1.9014232959984846e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4889|ppo_ep: 1|act_loss: 0.009552001953125|cri_loss: 0.00537109375|unsuper_loss: 0.0
+average reward score: 6.1640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.29%) |Training time=0.48s (22.12%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4890|ppo_ep: 1|act_loss: 0.017578125|cri_loss: 0.009979248046875|unsuper_loss: 0.0
+average reward score: 6.390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.47s (21.56%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4891|ppo_ep: 1|act_loss: 0.0318603515625|cri_loss: 0.016632080078125|unsuper_loss: 0.0
+average reward score: 5.734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.59s |Gather latency=0.00s (0.00%) |Generate time=1.77s (68.22%) |Training time=0.48s (18.37%) |Others=0.35 (13.41%)|CurSamplesPerSec=12.34 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4892|ppo_ep: 1|act_loss: 0.033935546875|cri_loss: 0.0182037353515625|unsuper_loss: 0.0
+average reward score: 5.05859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.21%) |Training time=0.48s (22.17%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4893|ppo_ep: 1|act_loss: 0.0110015869140625|cri_loss: 0.006687164306640625|unsuper_loss: 0.0
+average reward score: 4.71484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.53%) |Training time=0.48s (21.84%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4894|ppo_ep: 1|act_loss: -0.01580810546875|cri_loss: -0.00707244873046875|unsuper_loss: 0.0
+average reward score: 4.953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.97%) |Training time=0.47s (21.41%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4895|ppo_ep: 1|act_loss: -0.0055999755859375|cri_loss: -0.0019741058349609375|unsuper_loss: 0.0
+average reward score: 5.48046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.00%) |Training time=0.46s (21.38%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4896|ppo_ep: 1|act_loss: -0.039031982421875|cri_loss: -0.0185546875|unsuper_loss: 0.0
+average reward score: 5.296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.00%) |Training time=0.46s (21.33%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4897|ppo_ep: 1|act_loss: 0.0193634033203125|cri_loss: 0.0103302001953125|unsuper_loss: 0.0
+average reward score: 5.5859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.47%) |Training time=0.48s (21.91%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4898|ppo_ep: 1|act_loss: 0.00010347366333007812|cri_loss: 0.0005092620849609375|unsuper_loss: 0.0
+average reward score: 4.90625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.55%) |Training time=0.47s (21.82%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.47
+[2023-04-14 11:48:05,138] [INFO] [logging.py:96:log_dist] [Rank 0] step=4900, skipped=70, lr=[3.6301099189037464e-06, 3.6301099189037464e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:48:05,156] [INFO] [timer.py:199:stop] epoch=0/micro_step=4900/global_step=4900, RunningAvgSamplesPerSec=105.21540054820468, CurrSamplesPerSec=98.67209316003058, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:48:05,251] [INFO] [logging.py:96:log_dist] [Rank 0] step=4900, skipped=82, lr=[1.892082735050632e-06, 1.892082735050632e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4899|ppo_ep: 1|act_loss: -0.01531982421875|cri_loss: -0.00739288330078125|unsuper_loss: 0.0
+average reward score: 5.91796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.09%) |Training time=0.49s (22.24%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4900|ppo_ep: 1|act_loss: -0.0083160400390625|cri_loss: -0.0040283203125|unsuper_loss: 0.0
+average reward score: 5.30078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.43%) |Training time=0.51s (23.02%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.49 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4901|ppo_ep: 1|act_loss: -0.04620361328125|cri_loss: -0.0215606689453125|unsuper_loss: 0.0
+average reward score: 5.1484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.58%) |Training time=0.47s (21.74%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4902|ppo_ep: 1|act_loss: 0.06402587890625|cri_loss: 0.034637451171875|unsuper_loss: 0.0
+average reward score: 4.89453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.45%) |Training time=0.45s (20.77%) |Others=0.10 (4.78%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4903|ppo_ep: 1|act_loss: 0.04351806640625|cri_loss: 0.0232696533203125|unsuper_loss: 0.0
+average reward score: 4.90234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.85%) |Training time=0.42s (19.47%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4904|ppo_ep: 1|act_loss: -0.001407623291015625|cri_loss: -0.00046324729919433594|unsuper_loss: 0.0
+average reward score: 4.47265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.50%) |Training time=0.48s (21.84%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4905|ppo_ep: 1|act_loss: 0.0149078369140625|cri_loss: 0.007965087890625|unsuper_loss: 0.0
+average reward score: 5.1484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.41s |Gather latency=0.00s (0.00%) |Generate time=1.61s (66.96%) |Training time=0.47s (19.60%) |Others=0.32 (13.44%)|CurSamplesPerSec=13.28 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4906|ppo_ep: 1|act_loss: 0.036285400390625|cri_loss: 0.02142333984375|unsuper_loss: 0.0
+average reward score: 5.6484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.25%) |Training time=0.47s (21.13%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4907|ppo_ep: 1|act_loss: -0.0006389617919921875|cri_loss: 0.000934600830078125|unsuper_loss: 0.0
+average reward score: 5.359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.27%) |Training time=0.46s (21.10%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4908|ppo_ep: 1|act_loss: -0.0016384124755859375|cri_loss: -0.0007228851318359375|unsuper_loss: 0.0
+average reward score: 4.84375
+-------------------------------------------------------------------------------------
+|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.74s (75.90%) |Training time=0.45s (19.69%) |Others=0.10 (4.42%)|CurSamplesPerSec=13.96 |AvgSamplesPerSec=14.47
+[2023-04-14 11:48:27,335] [INFO] [logging.py:96:log_dist] [Rank 0] step=4910, skipped=70, lr=[3.6121212484839295e-06, 3.6121212484839295e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:48:27,353] [INFO] [timer.py:199:stop] epoch=0/micro_step=4910/global_step=4910, RunningAvgSamplesPerSec=105.21634977782396, CurrSamplesPerSec=102.24609105099212, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:48:27,446] [INFO] [logging.py:96:log_dist] [Rank 0] step=4910, skipped=82, lr=[1.882751184903167e-06, 1.882751184903167e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4909|ppo_ep: 1|act_loss: -0.01042938232421875|cri_loss: -0.00495147705078125|unsuper_loss: 0.0
+average reward score: 4.76953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.62%) |Training time=0.48s (21.78%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4910|ppo_ep: 1|act_loss: -0.005237579345703125|cri_loss: -0.0025177001953125|unsuper_loss: 0.0
+average reward score: 5.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.34%) |Training time=0.48s (22.05%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4911|ppo_ep: 1|act_loss: -0.02166748046875|cri_loss: -0.010040283203125|unsuper_loss: 0.0
+average reward score: 4.90625
+-------------------------------------------------------------------------------------
+|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.60s (71.55%) |Training time=0.47s (21.12%) |Others=0.16 (7.32%)|CurSamplesPerSec=14.28 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4912|ppo_ep: 1|act_loss: -0.04510498046875|cri_loss: -0.02032470703125|unsuper_loss: 0.0
+average reward score: 4.21484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.35%) |Training time=0.48s (22.03%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4913|ppo_ep: 1|act_loss: -0.019134521484375|cri_loss: -0.00905609130859375|unsuper_loss: 0.0
+average reward score: 5.00390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.64s (73.88%) |Training time=0.48s (21.47%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.46 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4914|ppo_ep: 1|act_loss: 0.01371002197265625|cri_loss: 0.006977081298828125|unsuper_loss: 0.0
+average reward score: 5.13671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.21%) |Training time=0.46s (21.14%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4915|ppo_ep: 1|act_loss: 0.01544189453125|cri_loss: 0.00787353515625|unsuper_loss: 0.0
+average reward score: 5.33984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.30%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4916|ppo_ep: 1|act_loss: 0.017669677734375|cri_loss: 0.00927734375|unsuper_loss: 0.0
+average reward score: 5.33984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.75%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4917|ppo_ep: 1|act_loss: 0.04022216796875|cri_loss: 0.021209716796875|unsuper_loss: 0.0
+average reward score: 4.89453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.60%) |Training time=0.47s (21.76%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4918|ppo_ep: 1|act_loss: 0.020233154296875|cri_loss: 0.0102386474609375|unsuper_loss: 0.0
+average reward score: 5.22265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.69%) |Training time=0.47s (21.66%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.47
+[2023-04-14 11:48:49,170] [INFO] [logging.py:96:log_dist] [Rank 0] step=4920, skipped=70, lr=[3.594150555853121e-06, 3.594150555853121e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:48:49,187] [INFO] [timer.py:199:stop] epoch=0/micro_step=4920/global_step=4920, RunningAvgSamplesPerSec=105.21233766377085, CurrSamplesPerSec=101.24344625791944, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:48:49,280] [INFO] [logging.py:96:log_dist] [Rank 0] step=4920, skipped=82, lr=[1.8734287838721742e-06, 1.8734287838721742e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4919|ppo_ep: 1|act_loss: 0.045318603515625|cri_loss: 0.024871826171875|unsuper_loss: 0.0
+average reward score: 5.671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.29%) |Training time=0.48s (22.08%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4920|ppo_ep: 1|act_loss: 0.04046630859375|cri_loss: 0.02252197265625|unsuper_loss: 0.0
+average reward score: 5.15625
+-------------------------------------------------------------------------------------
+|E2E latency=2.38s |Gather latency=0.00s (0.00%) |Generate time=1.69s (70.85%) |Training time=0.59s (24.92%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.44 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4921|ppo_ep: 1|act_loss: -0.029449462890625|cri_loss: -0.01418304443359375|unsuper_loss: 0.0
+average reward score: 4.5234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.30%) |Training time=0.48s (22.05%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4922|ppo_ep: 1|act_loss: -0.01837158203125|cri_loss: -0.0085601806640625|unsuper_loss: 0.0
+average reward score: 5.328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.47s (21.93%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4923|ppo_ep: 1|act_loss: 0.047027587890625|cri_loss: 0.0258941650390625|unsuper_loss: 0.0
+average reward score: 4.68359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.74%) |Training time=0.47s (21.63%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4924|ppo_ep: 1|act_loss: 0.0205841064453125|cri_loss: 0.010498046875|unsuper_loss: 0.0
+average reward score: 4.75390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.47s (21.94%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4925|ppo_ep: 1|act_loss: 0.00734710693359375|cri_loss: 0.003772735595703125|unsuper_loss: 0.0
+average reward score: 4.86328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.37%) |Training time=0.48s (22.00%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4926|ppo_ep: 1|act_loss: -0.010284423828125|cri_loss: -0.004993438720703125|unsuper_loss: 0.0
+average reward score: 4.35546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.78%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4927|ppo_ep: 1|act_loss: -0.03948974609375|cri_loss: -0.0186767578125|unsuper_loss: 0.0
+average reward score: 5.28515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.31%) |Training time=0.48s (22.07%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4928|ppo_ep: 1|act_loss: -0.00012969970703125|cri_loss: 0.0004787445068359375|unsuper_loss: 0.0
+average reward score: 4.94140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.53%) |Training time=0.47s (21.84%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.47
+[2023-04-14 11:49:11,006] [INFO] [logging.py:96:log_dist] [Rank 0] step=4930, skipped=70, lr=[3.576198107380337e-06, 3.576198107380337e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:49:11,024] [INFO] [timer.py:199:stop] epoch=0/micro_step=4930/global_step=4930, RunningAvgSamplesPerSec=105.20099548291466, CurrSamplesPerSec=102.2384583684684, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:49:11,117] [INFO] [logging.py:96:log_dist] [Rank 0] step=4930, skipped=82, lr=[1.8641156701381243e-06, 1.8641156701381243e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4929|ppo_ep: 1|act_loss: 0.003734588623046875|cri_loss: 0.003139495849609375|unsuper_loss: 0.0
+average reward score: 4.70703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.48s (21.97%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4930|ppo_ep: 1|act_loss: 0.0060272216796875|cri_loss: 0.0032634735107421875|unsuper_loss: 0.0
+average reward score: 4.55859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.78%) |Training time=0.49s (22.61%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4931|ppo_ep: 1|act_loss: 0.07098388671875|cri_loss: 0.03961181640625|unsuper_loss: 0.0
+average reward score: 4.91015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.06%) |Training time=0.49s (22.34%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4932|ppo_ep: 1|act_loss: 0.013092041015625|cri_loss: 0.0085296630859375|unsuper_loss: 0.0
+average reward score: 5.203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.59s (69.88%) |Training time=0.49s (21.32%) |Others=0.20 (8.80%)|CurSamplesPerSec=14.06 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4933|ppo_ep: 1|act_loss: -0.0150146484375|cri_loss: -0.007354736328125|unsuper_loss: 0.0
+average reward score: 5.41796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.49%) |Training time=0.47s (21.85%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4934|ppo_ep: 1|act_loss: -0.023773193359375|cri_loss: -0.0106201171875|unsuper_loss: 0.0
+average reward score: 4.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.26%) |Training time=0.48s (22.13%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4935|ppo_ep: 1|act_loss: -0.062286376953125|cri_loss: -0.0286865234375|unsuper_loss: 0.0
+average reward score: 4.9921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.57%) |Training time=0.47s (21.81%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4936|ppo_ep: 1|act_loss: -0.036224365234375|cri_loss: -0.01629638671875|unsuper_loss: 0.0
+average reward score: 6.27734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.50%) |Training time=0.47s (20.21%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4937|ppo_ep: 1|act_loss: -0.03900146484375|cri_loss: -0.0185546875|unsuper_loss: 0.0
+average reward score: 5.05859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.62s (71.80%) |Training time=0.49s (21.56%) |Others=0.15 (6.64%)|CurSamplesPerSec=14.18 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4938|ppo_ep: 1|act_loss: 0.005672454833984375|cri_loss: 0.00292205810546875|unsuper_loss: 0.0
+average reward score: 5.17578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.64%) |Training time=0.47s (21.75%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.47
+[2023-04-14 11:49:33,102] [INFO] [logging.py:96:log_dist] [Rank 0] step=4940, skipped=70, lr=[3.5582641691641734e-06, 3.5582641691641734e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:49:33,121] [INFO] [timer.py:199:stop] epoch=0/micro_step=4940/global_step=4940, RunningAvgSamplesPerSec=105.19055610924273, CurrSamplesPerSec=97.9630665250696, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:49:33,213] [INFO] [logging.py:96:log_dist] [Rank 0] step=4940, skipped=82, lr=[1.8548119817438313e-06, 1.8548119817438313e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4939|ppo_ep: 1|act_loss: -0.02252197265625|cri_loss: -0.01056671142578125|unsuper_loss: 0.0
+average reward score: 5.61328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.97%) |Training time=0.49s (22.45%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4940|ppo_ep: 1|act_loss: 0.033447265625|cri_loss: 0.0177001953125|unsuper_loss: 0.0
+average reward score: 5.4609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.72%) |Training time=0.47s (21.62%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4941|ppo_ep: 1|act_loss: 0.01522064208984375|cri_loss: 0.00787353515625|unsuper_loss: 0.0
+average reward score: 4.125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.80%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4942|ppo_ep: 1|act_loss: 0.037078857421875|cri_loss: 0.0198516845703125|unsuper_loss: 0.0
+average reward score: 4.9296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.68%) |Training time=0.47s (21.68%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4943|ppo_ep: 1|act_loss: 0.009552001953125|cri_loss: 0.00495147705078125|unsuper_loss: 0.0
+average reward score: 5.87890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.70%) |Training time=0.47s (21.68%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4944|ppo_ep: 1|act_loss: -0.0019121170043945312|cri_loss: -0.0006513595581054688|unsuper_loss: 0.0
+average reward score: 4.28515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.36%) |Training time=0.48s (22.00%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4945|ppo_ep: 1|act_loss: 0.001495361328125|cri_loss: 0.0010557174682617188|unsuper_loss: 0.0
+average reward score: 5.1015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.73%) |Training time=0.47s (21.65%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4946|ppo_ep: 1|act_loss: 0.0030002593994140625|cri_loss: 0.001708984375|unsuper_loss: 0.0
+average reward score: 4.83203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.48s (21.98%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4947|ppo_ep: 1|act_loss: 0.00046539306640625|cri_loss: 0.0007371902465820312|unsuper_loss: 0.0
+average reward score: 5.08203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.91%) |Training time=0.49s (22.48%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4948|ppo_ep: 1|act_loss: -0.00258636474609375|cri_loss: -0.0011606216430664062|unsuper_loss: 0.0
+average reward score: 5.5625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.06%) |Training time=0.48s (22.31%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.47
+[2023-04-14 11:49:54,750] [INFO] [logging.py:96:log_dist] [Rank 0] step=4950, skipped=70, lr=[3.5403490070288552e-06, 3.5403490070288552e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:49:54,769] [INFO] [timer.py:199:stop] epoch=0/micro_step=4950/global_step=4950, RunningAvgSamplesPerSec=105.18537741541601, CurrSamplesPerSec=103.79347362934539, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:49:54,861] [INFO] [logging.py:96:log_dist] [Rank 0] step=4950, skipped=82, lr=[1.8455178565923993e-06, 1.8455178565923993e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4949|ppo_ep: 1|act_loss: -0.017669677734375|cri_loss: -0.00864410400390625|unsuper_loss: 0.0
+average reward score: 4.9296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.55%) |Training time=0.47s (21.83%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4950|ppo_ep: 1|act_loss: -0.01171875|cri_loss: -0.0056610107421875|unsuper_loss: 0.0
+average reward score: 5.71875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.08%) |Training time=0.48s (22.32%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4951|ppo_ep: 1|act_loss: -0.0146484375|cri_loss: -0.007099151611328125|unsuper_loss: 0.0
+average reward score: 4.75
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.97%) |Training time=0.49s (22.41%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4952|ppo_ep: 1|act_loss: -0.01824951171875|cri_loss: -0.0089569091796875|unsuper_loss: 0.0
+average reward score: 5.54296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.72s (73.83%) |Training time=0.50s (21.57%) |Others=0.11 (4.60%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4953|ppo_ep: 1|act_loss: 0.00946044921875|cri_loss: 0.005218505859375|unsuper_loss: 0.0
+average reward score: 5.58203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.10%) |Training time=0.48s (22.30%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4954|ppo_ep: 1|act_loss: -0.01007080078125|cri_loss: -0.0048370361328125|unsuper_loss: 0.0
+average reward score: 5.49609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.89%) |Training time=0.49s (22.52%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4955|ppo_ep: 1|act_loss: -0.00392913818359375|cri_loss: -0.0012559890747070312|unsuper_loss: 0.0
+average reward score: 4.26171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.31%) |Training time=0.48s (22.06%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4956|ppo_ep: 1|act_loss: -0.0238494873046875|cri_loss: -0.0116119384765625|unsuper_loss: 0.0
+average reward score: 5.6640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.09%) |Training time=0.48s (22.31%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4957|ppo_ep: 1|act_loss: -0.0015230178833007812|cri_loss: -3.814697265625e-05|unsuper_loss: 0.0
+average reward score: 4.828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.48s (22.18%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4958|ppo_ep: 1|act_loss: -0.002532958984375|cri_loss: -0.0006256103515625|unsuper_loss: 0.0
+average reward score: 4.70703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.48s (21.98%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.47
+[2023-04-14 11:50:16,589] [INFO] [logging.py:96:log_dist] [Rank 0] step=4960, skipped=70, lr=[3.5224528865203054e-06, 3.5224528865203054e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:50:16,607] [INFO] [timer.py:199:stop] epoch=0/micro_step=4960/global_step=4960, RunningAvgSamplesPerSec=105.17390656645385, CurrSamplesPerSec=102.20575977508565, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:50:16,700] [INFO] [logging.py:96:log_dist] [Rank 0] step=4960, skipped=82, lr=[1.8362334324451853e-06, 1.8362334324451853e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4959|ppo_ep: 1|act_loss: -0.0043487548828125|cri_loss: -0.002040863037109375|unsuper_loss: 0.0
+average reward score: 4.890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.48s (21.94%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4960|ppo_ep: 1|act_loss: -0.0313720703125|cri_loss: -0.0146636962890625|unsuper_loss: 0.0
+average reward score: 5.5
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.30%) |Training time=0.48s (21.95%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4961|ppo_ep: 1|act_loss: -0.01314544677734375|cri_loss: -0.00518798828125|unsuper_loss: 0.0
+average reward score: 5.328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.72%) |Training time=0.50s (22.71%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4962|ppo_ep: 1|act_loss: -0.002475738525390625|cri_loss: -0.001026153564453125|unsuper_loss: 0.0
+average reward score: 5.55859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.03%) |Training time=0.49s (22.35%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4963|ppo_ep: 1|act_loss: 0.06939697265625|cri_loss: 0.03741455078125|unsuper_loss: 0.0
+average reward score: 6.1875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.13%) |Training time=0.48s (22.27%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4964|ppo_ep: 1|act_loss: 0.00904083251953125|cri_loss: 0.00525665283203125|unsuper_loss: 0.0
+average reward score: 5.7578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.79%) |Training time=0.49s (22.60%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4965|ppo_ep: 1|act_loss: 0.002620697021484375|cri_loss: 0.0014257431030273438|unsuper_loss: 0.0
+average reward score: 4.8515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.12%) |Training time=0.48s (21.96%) |Others=0.11 (4.92%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4966|ppo_ep: 1|act_loss: 0.003570556640625|cri_loss: 0.0019779205322265625|unsuper_loss: 0.0
+average reward score: 4.97265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.60s (70.65%) |Training time=0.51s (22.61%) |Others=0.15 (6.74%)|CurSamplesPerSec=14.11 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4967|ppo_ep: 1|act_loss: 0.01971435546875|cri_loss: 0.01021575927734375|unsuper_loss: 0.0
+average reward score: 5.37109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.43s |Gather latency=0.00s (0.00%) |Generate time=1.80s (73.83%) |Training time=0.54s (22.05%) |Others=0.10 (4.12%)|CurSamplesPerSec=13.14 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4968|ppo_ep: 1|act_loss: 0.004993438720703125|cri_loss: 0.0026988983154296875|unsuper_loss: 0.0
+average reward score: 7.1328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.05%) |Training time=0.48s (22.33%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.47
+[2023-04-14 11:50:38,698] [INFO] [logging.py:96:log_dist] [Rank 0] step=4970, skipped=70, lr=[3.5045760729022005e-06, 3.5045760729022005e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:50:38,716] [INFO] [timer.py:199:stop] epoch=0/micro_step=4970/global_step=4970, RunningAvgSamplesPerSec=105.15504260283433, CurrSamplesPerSec=98.29984817535782, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:50:38,809] [INFO] [logging.py:96:log_dist] [Rank 0] step=4970, skipped=82, lr=[1.8269588469197518e-06, 1.8269588469197518e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4969|ppo_ep: 1|act_loss: 0.00392913818359375|cri_loss: 0.00235748291015625|unsuper_loss: 0.0
+average reward score: 5.50390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.90%) |Training time=0.49s (22.51%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4970|ppo_ep: 1|act_loss: 0.01861572265625|cri_loss: 0.00971221923828125|unsuper_loss: 0.0
+average reward score: 5.15234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.92%) |Training time=0.49s (22.45%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4971|ppo_ep: 1|act_loss: 0.010284423828125|cri_loss: 0.005462646484375|unsuper_loss: 0.0
+average reward score: 5.0
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.94%) |Training time=0.49s (22.44%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4972|ppo_ep: 1|act_loss: 0.0051422119140625|cri_loss: 0.002857208251953125|unsuper_loss: 0.0
+average reward score: 5.84765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.81%) |Training time=0.49s (22.56%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4973|ppo_ep: 1|act_loss: -0.009521484375|cri_loss: -0.0037899017333984375|unsuper_loss: 0.0
+average reward score: 5.8125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.59%) |Training time=0.50s (22.80%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4974|ppo_ep: 1|act_loss: -0.014678955078125|cri_loss: -0.00605010986328125|unsuper_loss: 0.0
+average reward score: 4.63671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.68%) |Training time=0.47s (21.69%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4975|ppo_ep: 1|act_loss: 0.0279693603515625|cri_loss: 0.01470947265625|unsuper_loss: 0.0
+average reward score: 5.94140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.75%) |Training time=0.47s (21.66%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4976|ppo_ep: 1|act_loss: 0.014923095703125|cri_loss: 0.00783538818359375|unsuper_loss: 0.0
+average reward score: 4.3984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.15%) |Training time=0.48s (22.23%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4977|ppo_ep: 1|act_loss: 4.2438507080078125e-05|cri_loss: 0.00018358230590820312|unsuper_loss: 0.0
+average reward score: 5.58203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.47%) |Training time=0.47s (21.89%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4978|ppo_ep: 1|act_loss: 0.0005669593811035156|cri_loss: 0.0005650520324707031|unsuper_loss: 0.0
+average reward score: 5.31640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.08%) |Training time=0.48s (22.32%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.47
+[2023-04-14 11:51:00,406] [INFO] [logging.py:96:log_dist] [Rank 0] step=4980, skipped=70, lr=[3.4867188311520448e-06, 3.4867188311520448e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:51:00,424] [INFO] [timer.py:199:stop] epoch=0/micro_step=4980/global_step=4980, RunningAvgSamplesPerSec=105.14420331571479, CurrSamplesPerSec=101.33762187033065, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:51:00,517] [INFO] [logging.py:96:log_dist] [Rank 0] step=4980, skipped=82, lr=[1.817694237487831e-06, 1.817694237487831e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4979|ppo_ep: 1|act_loss: -0.0105438232421875|cri_loss: -0.004886627197265625|unsuper_loss: 0.0
+average reward score: 4.90234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.25%) |Training time=0.48s (22.14%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4980|ppo_ep: 1|act_loss: -0.0079193115234375|cri_loss: -0.003604888916015625|unsuper_loss: 0.0
+average reward score: 4.36328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.18%) |Training time=0.48s (22.20%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4981|ppo_ep: 1|act_loss: 0.03680419921875|cri_loss: 0.0190582275390625|unsuper_loss: 0.0
+average reward score: 6.2421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.06%) |Training time=0.48s (22.27%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4982|ppo_ep: 1|act_loss: -0.0011386871337890625|cri_loss: -0.00029850006103515625|unsuper_loss: 0.0
+average reward score: 5.54296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.37%) |Training time=0.50s (21.38%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.60 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4983|ppo_ep: 1|act_loss: -0.024505615234375|cri_loss: -0.0117034912109375|unsuper_loss: 0.0
+average reward score: 4.6015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.59%) |Training time=0.47s (21.78%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4984|ppo_ep: 1|act_loss: -0.02398681640625|cri_loss: -0.011444091796875|unsuper_loss: 0.0
+average reward score: 4.4765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.25%) |Training time=0.48s (22.12%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4985|ppo_ep: 1|act_loss: 0.014617919921875|cri_loss: 0.007610321044921875|unsuper_loss: 0.0
+average reward score: 4.84375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.07%) |Training time=0.48s (22.32%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4986|ppo_ep: 1|act_loss: 0.00223541259765625|cri_loss: 0.0012683868408203125|unsuper_loss: 0.0
+average reward score: 5.22265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.25%) |Training time=0.48s (22.15%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4987|ppo_ep: 1|act_loss: -0.008880615234375|cri_loss: -0.00402069091796875|unsuper_loss: 0.0
+average reward score: 5.1640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.35%) |Training time=0.47s (21.90%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4988|ppo_ep: 1|act_loss: 0.018798828125|cri_loss: 0.01041412353515625|unsuper_loss: 0.0
+average reward score: 4.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.47s (21.94%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.47
+[2023-04-14 11:51:22,236] [INFO] [logging.py:96:log_dist] [Rank 0] step=4990, skipped=70, lr=[3.4688814259572385e-06, 3.4688814259572385e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:51:22,255] [INFO] [timer.py:199:stop] epoch=0/micro_step=4990/global_step=4990, RunningAvgSamplesPerSec=105.13524135430049, CurrSamplesPerSec=104.26232727521308, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:51:22,348] [INFO] [logging.py:96:log_dist] [Rank 0] step=4990, skipped=82, lr=[1.8084397414732826e-06, 1.8084397414732826e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4989|ppo_ep: 1|act_loss: 0.007671356201171875|cri_loss: 0.004161834716796875|unsuper_loss: 0.0
+average reward score: 6.22265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.59%) |Training time=0.47s (21.77%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4990|ppo_ep: 1|act_loss: 0.00853729248046875|cri_loss: 0.00528717041015625|unsuper_loss: 0.0
+average reward score: 4.984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.89%) |Training time=0.46s (21.44%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4991|ppo_ep: 1|act_loss: 0.004535675048828125|cri_loss: 0.0027008056640625|unsuper_loss: 0.0
+average reward score: 5.0625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.41%) |Training time=0.47s (21.95%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4992|ppo_ep: 1|act_loss: 0.0101776123046875|cri_loss: 0.005615234375|unsuper_loss: 0.0
+average reward score: 5.0703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.35%) |Training time=0.45s (21.01%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4993|ppo_ep: 1|act_loss: 0.0082550048828125|cri_loss: 0.00426483154296875|unsuper_loss: 0.0
+average reward score: 5.05859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.24%) |Training time=0.46s (21.11%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4994|ppo_ep: 1|act_loss: -0.0047760009765625|cri_loss: -0.002117156982421875|unsuper_loss: 0.0
+average reward score: 5.609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.22%) |Training time=0.46s (21.12%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4995|ppo_ep: 1|act_loss: 0.05169677734375|cri_loss: 0.0268707275390625|unsuper_loss: 0.0
+average reward score: 4.87890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.31%) |Training time=0.46s (21.08%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4996|ppo_ep: 1|act_loss: -0.016204833984375|cri_loss: -0.00792694091796875|unsuper_loss: 0.0
+average reward score: 4.71875
+-------------------------------------------------------------------------------------
+|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.64s (73.51%) |Training time=0.45s (20.20%) |Others=0.14 (6.29%)|CurSamplesPerSec=14.37 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4997|ppo_ep: 1|act_loss: 0.01568603515625|cri_loss: 0.0084228515625|unsuper_loss: 0.0
+average reward score: 5.578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.50%) |Training time=0.45s (20.89%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.47
+epoch: 0|step: 4998|ppo_ep: 1|act_loss: 0.03155517578125|cri_loss: 0.0169525146484375|unsuper_loss: 0.0
+average reward score: 5.49609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.23%) |Training time=0.45s (19.46%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.78 |AvgSamplesPerSec=14.47
+[2023-04-14 11:51:44,060] [INFO] [logging.py:96:log_dist] [Rank 0] step=5000, skipped=70, lr=[3.4510641217111588e-06, 3.4510641217111588e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:51:44,078] [INFO] [timer.py:199:stop] epoch=0/micro_step=5000/global_step=5000, RunningAvgSamplesPerSec=105.14249500141725, CurrSamplesPerSec=110.73403004604508, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:51:44,171] [INFO] [logging.py:96:log_dist] [Rank 0] step=5000, skipped=82, lr=[1.7991954960500646e-06, 1.7991954960500646e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 4999|ppo_ep: 1|act_loss: 0.030792236328125|cri_loss: 0.01605224609375|unsuper_loss: 0.0
+average reward score: 5.6640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.36%) |Training time=0.45s (21.01%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.47
+epoch: 0|step: 5000|ppo_ep: 1|act_loss: 0.006744384765625|cri_loss: 0.003589630126953125|unsuper_loss: 0.0
+average reward score: 6.08203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.17%) |Training time=0.46s (21.17%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.47
+epoch: 0|step: 5001|ppo_ep: 1|act_loss: 0.0019435882568359375|cri_loss: 0.001239776611328125|unsuper_loss: 0.0
+average reward score: 5.2109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.11%) |Training time=0.46s (21.24%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.47
+epoch: 0|step: 5002|ppo_ep: 1|act_loss: -0.00066375732421875|cri_loss: -0.0001690387725830078|unsuper_loss: 0.0
+average reward score: 4.44140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.70%) |Training time=0.46s (21.08%) |Others=0.11 (5.23%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.47
+epoch: 0|step: 5003|ppo_ep: 1|act_loss: -0.00406646728515625|cri_loss: -0.0018291473388671875|unsuper_loss: 0.0
+average reward score: 4.70703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.08%) |Training time=0.46s (21.26%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.47
+epoch: 0|step: 5004|ppo_ep: 1|act_loss: -0.012481689453125|cri_loss: -0.00572967529296875|unsuper_loss: 0.0
+average reward score: 5.55859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.45s (21.06%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.47
+epoch: 0|step: 5005|ppo_ep: 1|act_loss: -0.0100860595703125|cri_loss: -0.00489044189453125|unsuper_loss: 0.0
+average reward score: 4.91796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.06%) |Training time=0.46s (21.27%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.47
+epoch: 0|step: 5006|ppo_ep: 1|act_loss: 0.0216827392578125|cri_loss: 0.01190185546875|unsuper_loss: 0.0
+average reward score: 5.67578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.19%) |Training time=0.46s (21.16%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.47
+epoch: 0|step: 5007|ppo_ep: 1|act_loss: -0.004222869873046875|cri_loss: -0.0015840530395507812|unsuper_loss: 0.0
+average reward score: 5.71484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.29%) |Training time=0.45s (21.07%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.47
+epoch: 0|step: 5008|ppo_ep: 1|act_loss: 0.00676727294921875|cri_loss: 0.003551483154296875|unsuper_loss: 0.0
+average reward score: 6.16015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.18%) |Training time=0.46s (21.18%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.47
+[2023-04-14 11:52:05,635] [INFO] [logging.py:96:log_dist] [Rank 0] step=5010, skipped=70, lr=[3.4332671825092336e-06, 3.4332671825092336e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:52:05,654] [INFO] [timer.py:199:stop] epoch=0/micro_step=5010/global_step=5010, RunningAvgSamplesPerSec=105.14970221940123, CurrSamplesPerSec=110.66372811649983, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:52:05,747] [INFO] [logging.py:96:log_dist] [Rank 0] step=5010, skipped=82, lr=[1.7899616382401935e-06, 1.7899616382401935e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5009|ppo_ep: 1|act_loss: 0.01934814453125|cri_loss: 0.0098876953125|unsuper_loss: 0.0
+average reward score: 5.3046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.36%) |Training time=0.45s (20.99%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.47
+epoch: 0|step: 5010|ppo_ep: 1|act_loss: 0.0182952880859375|cri_loss: 0.0097503662109375|unsuper_loss: 0.0
+average reward score: 4.62109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.20%) |Training time=0.46s (21.16%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.47
+epoch: 0|step: 5011|ppo_ep: 1|act_loss: 0.02813720703125|cri_loss: 0.01523590087890625|unsuper_loss: 0.0
+average reward score: 6.0546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.16%) |Training time=0.46s (21.19%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.47
+epoch: 0|step: 5012|ppo_ep: 1|act_loss: 0.058380126953125|cri_loss: 0.0302886962890625|unsuper_loss: 0.0
+average reward score: 6.0703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.58%) |Training time=0.45s (20.86%) |Others=0.12 (5.56%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.47
+epoch: 0|step: 5013|ppo_ep: 1|act_loss: -0.017242431640625|cri_loss: -0.00830841064453125|unsuper_loss: 0.0
+average reward score: 6.109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.73s (75.64%) |Training time=0.46s (19.97%) |Others=0.10 (4.39%)|CurSamplesPerSec=13.98 |AvgSamplesPerSec=14.47
+epoch: 0|step: 5014|ppo_ep: 1|act_loss: -0.01389312744140625|cri_loss: -0.0064849853515625|unsuper_loss: 0.0
+average reward score: 4.3046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.09%) |Training time=0.46s (21.27%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.47
+epoch: 0|step: 5015|ppo_ep: 1|act_loss: 0.0277099609375|cri_loss: 0.0140838623046875|unsuper_loss: 0.0
+average reward score: 5.85546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.17%) |Training time=0.46s (21.20%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.47
+epoch: 0|step: 5016|ppo_ep: 1|act_loss: 0.0049591064453125|cri_loss: 0.002826690673828125|unsuper_loss: 0.0
+average reward score: 4.80078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.54%) |Training time=0.45s (20.81%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.47
+epoch: 0|step: 5017|ppo_ep: 1|act_loss: 0.002994537353515625|cri_loss: 0.0016851425170898438|unsuper_loss: 0.0
+average reward score: 5.60546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.38%) |Training time=0.45s (20.97%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.47
+epoch: 0|step: 5018|ppo_ep: 1|act_loss: 0.0037860870361328125|cri_loss: 0.0021533966064453125|unsuper_loss: 0.0
+average reward score: 5.61328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.18%) |Training time=0.46s (21.17%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.47
+[2023-04-14 11:52:27,314] [INFO] [logging.py:96:log_dist] [Rank 0] step=5020, skipped=70, lr=[3.4154908721450375e-06, 3.4154908721450375e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:52:27,333] [INFO] [timer.py:199:stop] epoch=0/micro_step=5020/global_step=5020, RunningAvgSamplesPerSec=105.15842373077857, CurrSamplesPerSec=111.53061617927632, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:52:27,425] [INFO] [logging.py:96:log_dist] [Rank 0] step=5020, skipped=82, lr=[1.7807383049117185e-06, 1.7807383049117185e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5019|ppo_ep: 1|act_loss: -0.00528717041015625|cri_loss: -0.0023040771484375|unsuper_loss: 0.0
+average reward score: 5.6640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.44%) |Training time=0.45s (20.91%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.47
+epoch: 0|step: 5020|ppo_ep: 1|act_loss: -0.02325439453125|cri_loss: -0.01102447509765625|unsuper_loss: 0.0
+average reward score: 5.89453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.74%) |Training time=0.45s (20.60%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.47
+epoch: 0|step: 5021|ppo_ep: 1|act_loss: -0.0023365020751953125|cri_loss: -0.0009179115295410156|unsuper_loss: 0.0
+average reward score: 4.0390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.46%) |Training time=0.45s (20.89%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.47
+epoch: 0|step: 5022|ppo_ep: 1|act_loss: 0.02752685546875|cri_loss: 0.014404296875|unsuper_loss: 0.0
+average reward score: 4.6875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.24%) |Training time=0.46s (21.12%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.47
+epoch: 0|step: 5023|ppo_ep: 1|act_loss: -0.00722503662109375|cri_loss: -0.0035037994384765625|unsuper_loss: 0.0
+average reward score: 5.44140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.14%) |Training time=0.46s (21.23%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.47
+epoch: 0|step: 5024|ppo_ep: 1|act_loss: -0.030426025390625|cri_loss: -0.01476287841796875|unsuper_loss: 0.0
+average reward score: 6.5
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.39%) |Training time=0.45s (20.97%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.47
+epoch: 0|step: 5025|ppo_ep: 1|act_loss: -0.03466796875|cri_loss: -0.0169677734375|unsuper_loss: 0.0
+average reward score: 5.98828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.48%) |Training time=0.45s (20.80%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.47
+epoch: 0|step: 5026|ppo_ep: 1|act_loss: 0.00052642822265625|cri_loss: 0.0003490447998046875|unsuper_loss: 0.0
+average reward score: 4.234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.56%) |Training time=0.45s (19.97%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.20 |AvgSamplesPerSec=14.47
+epoch: 0|step: 5027|ppo_ep: 1|act_loss: 0.012939453125|cri_loss: 0.0068511962890625|unsuper_loss: 0.0
+average reward score: 6.3203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.03%) |Training time=0.46s (21.34%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.47
+epoch: 0|step: 5028|ppo_ep: 1|act_loss: 0.01593017578125|cri_loss: 0.00855255126953125|unsuper_loss: 0.0
+average reward score: 5.2578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.34%) |Training time=0.45s (19.32%) |Others=0.10 (4.34%)|CurSamplesPerSec=13.80 |AvgSamplesPerSec=14.47
+[2023-04-14 11:52:49,163] [INFO] [logging.py:96:log_dist] [Rank 0] step=5030, skipped=70, lr=[3.397735454106371e-06, 3.397735454106371e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:52:49,181] [INFO] [timer.py:199:stop] epoch=0/micro_step=5030/global_step=5030, RunningAvgSamplesPerSec=105.16850634145916, CurrSamplesPerSec=111.15689603482346, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:52:49,274] [INFO] [logging.py:96:log_dist] [Rank 0] step=5030, skipped=82, lr=[1.7715256327766887e-06, 1.7715256327766887e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5029|ppo_ep: 1|act_loss: 0.00400543212890625|cri_loss: 0.002166748046875|unsuper_loss: 0.0
+average reward score: 5.8359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.40%) |Training time=0.45s (20.95%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.47
+epoch: 0|step: 5030|ppo_ep: 1|act_loss: 0.051605224609375|cri_loss: 0.027069091796875|unsuper_loss: 0.0
+average reward score: 5.23828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.30%) |Training time=0.46s (21.10%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.47
+epoch: 0|step: 5031|ppo_ep: 1|act_loss: 0.003597259521484375|cri_loss: 0.0019397735595703125|unsuper_loss: 0.0
+average reward score: 5.23046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.34%) |Training time=0.45s (21.01%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.47
+epoch: 0|step: 5032|ppo_ep: 1|act_loss: 0.017059326171875|cri_loss: 0.0095672607421875|unsuper_loss: 0.0
+average reward score: 6.2265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.14%) |Training time=0.46s (21.22%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.47
+epoch: 0|step: 5033|ppo_ep: 1|act_loss: 0.00244903564453125|cri_loss: 0.00151824951171875|unsuper_loss: 0.0
+average reward score: 5.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.16%) |Training time=0.45s (20.97%) |Others=0.10 (4.87%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.47
+epoch: 0|step: 5034|ppo_ep: 1|act_loss: -0.0243072509765625|cri_loss: -0.01183319091796875|unsuper_loss: 0.0
+average reward score: 4.640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.25%) |Training time=0.45s (21.07%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.47
+epoch: 0|step: 5035|ppo_ep: 1|act_loss: -0.00789642333984375|cri_loss: -0.0035724639892578125|unsuper_loss: 0.0
+average reward score: 5.9140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.89%) |Training time=0.47s (21.48%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.47
+epoch: 0|step: 5036|ppo_ep: 1|act_loss: -0.0139312744140625|cri_loss: -0.006259918212890625|unsuper_loss: 0.0
+average reward score: 5.37890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.07%) |Training time=0.46s (21.23%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.47
+epoch: 0|step: 5037|ppo_ep: 1|act_loss: 0.03485107421875|cri_loss: 0.0181884765625|unsuper_loss: 0.0
+average reward score: 5.23046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.86%) |Training time=0.44s (20.45%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.47
+epoch: 0|step: 5038|ppo_ep: 1|act_loss: -0.0124359130859375|cri_loss: -0.005916595458984375|unsuper_loss: 0.0
+average reward score: 4.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.56%) |Training time=0.44s (20.76%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.47
+[2023-04-14 11:53:10,712] [INFO] [logging.py:96:log_dist] [Rank 0] step=5040, skipped=70, lr=[3.380001191571363e-06, 3.380001191571363e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:53:10,731] [INFO] [timer.py:199:stop] epoch=0/micro_step=5040/global_step=5040, RunningAvgSamplesPerSec=105.17816044808174, CurrSamplesPerSec=113.32568471926469, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:53:10,823] [INFO] [logging.py:96:log_dist] [Rank 0] step=5040, skipped=82, lr=[1.7623237583891302e-06, 1.7623237583891302e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5039|ppo_ep: 1|act_loss: 0.00580596923828125|cri_loss: 0.0030307769775390625|unsuper_loss: 0.0
+average reward score: 6.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.57%) |Training time=0.44s (20.76%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.47
+epoch: 0|step: 5040|ppo_ep: 1|act_loss: 0.0025310516357421875|cri_loss: 0.0014247894287109375|unsuper_loss: 0.0
+average reward score: 5.55078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.55%) |Training time=0.45s (20.79%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.47
+epoch: 0|step: 5041|ppo_ep: 1|act_loss: 0.0207977294921875|cri_loss: 0.0117645263671875|unsuper_loss: 0.0
+average reward score: 5.35546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.51%) |Training time=0.45s (20.80%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.47
+epoch: 0|step: 5042|ppo_ep: 1|act_loss: -0.002902984619140625|cri_loss: -0.0012054443359375|unsuper_loss: 0.0
+average reward score: 6.1796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.61%) |Training time=0.44s (20.71%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.47
+epoch: 0|step: 5043|ppo_ep: 1|act_loss: -0.0191192626953125|cri_loss: -0.0092010498046875|unsuper_loss: 0.0
+average reward score: 6.94140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.54%) |Training time=0.47s (20.16%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.47
+epoch: 0|step: 5044|ppo_ep: 1|act_loss: 0.036712646484375|cri_loss: 0.019195556640625|unsuper_loss: 0.0
+average reward score: 5.23046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.28%) |Training time=0.45s (21.07%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.47
+epoch: 0|step: 5045|ppo_ep: 1|act_loss: -0.0135040283203125|cri_loss: -0.00640869140625|unsuper_loss: 0.0
+average reward score: 5.015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.92%) |Training time=0.44s (20.39%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.47
+epoch: 0|step: 5046|ppo_ep: 1|act_loss: -0.01305389404296875|cri_loss: -0.005558013916015625|unsuper_loss: 0.0
+average reward score: 5.72265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.61%) |Training time=0.44s (20.73%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5047|ppo_ep: 1|act_loss: 0.01013946533203125|cri_loss: 0.005229949951171875|unsuper_loss: 0.0
+average reward score: 4.7109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.42%) |Training time=0.45s (20.92%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5048|ppo_ep: 1|act_loss: -0.0214691162109375|cri_loss: -0.010406494140625|unsuper_loss: 0.0
+average reward score: 5.8828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.71%) |Training time=0.44s (20.62%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.48
+[2023-04-14 11:53:32,354] [INFO] [logging.py:96:log_dist] [Rank 0] step=5050, skipped=70, lr=[3.3622883474045655e-06, 3.3622883474045655e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:53:32,372] [INFO] [timer.py:199:stop] epoch=0/micro_step=5050/global_step=5050, RunningAvgSamplesPerSec=105.19147333167017, CurrSamplesPerSec=115.78938432684527, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:53:32,465] [INFO] [logging.py:96:log_dist] [Rank 0] step=5050, skipped=82, lr=[1.7531328181430188e-06, 1.7531328181430188e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5049|ppo_ep: 1|act_loss: -0.008087158203125|cri_loss: -0.0038394927978515625|unsuper_loss: 0.0
+average reward score: 5.66015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.85%) |Training time=0.44s (20.47%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5050|ppo_ep: 1|act_loss: -0.0283050537109375|cri_loss: -0.01366424560546875|unsuper_loss: 0.0
+average reward score: 4.7890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.51%) |Training time=0.45s (20.83%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5051|ppo_ep: 1|act_loss: -0.0078582763671875|cri_loss: -0.0035495758056640625|unsuper_loss: 0.0
+average reward score: 3.90234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.23%) |Training time=0.49s (22.18%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5052|ppo_ep: 1|act_loss: 0.010894775390625|cri_loss: 0.005992889404296875|unsuper_loss: 0.0
+average reward score: 6.34375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.18%) |Training time=0.46s (21.05%) |Others=0.10 (4.78%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5053|ppo_ep: 1|act_loss: -0.018035888671875|cri_loss: -0.00865936279296875|unsuper_loss: 0.0
+average reward score: 5.54296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.06%) |Training time=0.44s (20.20%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5054|ppo_ep: 1|act_loss: 0.00310516357421875|cri_loss: 0.00226593017578125|unsuper_loss: 0.0
+average reward score: 6.3359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.80%) |Training time=0.45s (20.50%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5055|ppo_ep: 1|act_loss: 0.0013713836669921875|cri_loss: 0.0008897781372070312|unsuper_loss: 0.0
+average reward score: 5.65625
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.66s (75.48%) |Training time=0.44s (19.84%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.51 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5056|ppo_ep: 1|act_loss: 0.0013103485107421875|cri_loss: 0.0010128021240234375|unsuper_loss: 0.0
+average reward score: 5.65625
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.73s (76.33%) |Training time=0.43s (19.13%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.13 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5057|ppo_ep: 1|act_loss: -0.0010585784912109375|cri_loss: -0.0004253387451171875|unsuper_loss: 0.0
+average reward score: 5.15234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.64s (73.10%) |Training time=0.46s (20.41%) |Others=0.15 (6.49%)|CurSamplesPerSec=14.27 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5058|ppo_ep: 1|act_loss: 0.0149688720703125|cri_loss: 0.00787353515625|unsuper_loss: 0.0
+average reward score: 5.3125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.67s (76.50%) |Training time=0.41s (18.78%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.48
+[2023-04-14 11:53:54,288] [INFO] [logging.py:96:log_dist] [Rank 0] step=5060, skipped=70, lr=[3.34459718415306e-06, 3.34459718415306e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:53:54,306] [INFO] [timer.py:199:stop] epoch=0/micro_step=5060/global_step=5060, RunningAvgSamplesPerSec=105.20452398487495, CurrSamplesPerSec=101.51252440100772, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:53:54,399] [INFO] [logging.py:96:log_dist] [Rank 0] step=5060, skipped=82, lr=[1.7439529482702621e-06, 1.7439529482702621e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5059|ppo_ep: 1|act_loss: 0.025421142578125|cri_loss: 0.01319122314453125|unsuper_loss: 0.0
+average reward score: 4.3359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.25%) |Training time=0.48s (22.06%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5060|ppo_ep: 1|act_loss: -0.00881195068359375|cri_loss: -0.00376129150390625|unsuper_loss: 0.0
+average reward score: 5.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.52%) |Training time=0.47s (21.76%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5061|ppo_ep: 1|act_loss: -0.020477294921875|cri_loss: -0.0099945068359375|unsuper_loss: 0.0
+average reward score: 5.015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.77%) |Training time=0.46s (21.51%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5062|ppo_ep: 1|act_loss: -0.0039825439453125|cri_loss: -0.0017499923706054688|unsuper_loss: 0.0
+average reward score: 6.15234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.47s (21.88%) |Others=0.10 (4.76%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5063|ppo_ep: 1|act_loss: -0.0006098747253417969|cri_loss: -0.00013113021850585938|unsuper_loss: 0.0
+average reward score: 4.94140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.62%) |Training time=0.47s (21.66%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5064|ppo_ep: 1|act_loss: -0.018096923828125|cri_loss: -0.00875091552734375|unsuper_loss: 0.0
+average reward score: 5.40234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.49%) |Training time=0.47s (21.85%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5065|ppo_ep: 1|act_loss: -0.0052947998046875|cri_loss: -0.00177001953125|unsuper_loss: 0.0
+average reward score: 5.28125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.47s (21.51%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5066|ppo_ep: 1|act_loss: -0.033935546875|cri_loss: -0.0165252685546875|unsuper_loss: 0.0
+average reward score: 6.1953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.06%) |Training time=0.49s (22.25%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5067|ppo_ep: 1|act_loss: -0.0284423828125|cri_loss: -0.012725830078125|unsuper_loss: 0.0
+average reward score: 4.7734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.47s (21.73%) |Others=0.10 (4.82%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5068|ppo_ep: 1|act_loss: 0.0450439453125|cri_loss: 0.0234527587890625|unsuper_loss: 0.0
+average reward score: 6.26171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.47%) |Training time=0.47s (21.87%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.48
+[2023-04-14 11:54:15,945] [INFO] [logging.py:96:log_dist] [Rank 0] step=5070, skipped=70, lr=[3.326927964042562e-06, 3.326927964042562e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:54:15,963] [INFO] [timer.py:199:stop] epoch=0/micro_step=5070/global_step=5070, RunningAvgSamplesPerSec=105.20114928702438, CurrSamplesPerSec=102.08806252258077, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:54:16,056] [INFO] [logging.py:96:log_dist] [Rank 0] step=5070, skipped=82, lr=[1.734784284838676e-06, 1.734784284838676e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5069|ppo_ep: 1|act_loss: 0.03436279296875|cri_loss: 0.0176849365234375|unsuper_loss: 0.0
+average reward score: 4.53515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.40%) |Training time=0.48s (21.96%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5070|ppo_ep: 1|act_loss: 0.022369384765625|cri_loss: 0.0124053955078125|unsuper_loss: 0.0
+average reward score: 4.06640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.92%) |Training time=0.49s (22.42%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5071|ppo_ep: 1|act_loss: 0.0203399658203125|cri_loss: 0.0111236572265625|unsuper_loss: 0.0
+average reward score: 6.125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.79%) |Training time=0.49s (22.55%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5072|ppo_ep: 1|act_loss: 0.00518798828125|cri_loss: 0.0029449462890625|unsuper_loss: 0.0
+average reward score: 4.8125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.17%) |Training time=0.48s (22.16%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5073|ppo_ep: 1|act_loss: -0.034881591796875|cri_loss: -0.01507568359375|unsuper_loss: 0.0
+average reward score: 5.96875
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.74s (74.68%) |Training time=0.49s (20.99%) |Others=0.10 (4.33%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5074|ppo_ep: 1|act_loss: 0.001644134521484375|cri_loss: 0.0011005401611328125|unsuper_loss: 0.0
+average reward score: 5.12109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.73%) |Training time=0.49s (22.64%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5075|ppo_ep: 1|act_loss: 0.001613616943359375|cri_loss: 0.004695892333984375|unsuper_loss: 0.0
+average reward score: 4.4765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.97%) |Training time=0.49s (22.41%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5076|ppo_ep: 1|act_loss: -0.02197265625|cri_loss: -0.01043701171875|unsuper_loss: 0.0
+average reward score: 4.91015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.78%) |Training time=0.49s (22.57%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5077|ppo_ep: 1|act_loss: -0.00782012939453125|cri_loss: -0.0032196044921875|unsuper_loss: 0.0
+average reward score: 5.3984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.09%) |Training time=0.48s (22.26%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5078|ppo_ep: 1|act_loss: 0.00946044921875|cri_loss: 0.00511932373046875|unsuper_loss: 0.0
+average reward score: 5.64453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.66%) |Training time=0.49s (22.74%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.48
+[2023-04-14 11:54:37,791] [INFO] [logging.py:96:log_dist] [Rank 0] step=5080, skipped=70, lr=[3.309280948973539e-06, 3.309280948973539e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:54:37,809] [INFO] [timer.py:199:stop] epoch=0/micro_step=5080/global_step=5080, RunningAvgSamplesPerSec=105.1869830044589, CurrSamplesPerSec=97.66836872434452, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:54:37,902] [INFO] [logging.py:96:log_dist] [Rank 0] step=5080, skipped=82, lr=[1.725626963749971e-06, 1.725626963749971e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5079|ppo_ep: 1|act_loss: 0.003009796142578125|cri_loss: 0.00188446044921875|unsuper_loss: 0.0
+average reward score: 5.93359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.81%) |Training time=0.49s (22.58%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5080|ppo_ep: 1|act_loss: 0.009307861328125|cri_loss: 0.00501251220703125|unsuper_loss: 0.0
+average reward score: 4.3515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.95%) |Training time=0.49s (22.45%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5081|ppo_ep: 1|act_loss: 0.03912353515625|cri_loss: 0.0212554931640625|unsuper_loss: 0.0
+average reward score: 4.7578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.16%) |Training time=0.46s (21.19%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5082|ppo_ep: 1|act_loss: -0.02783203125|cri_loss: -0.0126953125|unsuper_loss: 0.0
+average reward score: 5.19140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.29%) |Training time=0.48s (22.09%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5083|ppo_ep: 1|act_loss: -0.0003559589385986328|cri_loss: 4.00543212890625e-05|unsuper_loss: 0.0
+average reward score: 5.48828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.48s (21.91%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5084|ppo_ep: 1|act_loss: -0.01495361328125|cri_loss: -0.007282257080078125|unsuper_loss: 0.0
+average reward score: 6.19140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.66s (75.45%) |Training time=0.44s (19.90%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5085|ppo_ep: 1|act_loss: -0.029693603515625|cri_loss: -0.01422119140625|unsuper_loss: 0.0
+average reward score: 6.265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.73s (73.78%) |Training time=0.50s (21.41%) |Others=0.11 (4.80%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5086|ppo_ep: 1|act_loss: -0.016754150390625|cri_loss: -0.0082244873046875|unsuper_loss: 0.0
+average reward score: 4.765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.00%) |Training time=0.46s (21.31%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5087|ppo_ep: 1|act_loss: -0.0077972412109375|cri_loss: -0.003520965576171875|unsuper_loss: 0.0
+average reward score: 5.06640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.78s (75.05%) |Training time=0.49s (20.63%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.50 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5088|ppo_ep: 1|act_loss: -0.0093994140625|cri_loss: -0.004535675048828125|unsuper_loss: 0.0
+average reward score: 6.8359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.20%) |Training time=0.46s (21.04%) |Others=0.10 (4.77%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.48
+[2023-04-14 11:54:59,905] [INFO] [logging.py:96:log_dist] [Rank 0] step=5090, skipped=70, lr=[3.291656400517325e-06, 3.291656400517325e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:54:59,923] [INFO] [timer.py:199:stop] epoch=0/micro_step=5090/global_step=5090, RunningAvgSamplesPerSec=105.1857576987737, CurrSamplesPerSec=112.81056883645637, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:55:00,016] [INFO] [logging.py:96:log_dist] [Rank 0] step=5090, skipped=82, lr=[1.7164811207377364e-06, 1.7164811207377364e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5089|ppo_ep: 1|act_loss: 0.03240966796875|cri_loss: 0.016815185546875|unsuper_loss: 0.0
+average reward score: 5.37890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.76%) |Training time=0.45s (20.55%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5090|ppo_ep: 1|act_loss: 0.01641845703125|cri_loss: 0.00855255126953125|unsuper_loss: 0.0
+average reward score: 4.61328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.59%) |Training time=0.45s (20.72%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5091|ppo_ep: 1|act_loss: 0.004547119140625|cri_loss: 0.002780914306640625|unsuper_loss: 0.0
+average reward score: 4.9609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.77%) |Training time=0.47s (21.56%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5092|ppo_ep: 1|act_loss: 0.001316070556640625|cri_loss: 0.0007786750793457031|unsuper_loss: 0.0
+average reward score: 5.578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.94%) |Training time=0.46s (21.36%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5093|ppo_ep: 1|act_loss: 0.020843505859375|cri_loss: 0.01065826416015625|unsuper_loss: 0.0
+average reward score: 3.98046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.91%) |Training time=0.46s (21.38%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5094|ppo_ep: 1|act_loss: 0.0036144256591796875|cri_loss: 0.002353668212890625|unsuper_loss: 0.0
+average reward score: 4.765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.63%) |Training time=0.47s (21.71%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5095|ppo_ep: 1|act_loss: -0.01236724853515625|cri_loss: -0.005863189697265625|unsuper_loss: 0.0
+average reward score: 5.55859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.80%) |Training time=0.47s (21.51%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5096|ppo_ep: 1|act_loss: -0.01849365234375|cri_loss: -0.00894927978515625|unsuper_loss: 0.0
+average reward score: 6.2265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.37%) |Training time=0.48s (21.97%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5097|ppo_ep: 1|act_loss: 0.0105133056640625|cri_loss: 0.0054931640625|unsuper_loss: 0.0
+average reward score: 5.24609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.27%) |Training time=0.46s (21.04%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5098|ppo_ep: 1|act_loss: -0.031341552734375|cri_loss: -0.01502227783203125|unsuper_loss: 0.0
+average reward score: 4.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.57%) |Training time=0.45s (20.76%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.48
+[2023-04-14 11:55:21,634] [INFO] [logging.py:96:log_dist] [Rank 0] step=5100, skipped=70, lr=[3.2740545799122467e-06, 3.2740545799122467e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:55:21,653] [INFO] [timer.py:199:stop] epoch=0/micro_step=5100/global_step=5100, RunningAvgSamplesPerSec=105.18997521642397, CurrSamplesPerSec=117.28935534330132, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:55:21,745] [INFO] [logging.py:96:log_dist] [Rank 0] step=5100, skipped=82, lr=[1.707346891365429e-06, 1.707346891365429e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5099|ppo_ep: 1|act_loss: -0.01549530029296875|cri_loss: -0.007568359375|unsuper_loss: 0.0
+average reward score: 4.921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.24%) |Training time=0.44s (20.11%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5100|ppo_ep: 1|act_loss: 0.0120697021484375|cri_loss: 0.006183624267578125|unsuper_loss: 0.0
+average reward score: 5.3984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.84%) |Training time=0.44s (20.44%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5101|ppo_ep: 1|act_loss: 0.01096343994140625|cri_loss: 0.005687713623046875|unsuper_loss: 0.0
+average reward score: 4.8515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.55%) |Training time=0.44s (20.76%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5102|ppo_ep: 1|act_loss: -0.017669677734375|cri_loss: -0.00824737548828125|unsuper_loss: 0.0
+average reward score: 5.90625
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.65s (73.06%) |Training time=0.51s (22.46%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.14 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5103|ppo_ep: 1|act_loss: -0.026336669921875|cri_loss: -0.01267242431640625|unsuper_loss: 0.0
+average reward score: 4.98046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.46%) |Training time=0.45s (20.89%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5104|ppo_ep: 1|act_loss: 0.019775390625|cri_loss: 0.01068115234375|unsuper_loss: 0.0
+average reward score: 5.125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.58%) |Training time=0.44s (20.71%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5105|ppo_ep: 1|act_loss: 0.024261474609375|cri_loss: 0.0125885009765625|unsuper_loss: 0.0
+average reward score: 5.84375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.62%) |Training time=0.44s (20.69%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5106|ppo_ep: 1|act_loss: -0.05169677734375|cri_loss: -0.0247955322265625|unsuper_loss: 0.0
+average reward score: 4.359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.44%) |Training time=0.45s (20.89%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5107|ppo_ep: 1|act_loss: -0.0169830322265625|cri_loss: -0.00818634033203125|unsuper_loss: 0.0
+average reward score: 5.28515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.50%) |Training time=0.45s (20.79%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5108|ppo_ep: 1|act_loss: 0.0013885498046875|cri_loss: 0.0008983612060546875|unsuper_loss: 0.0
+average reward score: 4.78125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.33%) |Training time=0.45s (20.98%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.48
+[2023-04-14 11:55:43,186] [INFO] [logging.py:96:log_dist] [Rank 0] step=5110, skipped=70, lr=[3.256475748059745e-06, 3.256475748059745e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:55:43,204] [INFO] [timer.py:199:stop] epoch=0/micro_step=5110/global_step=5110, RunningAvgSamplesPerSec=105.20440380442844, CurrSamplesPerSec=112.62106255936578, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:55:43,297] [INFO] [logging.py:96:log_dist] [Rank 0] step=5110, skipped=82, lr=[1.6982244110243626e-06, 1.6982244110243626e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5109|ppo_ep: 1|act_loss: 0.00341033935546875|cri_loss: 0.001926422119140625|unsuper_loss: 0.0
+average reward score: 5.0625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.49%) |Training time=0.45s (20.83%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5110|ppo_ep: 1|act_loss: 0.006072998046875|cri_loss: 0.0032405853271484375|unsuper_loss: 0.0
+average reward score: 6.2265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.32%) |Training time=0.45s (21.02%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5111|ppo_ep: 1|act_loss: -0.02947998046875|cri_loss: -0.01439666748046875|unsuper_loss: 0.0
+average reward score: 7.17578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.19%) |Training time=0.45s (21.15%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5112|ppo_ep: 1|act_loss: -0.005096435546875|cri_loss: -0.00234222412109375|unsuper_loss: 0.0
+average reward score: 4.328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.14%) |Training time=0.46s (21.19%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5113|ppo_ep: 1|act_loss: -0.017669677734375|cri_loss: -0.00860595703125|unsuper_loss: 0.0
+average reward score: 5.546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.03%) |Training time=0.46s (21.34%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5114|ppo_ep: 1|act_loss: -0.01715087890625|cri_loss: -0.0082550048828125|unsuper_loss: 0.0
+average reward score: 5.71875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.95%) |Training time=0.44s (20.41%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5115|ppo_ep: 1|act_loss: -0.0175323486328125|cri_loss: -0.00847625732421875|unsuper_loss: 0.0
+average reward score: 5.640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (75.06%) |Training time=0.46s (20.48%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.19 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5116|ppo_ep: 1|act_loss: 0.01445770263671875|cri_loss: 0.007415771484375|unsuper_loss: 0.0
+average reward score: 4.45703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.78%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5117|ppo_ep: 1|act_loss: 0.035400390625|cri_loss: 0.018798828125|unsuper_loss: 0.0
+average reward score: 5.4296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.64s (71.04%) |Training time=0.55s (23.99%) |Others=0.11 (4.96%)|CurSamplesPerSec=13.84 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5118|ppo_ep: 1|act_loss: 0.00244140625|cri_loss: 0.001384735107421875|unsuper_loss: 0.0
+average reward score: 6.84375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.85%) |Training time=0.46s (21.50%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.48
+[2023-04-14 11:56:05,008] [INFO] [logging.py:96:log_dist] [Rank 0] step=5120, skipped=70, lr=[3.2389201655205164e-06, 3.2389201655205164e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:56:05,026] [INFO] [timer.py:199:stop] epoch=0/micro_step=5120/global_step=5120, RunningAvgSamplesPerSec=105.20632463741921, CurrSamplesPerSec=105.29688544396153, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:56:05,120] [INFO] [logging.py:96:log_dist] [Rank 0] step=5120, skipped=82, lr=[1.6891138149317038e-06, 1.6891138149317038e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5119|ppo_ep: 1|act_loss: 0.02301025390625|cri_loss: 0.011810302734375|unsuper_loss: 0.0
+average reward score: 5.53125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.65%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5120|ppo_ep: 1|act_loss: 0.00733184814453125|cri_loss: 0.00391387939453125|unsuper_loss: 0.0
+average reward score: 5.390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.85%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5121|ppo_ep: 1|act_loss: 0.0180816650390625|cri_loss: 0.00943756103515625|unsuper_loss: 0.0
+average reward score: 5.26171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.61%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5122|ppo_ep: 1|act_loss: 0.0029296875|cri_loss: 0.00183868408203125|unsuper_loss: 0.0
+average reward score: 4.53125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.77%) |Training time=0.47s (21.58%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5123|ppo_ep: 1|act_loss: 0.0094451904296875|cri_loss: 0.004840850830078125|unsuper_loss: 0.0
+average reward score: 5.57421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.60%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5124|ppo_ep: 1|act_loss: -0.0184326171875|cri_loss: -0.00870513916015625|unsuper_loss: 0.0
+average reward score: 5.76171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.64%) |Training time=0.47s (21.73%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5125|ppo_ep: 1|act_loss: -0.023223876953125|cri_loss: -0.01103973388671875|unsuper_loss: 0.0
+average reward score: 5.48046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.47s (21.90%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5126|ppo_ep: 1|act_loss: -0.01800537109375|cri_loss: -0.00847625732421875|unsuper_loss: 0.0
+average reward score: 5.42578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.75%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5127|ppo_ep: 1|act_loss: -0.0146942138671875|cri_loss: -0.00698089599609375|unsuper_loss: 0.0
+average reward score: 5.24609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.28%) |Training time=0.48s (22.12%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5128|ppo_ep: 1|act_loss: 0.027069091796875|cri_loss: 0.01407623291015625|unsuper_loss: 0.0
+average reward score: 5.0859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.13%) |Training time=0.48s (22.26%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.48
+[2023-04-14 11:56:26,608] [INFO] [logging.py:96:log_dist] [Rank 0] step=5130, skipped=70, lr=[3.221388092510643e-06, 3.221388092510643e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:56:26,627] [INFO] [timer.py:199:stop] epoch=0/micro_step=5130/global_step=5130, RunningAvgSamplesPerSec=105.2030162387455, CurrSamplesPerSec=103.4807402444506, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:56:26,719] [INFO] [logging.py:96:log_dist] [Rank 0] step=5130, skipped=82, lr=[1.6800152381284634e-06, 1.6800152381284634e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5129|ppo_ep: 1|act_loss: 0.0220489501953125|cri_loss: 0.0125732421875|unsuper_loss: 0.0
+average reward score: 5.390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.90%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5130|ppo_ep: 1|act_loss: -0.0015277862548828125|cri_loss: -0.000614166259765625|unsuper_loss: 0.0
+average reward score: 5.55078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.43%) |Training time=0.47s (21.81%) |Others=0.10 (4.76%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5131|ppo_ep: 1|act_loss: 5.0961971282958984e-05|cri_loss: 0.00015354156494140625|unsuper_loss: 0.0
+average reward score: 5.3515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.73%) |Training time=0.47s (21.63%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5132|ppo_ep: 1|act_loss: 0.022308349609375|cri_loss: 0.0114288330078125|unsuper_loss: 0.0
+average reward score: 5.109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.24%) |Training time=0.51s (23.17%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5133|ppo_ep: 1|act_loss: -0.03240966796875|cri_loss: -0.01531219482421875|unsuper_loss: 0.0
+average reward score: 5.75390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.34%) |Training time=0.45s (21.02%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5134|ppo_ep: 1|act_loss: -0.001499176025390625|cri_loss: -0.0005617141723632812|unsuper_loss: 0.0
+average reward score: 4.89453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.60%) |Training time=0.47s (21.77%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5135|ppo_ep: 1|act_loss: -0.013702392578125|cri_loss: -0.00635528564453125|unsuper_loss: 0.0
+average reward score: 5.03515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.73%) |Training time=0.50s (22.69%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5136|ppo_ep: 1|act_loss: -0.02093505859375|cri_loss: -0.01007080078125|unsuper_loss: 0.0
+average reward score: 6.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.02%) |Training time=0.49s (22.37%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5137|ppo_ep: 1|act_loss: -0.005767822265625|cri_loss: -0.0024394989013671875|unsuper_loss: 0.0
+average reward score: 4.74609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.88%) |Training time=0.49s (22.55%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5138|ppo_ep: 1|act_loss: -0.01102447509765625|cri_loss: -0.00304412841796875|unsuper_loss: 0.0
+average reward score: 5.5234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.22%) |Training time=0.48s (22.15%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.48
+[2023-04-14 11:56:48,341] [INFO] [logging.py:96:log_dist] [Rank 0] step=5140, skipped=70, lr=[3.203879788897743e-06, 3.203879788897743e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:56:48,359] [INFO] [timer.py:199:stop] epoch=0/micro_step=5140/global_step=5140, RunningAvgSamplesPerSec=105.19522715135726, CurrSamplesPerSec=101.88409671789803, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:56:48,452] [INFO] [logging.py:96:log_dist] [Rank 0] step=5140, skipped=82, lr=[1.6709288154775e-06, 1.6709288154775e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5139|ppo_ep: 1|act_loss: 0.0029087066650390625|cri_loss: 0.0019779205322265625|unsuper_loss: 0.0
+average reward score: 5.828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.48s (21.96%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5140|ppo_ep: 1|act_loss: -0.0158843994140625|cri_loss: -0.007659912109375|unsuper_loss: 0.0
+average reward score: 5.62890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.00%) |Training time=0.49s (22.41%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5141|ppo_ep: 1|act_loss: 0.006500244140625|cri_loss: 0.0035247802734375|unsuper_loss: 0.0
+average reward score: 4.609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.65%) |Training time=0.47s (21.71%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5142|ppo_ep: 1|act_loss: -0.00617218017578125|cri_loss: -0.0020294189453125|unsuper_loss: 0.0
+average reward score: 5.5703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.31%) |Training time=0.46s (21.09%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5143|ppo_ep: 1|act_loss: 0.007320404052734375|cri_loss: 0.003997802734375|unsuper_loss: 0.0
+average reward score: 4.8203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.68%) |Training time=0.47s (21.43%) |Others=0.11 (4.89%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5144|ppo_ep: 1|act_loss: 0.0160675048828125|cri_loss: 0.00841522216796875|unsuper_loss: 0.0
+average reward score: 4.828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.62s (71.75%) |Training time=0.54s (23.79%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.21 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5145|ppo_ep: 1|act_loss: 0.04425048828125|cri_loss: 0.0236968994140625|unsuper_loss: 0.0
+average reward score: 5.625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.46s (21.37%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5146|ppo_ep: 1|act_loss: 0.02178955078125|cri_loss: 0.01132965087890625|unsuper_loss: 0.0
+average reward score: 4.28515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.46s (21.56%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5147|ppo_ep: 1|act_loss: 0.03814697265625|cri_loss: 0.019622802734375|unsuper_loss: 0.0
+average reward score: 5.2265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.73%) |Training time=0.47s (21.64%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5148|ppo_ep: 1|act_loss: 0.012054443359375|cri_loss: 0.0062408447265625|unsuper_loss: 0.0
+average reward score: 4.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.19%) |Training time=0.51s (23.24%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.48
+[2023-04-14 11:57:10,149] [INFO] [logging.py:96:log_dist] [Rank 0] step=5150, skipped=70, lr=[3.186395514197109e-06, 3.186395514197109e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:57:10,168] [INFO] [timer.py:199:stop] epoch=0/micro_step=5150/global_step=5150, RunningAvgSamplesPerSec=105.18848918908238, CurrSamplesPerSec=107.31580260338376, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:57:10,261] [INFO] [logging.py:96:log_dist] [Rank 0] step=5150, skipped=82, lr=[1.6618546816615162e-06, 1.6618546816615162e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5149|ppo_ep: 1|act_loss: 0.004528045654296875|cri_loss: 0.002605438232421875|unsuper_loss: 0.0
+average reward score: 4.08984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.79%) |Training time=0.47s (21.59%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5150|ppo_ep: 1|act_loss: -0.020782470703125|cri_loss: -0.01001739501953125|unsuper_loss: 0.0
+average reward score: 4.7421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.82%) |Training time=0.47s (21.55%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5151|ppo_ep: 1|act_loss: 0.0063323974609375|cri_loss: 0.0055999755859375|unsuper_loss: 0.0
+average reward score: 4.18359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.10%) |Training time=0.46s (21.23%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5152|ppo_ep: 1|act_loss: -0.0015277862548828125|cri_loss: -0.0006008148193359375|unsuper_loss: 0.0
+average reward score: 4.44140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.73%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5153|ppo_ep: 1|act_loss: 0.0142669677734375|cri_loss: 0.0084686279296875|unsuper_loss: 0.0
+average reward score: 5.046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.42%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5154|ppo_ep: 1|act_loss: -0.004833221435546875|cri_loss: -0.002147674560546875|unsuper_loss: 0.0
+average reward score: 5.59375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.47s (21.58%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5155|ppo_ep: 1|act_loss: -0.0033206939697265625|cri_loss: -0.0005054473876953125|unsuper_loss: 0.0
+average reward score: 5.3125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.65%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5156|ppo_ep: 1|act_loss: -0.02227783203125|cri_loss: -0.01050567626953125|unsuper_loss: 0.0
+average reward score: 5.1875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.32%) |Training time=0.45s (21.00%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5157|ppo_ep: 1|act_loss: -0.030426025390625|cri_loss: -0.01453399658203125|unsuper_loss: 0.0
+average reward score: 5.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.50%) |Training time=0.45s (20.74%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5158|ppo_ep: 1|act_loss: 0.0006160736083984375|cri_loss: 0.0019016265869140625|unsuper_loss: 0.0
+average reward score: 5.59375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.50%) |Training time=0.45s (20.83%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.48
+[2023-04-14 11:57:31,697] [INFO] [logging.py:96:log_dist] [Rank 0] step=5160, skipped=70, lr=[3.1689355275678734e-06, 3.1689355275678734e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:57:31,715] [INFO] [timer.py:199:stop] epoch=0/micro_step=5160/global_step=5160, RunningAvgSamplesPerSec=105.19364901967187, CurrSamplesPerSec=107.49044406429995, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:57:31,809] [INFO] [logging.py:96:log_dist] [Rank 0] step=5160, skipped=82, lr=[1.652792971181065e-06, 1.652792971181065e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5159|ppo_ep: 1|act_loss: -0.00487518310546875|cri_loss: -0.0021419525146484375|unsuper_loss: 0.0
+average reward score: 6.12890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.01%) |Training time=0.46s (21.36%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5160|ppo_ep: 1|act_loss: 0.019805908203125|cri_loss: 0.011138916015625|unsuper_loss: 0.0
+average reward score: 5.14453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.45s (21.07%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5161|ppo_ep: 1|act_loss: 0.0233612060546875|cri_loss: 0.012054443359375|unsuper_loss: 0.0
+average reward score: 5.2734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.43%) |Training time=0.45s (20.91%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5162|ppo_ep: 1|act_loss: 0.00848388671875|cri_loss: 0.004398345947265625|unsuper_loss: 0.0
+average reward score: 5.92578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.96%) |Training time=0.46s (21.42%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5163|ppo_ep: 1|act_loss: 0.00827789306640625|cri_loss: 0.004608154296875|unsuper_loss: 0.0
+average reward score: 5.4765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.19%) |Training time=0.48s (20.50%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5164|ppo_ep: 1|act_loss: 0.013702392578125|cri_loss: 0.00714874267578125|unsuper_loss: 0.0
+average reward score: 4.94140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.23%) |Training time=0.45s (21.11%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5165|ppo_ep: 1|act_loss: 0.0002040863037109375|cri_loss: 0.00017511844635009766|unsuper_loss: 0.0
+average reward score: 4.8125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.33%) |Training time=0.45s (21.03%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5166|ppo_ep: 1|act_loss: -0.012786865234375|cri_loss: -0.006092071533203125|unsuper_loss: 0.0
+average reward score: 5.140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.39%) |Training time=0.45s (20.95%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5167|ppo_ep: 1|act_loss: -0.0180206298828125|cri_loss: -0.0085296630859375|unsuper_loss: 0.0
+average reward score: 5.51171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.22%) |Training time=0.46s (21.14%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5168|ppo_ep: 1|act_loss: 0.002552032470703125|cri_loss: 0.0014467239379882812|unsuper_loss: 0.0
+average reward score: 5.18359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.45s (21.09%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.48
+[2023-04-14 11:57:53,391] [INFO] [logging.py:96:log_dist] [Rank 0] step=5170, skipped=70, lr=[3.151500087809158e-06, 3.151500087809158e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:57:53,410] [INFO] [timer.py:199:stop] epoch=0/micro_step=5170/global_step=5170, RunningAvgSamplesPerSec=105.20113500419411, CurrSamplesPerSec=112.40855699709468, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:57:53,494] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+[2023-04-14 11:57:53,494] [INFO] [logging.py:96:log_dist] [Rank 0] step=5170, skipped=83, lr=[1.6446481647168905e-06, 1.6446481647168905e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5169|ppo_ep: 1|act_loss: -0.0045623779296875|cri_loss: -0.00177001953125|unsuper_loss: 0.0
+average reward score: 4.703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.93%) |Training time=0.45s (20.91%) |Others=0.09 (4.16%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.48
+[2023-04-14 11:57:55,633] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 5170|ppo_ep: 1|act_loss: 0.01143646240234375|cri_loss: 0.00624847412109375|unsuper_loss: 0.0
+average reward score: 5.3203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.79%) |Training time=0.45s (21.02%) |Others=0.09 (4.18%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5171|ppo_ep: 1|act_loss: 0.0195159912109375|cri_loss: 0.01044464111328125|unsuper_loss: 0.0
+average reward score: 5.30859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.68%) |Training time=0.44s (20.66%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5172|ppo_ep: 1|act_loss: -0.0211944580078125|cri_loss: -0.0100250244140625|unsuper_loss: 0.0
+average reward score: 5.875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.90%) |Training time=0.44s (20.23%) |Others=0.11 (4.87%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5173|ppo_ep: 1|act_loss: -0.00702667236328125|cri_loss: -0.0029582977294921875|unsuper_loss: 0.0
+average reward score: 5.3984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.37%) |Training time=0.43s (20.02%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5174|ppo_ep: 1|act_loss: -0.0139312744140625|cri_loss: -0.006595611572265625|unsuper_loss: 0.0
+average reward score: 6.22265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.71s (76.28%) |Training time=0.43s (19.25%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.26 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5175|ppo_ep: 1|act_loss: 0.001678466796875|cri_loss: 0.00147247314453125|unsuper_loss: 0.0
+average reward score: 4.75390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.45s (21.15%) |Others=0.10 (4.82%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5176|ppo_ep: 1|act_loss: -0.0050201416015625|cri_loss: -0.0023059844970703125|unsuper_loss: 0.0
+average reward score: 4.6171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.21%) |Training time=0.45s (21.12%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5177|ppo_ep: 1|act_loss: 0.0244293212890625|cri_loss: 0.01271820068359375|unsuper_loss: 0.0
+average reward score: 5.6875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.86%) |Training time=0.46s (21.47%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5178|ppo_ep: 1|act_loss: 0.016876220703125|cri_loss: 0.008758544921875|unsuper_loss: 0.0
+average reward score: 6.0859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.76s (76.19%) |Training time=0.45s (19.46%) |Others=0.10 (4.35%)|CurSamplesPerSec=13.82 |AvgSamplesPerSec=14.48
+[2023-04-14 11:58:15,143] [INFO] [logging.py:96:log_dist] [Rank 0] step=5180, skipped=70, lr=[3.13408945335624e-06, 3.13408945335624e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:58:15,161] [INFO] [timer.py:199:stop] epoch=0/micro_step=5180/global_step=5180, RunningAvgSamplesPerSec=105.21499798949904, CurrSamplesPerSec=114.18577494421204, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:58:15,254] [INFO] [logging.py:96:log_dist] [Rank 0] step=5180, skipped=84, lr=[1.6365136277409998e-06, 1.6365136277409998e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5179|ppo_ep: 1|act_loss: -0.024688720703125|cri_loss: -0.0120391845703125|unsuper_loss: 0.0
+average reward score: 6.1796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.67%) |Training time=0.44s (20.67%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5180|ppo_ep: 1|act_loss: 0.00354766845703125|cri_loss: 0.0020904541015625|unsuper_loss: 0.0
+average reward score: 4.97265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.01%) |Training time=0.43s (20.27%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5181|ppo_ep: 1|act_loss: 0.008453369140625|cri_loss: 0.0045013427734375|unsuper_loss: 0.0
+average reward score: 5.0234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.71%) |Training time=0.44s (20.59%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5182|ppo_ep: 1|act_loss: -0.015106201171875|cri_loss: -0.006755828857421875|unsuper_loss: 0.0
+average reward score: 5.6953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.63%) |Training time=0.44s (20.71%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5183|ppo_ep: 1|act_loss: 0.004367828369140625|cri_loss: 0.002498626708984375|unsuper_loss: 0.0
+average reward score: 5.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.82%) |Training time=0.44s (20.51%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5184|ppo_ep: 1|act_loss: -0.016021728515625|cri_loss: -0.00772857666015625|unsuper_loss: 0.0
+average reward score: 5.8359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.69%) |Training time=0.44s (20.58%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5185|ppo_ep: 1|act_loss: 0.039581298828125|cri_loss: 0.0210113525390625|unsuper_loss: 0.0
+average reward score: 5.1953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.73%) |Training time=0.44s (20.60%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5186|ppo_ep: 1|act_loss: 0.04901123046875|cri_loss: 0.0275726318359375|unsuper_loss: 0.0
+average reward score: 4.7890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.72%) |Training time=0.44s (20.59%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5187|ppo_ep: 1|act_loss: 0.002056121826171875|cri_loss: 0.0012464523315429688|unsuper_loss: 0.0
+average reward score: 5.76171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.83%) |Training time=0.44s (20.49%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5188|ppo_ep: 1|act_loss: 0.0018367767333984375|cri_loss: 0.001033782958984375|unsuper_loss: 0.0
+average reward score: 5.02734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.95%) |Training time=0.44s (20.40%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.48
+[2023-04-14 11:58:36,588] [INFO] [logging.py:96:log_dist] [Rank 0] step=5190, skipped=70, lr=[3.116703882276723e-06, 3.116703882276723e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:58:36,607] [INFO] [timer.py:199:stop] epoch=0/micro_step=5190/global_step=5190, RunningAvgSamplesPerSec=105.23254817548194, CurrSamplesPerSec=114.20268043044129, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:58:36,699] [INFO] [logging.py:96:log_dist] [Rank 0] step=5190, skipped=84, lr=[1.6274874165679296e-06, 1.6274874165679296e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5189|ppo_ep: 1|act_loss: -0.01806640625|cri_loss: -0.008819580078125|unsuper_loss: 0.0
+average reward score: 4.8125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.66%) |Training time=0.44s (20.70%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5190|ppo_ep: 1|act_loss: -0.032012939453125|cri_loss: -0.015045166015625|unsuper_loss: 0.0
+average reward score: 4.8671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.81%) |Training time=0.44s (20.50%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5191|ppo_ep: 1|act_loss: -0.02008056640625|cri_loss: -0.0090179443359375|unsuper_loss: 0.0
+average reward score: 5.01953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.64%) |Training time=0.44s (20.69%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5192|ppo_ep: 1|act_loss: -0.008148193359375|cri_loss: -0.003566741943359375|unsuper_loss: 0.0
+average reward score: 6.046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.74%) |Training time=0.44s (20.58%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5193|ppo_ep: 1|act_loss: -0.00064849853515625|cri_loss: -0.000164031982421875|unsuper_loss: 0.0
+average reward score: 4.40234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.68%) |Training time=0.44s (18.98%) |Others=0.10 (4.34%)|CurSamplesPerSec=13.89 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5194|ppo_ep: 1|act_loss: -0.017242431640625|cri_loss: -0.0084228515625|unsuper_loss: 0.0
+average reward score: 5.2734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.61%) |Training time=0.44s (20.71%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5195|ppo_ep: 1|act_loss: 0.015167236328125|cri_loss: 0.00780487060546875|unsuper_loss: 0.0
+average reward score: 4.328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.60%) |Training time=0.44s (20.73%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5196|ppo_ep: 1|act_loss: 0.0142059326171875|cri_loss: 0.00827789306640625|unsuper_loss: 0.0
+average reward score: 4.98046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.64%) |Training time=0.44s (20.69%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5197|ppo_ep: 1|act_loss: 0.00450897216796875|cri_loss: 0.0025196075439453125|unsuper_loss: 0.0
+average reward score: 4.5625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.35%) |Training time=0.45s (21.01%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5198|ppo_ep: 1|act_loss: -0.00450897216796875|cri_loss: -0.00199127197265625|unsuper_loss: 0.0
+average reward score: 5.0625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.76%) |Training time=0.44s (20.58%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.48
+[2023-04-14 11:58:58,181] [INFO] [logging.py:96:log_dist] [Rank 0] step=5200, skipped=70, lr=[3.0993436322667104e-06, 3.0993436322667104e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:58:58,199] [INFO] [timer.py:199:stop] epoch=0/micro_step=5200/global_step=5200, RunningAvgSamplesPerSec=105.24831529325668, CurrSamplesPerSec=111.86488639962661, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:58:58,292] [INFO] [logging.py:96:log_dist] [Rank 0] step=5200, skipped=84, lr=[1.6184741381361684e-06, 1.6184741381361684e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5199|ppo_ep: 1|act_loss: -0.01104736328125|cri_loss: -0.004955291748046875|unsuper_loss: 0.0
+average reward score: 4.76953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.46%) |Training time=0.45s (20.90%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5200|ppo_ep: 1|act_loss: 0.0209808349609375|cri_loss: 0.0107574462890625|unsuper_loss: 0.0
+average reward score: 4.74609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.99%) |Training time=0.44s (20.34%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5201|ppo_ep: 1|act_loss: 0.00376129150390625|cri_loss: 0.002201080322265625|unsuper_loss: 0.0
+average reward score: 6.21484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.92%) |Training time=0.44s (20.44%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5202|ppo_ep: 1|act_loss: -0.01300048828125|cri_loss: -0.0059051513671875|unsuper_loss: 0.0
+average reward score: 5.15625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.65%) |Training time=0.44s (20.67%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5203|ppo_ep: 1|act_loss: -0.0003268718719482422|cri_loss: -6.699562072753906e-05|unsuper_loss: 0.0
+average reward score: 5.2265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.65s (74.53%) |Training time=0.46s (20.87%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5204|ppo_ep: 1|act_loss: 0.033721923828125|cri_loss: 0.01727294921875|unsuper_loss: 0.0
+average reward score: 5.1171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.71s (76.03%) |Training time=0.44s (19.50%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.24 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5205|ppo_ep: 1|act_loss: 0.0024261474609375|cri_loss: 0.0014257431030273438|unsuper_loss: 0.0
+average reward score: 5.38671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.59%) |Training time=0.44s (20.70%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5206|ppo_ep: 1|act_loss: -0.006072998046875|cri_loss: -0.0028514862060546875|unsuper_loss: 0.0
+average reward score: 4.96484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.85%) |Training time=0.46s (21.48%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5207|ppo_ep: 1|act_loss: 0.021270751953125|cri_loss: 0.01160430908203125|unsuper_loss: 0.0
+average reward score: 6.6015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.08%) |Training time=0.46s (21.23%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5208|ppo_ep: 1|act_loss: -0.006290435791015625|cri_loss: -0.0029544830322265625|unsuper_loss: 0.0
+average reward score: 4.88671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.67%) |Training time=0.46s (20.01%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.83 |AvgSamplesPerSec=14.48
+[2023-04-14 11:59:19,968] [INFO] [logging.py:96:log_dist] [Rank 0] step=5210, skipped=70, lr=[3.0820089606469873e-06, 3.0820089606469873e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:59:19,986] [INFO] [timer.py:199:stop] epoch=0/micro_step=5210/global_step=5210, RunningAvgSamplesPerSec=105.25921683045586, CurrSamplesPerSec=106.84806254005278, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:59:20,078] [INFO] [logging.py:96:log_dist] [Rank 0] step=5210, skipped=84, lr=[1.6094739260442448e-06, 1.6094739260442448e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5209|ppo_ep: 1|act_loss: -0.01486968994140625|cri_loss: -0.007076263427734375|unsuper_loss: 0.0
+average reward score: 4.4296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.82%) |Training time=0.46s (21.55%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5210|ppo_ep: 1|act_loss: -0.00803375244140625|cri_loss: -0.0034542083740234375|unsuper_loss: 0.0
+average reward score: 4.625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.94%) |Training time=0.46s (21.38%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5211|ppo_ep: 1|act_loss: 0.0084381103515625|cri_loss: 0.004375457763671875|unsuper_loss: 0.0
+average reward score: 5.1015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.67%) |Training time=0.47s (21.64%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5212|ppo_ep: 1|act_loss: 0.0023822784423828125|cri_loss: 0.0013141632080078125|unsuper_loss: 0.0
+average reward score: 5.84375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.41%) |Training time=0.45s (20.92%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5213|ppo_ep: 1|act_loss: 0.011505126953125|cri_loss: 0.00585174560546875|unsuper_loss: 0.0
+average reward score: 5.5703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.61%) |Training time=0.45s (20.73%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5214|ppo_ep: 1|act_loss: 0.05206298828125|cri_loss: 0.0285491943359375|unsuper_loss: 0.0
+average reward score: 4.56640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.65%) |Training time=0.44s (20.68%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5215|ppo_ep: 1|act_loss: -0.00803375244140625|cri_loss: -0.00379180908203125|unsuper_loss: 0.0
+average reward score: 5.59765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.54%) |Training time=0.45s (20.80%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5216|ppo_ep: 1|act_loss: -0.02471923828125|cri_loss: -0.0115203857421875|unsuper_loss: 0.0
+average reward score: 4.6796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.38%) |Training time=0.45s (20.98%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5217|ppo_ep: 1|act_loss: -0.0132598876953125|cri_loss: -0.006458282470703125|unsuper_loss: 0.0
+average reward score: 5.859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.56%) |Training time=0.45s (20.76%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5218|ppo_ep: 1|act_loss: 0.0123748779296875|cri_loss: 0.00687408447265625|unsuper_loss: 0.0
+average reward score: 5.87109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.45%) |Training time=0.42s (19.84%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.48
+[2023-04-14 11:59:41,416] [INFO] [logging.py:96:log_dist] [Rank 0] step=5220, skipped=70, lr=[3.0647001243592035e-06, 3.0647001243592035e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 11:59:41,434] [INFO] [timer.py:199:stop] epoch=0/micro_step=5220/global_step=5220, RunningAvgSamplesPerSec=105.27236962780096, CurrSamplesPerSec=115.6615648442179, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 11:59:41,527] [INFO] [logging.py:96:log_dist] [Rank 0] step=5220, skipped=84, lr=[1.6004869136970152e-06, 1.6004869136970152e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5219|ppo_ep: 1|act_loss: -0.003662109375|cri_loss: -0.0016155242919921875|unsuper_loss: 0.0
+average reward score: 5.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.81%) |Training time=0.44s (20.53%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5220|ppo_ep: 1|act_loss: -0.03924560546875|cri_loss: -0.01666259765625|unsuper_loss: 0.0
+average reward score: 4.4921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.43%) |Training time=0.45s (20.89%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5221|ppo_ep: 1|act_loss: -0.0004165172576904297|cri_loss: -7.677078247070312e-05|unsuper_loss: 0.0
+average reward score: 4.9765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.15%) |Training time=0.45s (21.02%) |Others=0.10 (4.83%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5222|ppo_ep: 1|act_loss: -0.00646209716796875|cri_loss: -0.002994537353515625|unsuper_loss: 0.0
+average reward score: 5.828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.34%) |Training time=0.45s (20.99%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5223|ppo_ep: 1|act_loss: 0.0120849609375|cri_loss: 0.006378173828125|unsuper_loss: 0.0
+average reward score: 4.95703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.76s (76.22%) |Training time=0.45s (19.42%) |Others=0.10 (4.36%)|CurSamplesPerSec=13.86 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5224|ppo_ep: 1|act_loss: 0.036102294921875|cri_loss: 0.0192718505859375|unsuper_loss: 0.0
+average reward score: 5.59375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.34%) |Training time=0.45s (21.00%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5225|ppo_ep: 1|act_loss: -0.00815582275390625|cri_loss: -0.0037250518798828125|unsuper_loss: 0.0
+average reward score: 5.4453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.51%) |Training time=0.45s (20.87%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5226|ppo_ep: 1|act_loss: -0.00539398193359375|cri_loss: -0.002559661865234375|unsuper_loss: 0.0
+average reward score: 4.5859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.65%) |Training time=0.44s (20.66%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5227|ppo_ep: 1|act_loss: 0.01152801513671875|cri_loss: 0.006114959716796875|unsuper_loss: 0.0
+average reward score: 5.23046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.39%) |Training time=0.45s (20.95%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5228|ppo_ep: 1|act_loss: -0.00246429443359375|cri_loss: -0.0006666183471679688|unsuper_loss: 0.0
+average reward score: 5.640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.34%) |Training time=0.45s (21.01%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.48
+[2023-04-14 12:00:03,045] [INFO] [logging.py:96:log_dist] [Rank 0] step=5230, skipped=70, lr=[3.04741737996207e-06, 3.04741737996207e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:00:03,063] [INFO] [timer.py:199:stop] epoch=0/micro_step=5230/global_step=5230, RunningAvgSamplesPerSec=105.28411092329453, CurrSamplesPerSec=113.17489367012611, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:00:03,156] [INFO] [logging.py:96:log_dist] [Rank 0] step=5230, skipped=84, lr=[1.591513234303681e-06, 1.591513234303681e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5229|ppo_ep: 1|act_loss: 0.0034198760986328125|cri_loss: 0.0023326873779296875|unsuper_loss: 0.0
+average reward score: 5.30078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.55%) |Training time=0.45s (20.78%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5230|ppo_ep: 1|act_loss: 0.002960205078125|cri_loss: 0.001796722412109375|unsuper_loss: 0.0
+average reward score: 5.58203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.62%) |Training time=0.47s (21.72%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5231|ppo_ep: 1|act_loss: 0.0132293701171875|cri_loss: 0.00711822509765625|unsuper_loss: 0.0
+average reward score: 4.48828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.59%) |Training time=0.47s (21.74%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5232|ppo_ep: 1|act_loss: -0.0017986297607421875|cri_loss: -0.0006337165832519531|unsuper_loss: 0.0
+average reward score: 5.44921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.91%) |Training time=0.46s (21.44%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5233|ppo_ep: 1|act_loss: 0.0102081298828125|cri_loss: 0.00537109375|unsuper_loss: 0.0
+average reward score: 5.6015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.53%) |Training time=0.45s (20.69%) |Others=0.10 (4.78%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5234|ppo_ep: 1|act_loss: -0.009002685546875|cri_loss: -0.004108428955078125|unsuper_loss: 0.0
+average reward score: 5.44921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.69s (75.68%) |Training time=0.44s (19.84%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.31 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5235|ppo_ep: 1|act_loss: -0.029693603515625|cri_loss: -0.014312744140625|unsuper_loss: 0.0
+average reward score: 5.37109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.74%) |Training time=0.46s (21.58%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5236|ppo_ep: 1|act_loss: -0.00720977783203125|cri_loss: -0.0032634735107421875|unsuper_loss: 0.0
+average reward score: 5.20703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.52%) |Training time=0.44s (20.78%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5237|ppo_ep: 1|act_loss: -0.004589080810546875|cri_loss: -0.0019083023071289062|unsuper_loss: 0.0
+average reward score: 5.37109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.62%) |Training time=0.44s (20.72%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5238|ppo_ep: 1|act_loss: 0.00301361083984375|cri_loss: 0.00196075439453125|unsuper_loss: 0.0
+average reward score: 4.9375
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.70%) |Training time=0.44s (19.01%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.48
+[2023-04-14 12:00:24,816] [INFO] [logging.py:96:log_dist] [Rank 0] step=5240, skipped=70, lr=[3.0301609836275488e-06, 3.0301609836275488e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:00:24,834] [INFO] [timer.py:199:stop] epoch=0/micro_step=5240/global_step=5240, RunningAvgSamplesPerSec=105.29263773127231, CurrSamplesPerSec=111.4146109844281, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:00:24,926] [INFO] [logging.py:96:log_dist] [Rank 0] step=5240, skipped=84, lr=[1.58255302087582e-06, 1.58255302087582e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5239|ppo_ep: 1|act_loss: 0.002498626708984375|cri_loss: 0.0015401840209960938|unsuper_loss: 0.0
+average reward score: 4.515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.36%) |Training time=0.45s (20.98%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5240|ppo_ep: 1|act_loss: 0.0056915283203125|cri_loss: 0.0032215118408203125|unsuper_loss: 0.0
+average reward score: 5.1484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.39%) |Training time=0.45s (20.94%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5241|ppo_ep: 1|act_loss: 0.02130126953125|cri_loss: 0.01110076904296875|unsuper_loss: 0.0
+average reward score: 4.5234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.42%) |Training time=0.45s (20.89%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5242|ppo_ep: 1|act_loss: 0.1400146484375|cri_loss: 0.08123779296875|unsuper_loss: 0.0
+average reward score: 5.4765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.40%) |Training time=0.45s (20.93%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.48
+epoch: 0|step: 5243|ppo_ep: 1|act_loss: 0.0140228271484375|cri_loss: 0.00774383544921875|unsuper_loss: 0.0
+average reward score: 5.0859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.50%) |Training time=0.45s (20.86%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5244|ppo_ep: 1|act_loss: 0.0035495758056640625|cri_loss: 0.00228118896484375|unsuper_loss: 0.0
+average reward score: 5.0703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.48%) |Training time=0.45s (20.84%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5245|ppo_ep: 1|act_loss: 0.01265716552734375|cri_loss: 0.006671905517578125|unsuper_loss: 0.0
+average reward score: 4.2734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.61%) |Training time=0.44s (20.70%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5246|ppo_ep: 1|act_loss: -0.0294189453125|cri_loss: -0.01203155517578125|unsuper_loss: 0.0
+average reward score: 4.44140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.60%) |Training time=0.44s (20.74%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5247|ppo_ep: 1|act_loss: -0.04681396484375|cri_loss: -0.0164337158203125|unsuper_loss: 0.0
+average reward score: 5.25390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.59%) |Training time=0.44s (20.71%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5248|ppo_ep: 1|act_loss: -0.00452423095703125|cri_loss: -0.0018215179443359375|unsuper_loss: 0.0
+average reward score: 4.640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.49%) |Training time=0.45s (20.84%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.49
+[2023-04-14 12:00:46,261] [INFO] [logging.py:96:log_dist] [Rank 0] step=5250, skipped=70, lr=[3.012931191137065e-06, 3.012931191137065e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:00:46,279] [INFO] [timer.py:199:stop] epoch=0/micro_step=5250/global_step=5250, RunningAvgSamplesPerSec=105.3044718220239, CurrSamplesPerSec=103.76530800000619, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:00:46,372] [INFO] [logging.py:96:log_dist] [Rank 0] step=5250, skipped=84, lr=[1.5736064062254094e-06, 1.5736064062254094e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5249|ppo_ep: 1|act_loss: -0.00366973876953125|cri_loss: -0.0006198883056640625|unsuper_loss: 0.0
+average reward score: 4.3671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.70%) |Training time=0.47s (21.73%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5250|ppo_ep: 1|act_loss: 0.0237884521484375|cri_loss: 0.0128631591796875|unsuper_loss: 0.0
+average reward score: 4.3984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.69%) |Training time=0.44s (20.61%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5251|ppo_ep: 1|act_loss: -0.011444091796875|cri_loss: -0.005619049072265625|unsuper_loss: 0.0
+average reward score: 4.7109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.61%) |Training time=0.44s (20.71%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5252|ppo_ep: 1|act_loss: -0.0035037994384765625|cri_loss: -0.0016393661499023438|unsuper_loss: 0.0
+average reward score: 4.2109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.52%) |Training time=0.45s (20.83%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5253|ppo_ep: 1|act_loss: -0.0030460357666015625|cri_loss: -0.0012302398681640625|unsuper_loss: 0.0
+average reward score: 5.62109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.53%) |Training time=0.44s (19.12%) |Others=0.10 (4.35%)|CurSamplesPerSec=13.80 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5254|ppo_ep: 1|act_loss: 0.006267547607421875|cri_loss: 0.0032138824462890625|unsuper_loss: 0.0
+average reward score: 5.19921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.44%) |Training time=0.45s (20.88%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5255|ppo_ep: 1|act_loss: 0.012847900390625|cri_loss: 0.006938934326171875|unsuper_loss: 0.0
+average reward score: 5.234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.45s (21.09%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5256|ppo_ep: 1|act_loss: -0.0014257431030273438|cri_loss: -0.0003261566162109375|unsuper_loss: 0.0
+average reward score: 5.234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.54%) |Training time=0.45s (20.79%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5257|ppo_ep: 1|act_loss: 0.0113677978515625|cri_loss: 0.005828857421875|unsuper_loss: 0.0
+average reward score: 5.19921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.49%) |Training time=0.45s (20.83%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5258|ppo_ep: 1|act_loss: -0.03814697265625|cri_loss: -0.0180816650390625|unsuper_loss: 0.0
+average reward score: 4.875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.84%) |Training time=0.44s (20.47%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.49
+[2023-04-14 12:01:07,849] [INFO] [logging.py:96:log_dist] [Rank 0] step=5260, skipped=70, lr=[2.9957282578777047e-06, 2.9957282578777047e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:01:07,867] [INFO] [timer.py:199:stop] epoch=0/micro_step=5260/global_step=5260, RunningAvgSamplesPerSec=105.31875428619172, CurrSamplesPerSec=111.46577904454388, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:01:07,960] [INFO] [logging.py:96:log_dist] [Rank 0] step=5260, skipped=84, lr=[1.5646735229628619e-06, 1.5646735229628619e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5259|ppo_ep: 1|act_loss: 0.012237548828125|cri_loss: 0.00710296630859375|unsuper_loss: 0.0
+average reward score: 4.65625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.40%) |Training time=0.45s (20.96%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5260|ppo_ep: 1|act_loss: -0.004589080810546875|cri_loss: -0.0017518997192382812|unsuper_loss: 0.0
+average reward score: 4.87890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.58%) |Training time=0.44s (20.75%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5261|ppo_ep: 1|act_loss: -0.028533935546875|cri_loss: -0.0138702392578125|unsuper_loss: 0.0
+average reward score: 4.8359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.50%) |Training time=0.45s (20.83%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5262|ppo_ep: 1|act_loss: -0.03631591796875|cri_loss: -0.017333984375|unsuper_loss: 0.0
+average reward score: 4.24609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.52%) |Training time=0.45s (20.83%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5263|ppo_ep: 1|act_loss: -0.007282257080078125|cri_loss: -0.003139495849609375|unsuper_loss: 0.0
+average reward score: 5.2265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.21%) |Training time=0.49s (22.26%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.44 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5264|ppo_ep: 1|act_loss: -0.0225830078125|cri_loss: -0.0083465576171875|unsuper_loss: 0.0
+average reward score: 5.3125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.81%) |Training time=0.44s (20.50%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5265|ppo_ep: 1|act_loss: -0.0156707763671875|cri_loss: -0.00771331787109375|unsuper_loss: 0.0
+average reward score: 5.08203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.73%) |Training time=0.44s (20.57%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5266|ppo_ep: 1|act_loss: 0.018646240234375|cri_loss: 0.01043701171875|unsuper_loss: 0.0
+average reward score: 4.8046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.37%) |Training time=0.45s (20.97%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5267|ppo_ep: 1|act_loss: -0.0035762786865234375|cri_loss: -0.0013942718505859375|unsuper_loss: 0.0
+average reward score: 5.6328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.50%) |Training time=0.45s (20.80%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5268|ppo_ep: 1|act_loss: 0.0124969482421875|cri_loss: 0.00662994384765625|unsuper_loss: 0.0
+average reward score: 4.65234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.14%) |Training time=0.48s (22.14%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.49
+[2023-04-14 12:01:29,408] [INFO] [logging.py:96:log_dist] [Rank 0] step=5270, skipped=70, lr=[2.978552438838442e-06, 2.978552438838442e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:01:29,426] [INFO] [timer.py:199:stop] epoch=0/micro_step=5270/global_step=5270, RunningAvgSamplesPerSec=105.33081301411826, CurrSamplesPerSec=112.13020758057145, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:01:29,519] [INFO] [logging.py:96:log_dist] [Rank 0] step=5270, skipped=84, lr=[1.5557545034950558e-06, 1.5557545034950558e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5269|ppo_ep: 1|act_loss: 0.025146484375|cri_loss: 0.01287841796875|unsuper_loss: 0.0
+average reward score: 4.71875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.46%) |Training time=0.45s (20.89%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5270|ppo_ep: 1|act_loss: 0.0148773193359375|cri_loss: 0.007671356201171875|unsuper_loss: 0.0
+average reward score: 5.5859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.57%) |Training time=0.44s (20.75%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.49
+[2023-04-14 12:01:33,792] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 5271|ppo_ep: 1|act_loss: 0.027099609375|cri_loss: 0.013824462890625|unsuper_loss: 0.0
+average reward score: 5.21484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.05%) |Training time=0.44s (20.73%) |Others=0.09 (4.22%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.49
+[2023-04-14 12:01:35,924] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 5272|ppo_ep: 1|act_loss: 0.023406982421875|cri_loss: 0.0120086669921875|unsuper_loss: 0.0
+average reward score: 4.51953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.87%) |Training time=0.45s (20.94%) |Others=0.09 (4.19%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5273|ppo_ep: 1|act_loss: 0.009521484375|cri_loss: 0.005828857421875|unsuper_loss: 0.0
+average reward score: 4.5546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.51%) |Training time=0.45s (20.81%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5274|ppo_ep: 1|act_loss: 0.0012540817260742188|cri_loss: 0.0006866455078125|unsuper_loss: 0.0
+average reward score: 5.36328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.61%) |Training time=0.45s (20.72%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5275|ppo_ep: 1|act_loss: 0.0109710693359375|cri_loss: 0.005619049072265625|unsuper_loss: 0.0
+average reward score: 5.0390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.55%) |Training time=0.44s (20.79%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5276|ppo_ep: 1|act_loss: -0.01338958740234375|cri_loss: -0.006500244140625|unsuper_loss: 0.0
+average reward score: 4.58203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.38%) |Training time=0.45s (20.92%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5277|ppo_ep: 1|act_loss: -0.017333984375|cri_loss: -0.008331298828125|unsuper_loss: 0.0
+average reward score: 5.984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.64%) |Training time=0.44s (20.66%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5278|ppo_ep: 1|act_loss: -0.021484375|cri_loss: -0.00748443603515625|unsuper_loss: 0.0
+average reward score: 5.3671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.53%) |Training time=0.44s (20.81%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.49
+[2023-04-14 12:01:50,806] [INFO] [logging.py:96:log_dist] [Rank 0] step=5280, skipped=70, lr=[2.9614039886063483e-06, 2.9614039886063483e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:01:50,824] [INFO] [timer.py:199:stop] epoch=0/micro_step=5280/global_step=5280, RunningAvgSamplesPerSec=105.34468671241142, CurrSamplesPerSec=112.25146359250908, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:01:50,917] [INFO] [logging.py:96:log_dist] [Rank 0] step=5280, skipped=86, lr=[1.5486293586993107e-06, 1.5486293586993107e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5279|ppo_ep: 1|act_loss: -0.0190277099609375|cri_loss: -0.0081939697265625|unsuper_loss: 0.0
+average reward score: 5.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.48%) |Training time=0.45s (20.88%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5280|ppo_ep: 1|act_loss: -0.025604248046875|cri_loss: -0.01247406005859375|unsuper_loss: 0.0
+average reward score: 5.6875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.99%) |Training time=0.46s (21.38%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5281|ppo_ep: 1|act_loss: -0.0189208984375|cri_loss: -0.0078582763671875|unsuper_loss: 0.0
+average reward score: 5.2109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.22%) |Training time=0.46s (21.17%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5282|ppo_ep: 1|act_loss: 0.01389312744140625|cri_loss: 0.00717926025390625|unsuper_loss: 0.0
+average reward score: 4.76171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.47%) |Training time=0.45s (20.88%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5283|ppo_ep: 1|act_loss: 0.03839111328125|cri_loss: 0.02044677734375|unsuper_loss: 0.0
+average reward score: 4.734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.64s (72.98%) |Training time=0.50s (22.23%) |Others=0.11 (4.79%)|CurSamplesPerSec=14.24 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5284|ppo_ep: 1|act_loss: 0.01202392578125|cri_loss: 0.0062408447265625|unsuper_loss: 0.0
+average reward score: 5.13671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.95%) |Training time=0.46s (21.40%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5285|ppo_ep: 1|act_loss: 0.0244903564453125|cri_loss: 0.01262664794921875|unsuper_loss: 0.0
+average reward score: 4.0703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.11%) |Training time=0.46s (21.25%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5286|ppo_ep: 1|act_loss: 0.04376220703125|cri_loss: 0.0239715576171875|unsuper_loss: 0.0
+average reward score: 5.5859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.98%) |Training time=0.46s (21.35%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5287|ppo_ep: 1|act_loss: -0.0039215087890625|cri_loss: -0.0018444061279296875|unsuper_loss: 0.0
+average reward score: 4.80078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.38%) |Training time=0.45s (20.96%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5288|ppo_ep: 1|act_loss: -0.0125885009765625|cri_loss: -0.004970550537109375|unsuper_loss: 0.0
+average reward score: 5.9296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.46s (21.32%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.49
+[2023-04-14 12:02:12,452] [INFO] [logging.py:96:log_dist] [Rank 0] step=5290, skipped=70, lr=[2.9442831613628225e-06, 2.9442831613628225e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:02:12,471] [INFO] [timer.py:199:stop] epoch=0/micro_step=5290/global_step=5290, RunningAvgSamplesPerSec=105.35216406875476, CurrSamplesPerSec=109.4159719697328, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:02:12,563] [INFO] [logging.py:96:log_dist] [Rank 0] step=5290, skipped=86, lr=[1.5397356270701858e-06, 1.5397356270701858e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5289|ppo_ep: 1|act_loss: 0.0186767578125|cri_loss: 0.009857177734375|unsuper_loss: 0.0
+average reward score: 5.7578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.18%) |Training time=0.46s (21.19%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5290|ppo_ep: 1|act_loss: -0.0070648193359375|cri_loss: -0.00283050537109375|unsuper_loss: 0.0
+average reward score: 4.8828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.46s (21.30%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5291|ppo_ep: 1|act_loss: 0.008026123046875|cri_loss: 0.00466156005859375|unsuper_loss: 0.0
+average reward score: 4.828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.05%) |Training time=0.46s (21.33%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5292|ppo_ep: 1|act_loss: 0.007476806640625|cri_loss: 0.00417327880859375|unsuper_loss: 0.0
+average reward score: 4.8515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.34%) |Training time=0.45s (21.00%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5293|ppo_ep: 1|act_loss: 0.0211029052734375|cri_loss: 0.0108184814453125|unsuper_loss: 0.0
+average reward score: 4.484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.72s (75.49%) |Training time=0.46s (20.11%) |Others=0.10 (4.40%)|CurSamplesPerSec=14.06 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5294|ppo_ep: 1|act_loss: 0.002414703369140625|cri_loss: 0.001720428466796875|unsuper_loss: 0.0
+average reward score: 5.109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.36%) |Training time=0.45s (20.99%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5295|ppo_ep: 1|act_loss: 0.03570556640625|cri_loss: 0.0189056396484375|unsuper_loss: 0.0
+average reward score: 5.8125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.67%) |Training time=0.45s (20.66%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5296|ppo_ep: 1|act_loss: 0.05523681640625|cri_loss: 0.029052734375|unsuper_loss: 0.0
+average reward score: 4.55078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.60%) |Training time=0.45s (20.72%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5297|ppo_ep: 1|act_loss: 0.0104522705078125|cri_loss: 0.00646209716796875|unsuper_loss: 0.0
+average reward score: 4.38671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.56%) |Training time=0.45s (20.79%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5298|ppo_ep: 1|act_loss: -0.01441192626953125|cri_loss: -0.0060272216796875|unsuper_loss: 0.0
+average reward score: 4.640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.59s (70.73%) |Training time=0.48s (21.46%) |Others=0.18 (7.80%)|CurSamplesPerSec=14.20 |AvgSamplesPerSec=14.49
+[2023-04-14 12:02:34,208] [INFO] [logging.py:96:log_dist] [Rank 0] step=5300, skipped=70, lr=[2.927190210879829e-06, 2.927190210879829e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:02:34,226] [INFO] [timer.py:199:stop] epoch=0/micro_step=5300/global_step=5300, RunningAvgSamplesPerSec=105.36035663269071, CurrSamplesPerSec=112.99862095906488, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:02:34,319] [INFO] [logging.py:96:log_dist] [Rank 0] step=5300, skipped=86, lr=[1.5308561288755794e-06, 1.5308561288755794e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5299|ppo_ep: 1|act_loss: -0.004306793212890625|cri_loss: -0.001880645751953125|unsuper_loss: 0.0
+average reward score: 4.703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.52%) |Training time=0.45s (20.77%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5300|ppo_ep: 1|act_loss: -0.01861572265625|cri_loss: -0.008331298828125|unsuper_loss: 0.0
+average reward score: 5.01171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.20%) |Training time=0.45s (21.15%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5301|ppo_ep: 1|act_loss: 0.006916046142578125|cri_loss: 0.0038356781005859375|unsuper_loss: 0.0
+average reward score: 4.9296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.20%) |Training time=0.45s (21.13%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5302|ppo_ep: 1|act_loss: -0.03961181640625|cri_loss: -0.018798828125|unsuper_loss: 0.0
+average reward score: 4.8984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.45s (21.06%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5303|ppo_ep: 1|act_loss: 0.013214111328125|cri_loss: 0.008453369140625|unsuper_loss: 0.0
+average reward score: 4.42578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.05%) |Training time=0.46s (21.31%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5304|ppo_ep: 1|act_loss: -0.0126495361328125|cri_loss: -0.0062103271484375|unsuper_loss: 0.0
+average reward score: 4.890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.09%) |Training time=0.46s (21.25%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5305|ppo_ep: 1|act_loss: 0.01092529296875|cri_loss: 0.005596160888671875|unsuper_loss: 0.0
+average reward score: 6.02734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.23%) |Training time=0.45s (21.11%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5306|ppo_ep: 1|act_loss: -0.0031070709228515625|cri_loss: -0.000335693359375|unsuper_loss: 0.0
+average reward score: 4.3125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.00%) |Training time=0.46s (21.37%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5307|ppo_ep: 1|act_loss: -0.013458251953125|cri_loss: -0.00453948974609375|unsuper_loss: 0.0
+average reward score: 5.23828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.05%) |Training time=0.46s (21.27%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5308|ppo_ep: 1|act_loss: 0.03436279296875|cri_loss: 0.0176849365234375|unsuper_loss: 0.0
+average reward score: 5.0625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.10%) |Training time=0.46s (21.26%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.49
+[2023-04-14 12:02:55,722] [INFO] [logging.py:96:log_dist] [Rank 0] step=5310, skipped=70, lr=[2.910125390516126e-06, 2.910125390516126e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:02:55,740] [INFO] [timer.py:199:stop] epoch=0/micro_step=5310/global_step=5310, RunningAvgSamplesPerSec=105.3658343095268, CurrSamplesPerSec=102.50064569932177, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:02:55,833] [INFO] [logging.py:96:log_dist] [Rank 0] step=5310, skipped=86, lr=[1.521990995731075e-06, 1.521990995731075e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5309|ppo_ep: 1|act_loss: 0.00766754150390625|cri_loss: 0.00418853759765625|unsuper_loss: 0.0
+average reward score: 5.671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.90%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5310|ppo_ep: 1|act_loss: 0.01290130615234375|cri_loss: 0.007411956787109375|unsuper_loss: 0.0
+average reward score: 5.0234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.11%) |Training time=0.46s (21.28%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5311|ppo_ep: 1|act_loss: 0.0115509033203125|cri_loss: 0.00615692138671875|unsuper_loss: 0.0
+average reward score: 5.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.70%) |Training time=0.44s (20.63%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5312|ppo_ep: 1|act_loss: 0.018096923828125|cri_loss: 0.01111602783203125|unsuper_loss: 0.0
+average reward score: 4.6015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.32%) |Training time=0.45s (21.01%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5313|ppo_ep: 1|act_loss: -0.019775390625|cri_loss: -0.00923919677734375|unsuper_loss: 0.0
+average reward score: 4.77734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.16%) |Training time=0.45s (21.11%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5314|ppo_ep: 1|act_loss: 0.0172271728515625|cri_loss: 0.0090484619140625|unsuper_loss: 0.0
+average reward score: 5.8359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.29%) |Training time=0.45s (21.04%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5315|ppo_ep: 1|act_loss: -0.00021028518676757812|cri_loss: 0.0001347064971923828|unsuper_loss: 0.0
+average reward score: 5.19140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.67%) |Training time=0.47s (20.04%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5316|ppo_ep: 1|act_loss: -0.00426483154296875|cri_loss: -0.000179290771484375|unsuper_loss: 0.0
+average reward score: 5.34375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.09%) |Training time=0.46s (21.28%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5317|ppo_ep: 1|act_loss: -0.0338134765625|cri_loss: -0.0160064697265625|unsuper_loss: 0.0
+average reward score: 5.75
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.36%) |Training time=0.47s (21.99%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5318|ppo_ep: 1|act_loss: -0.045074462890625|cri_loss: -0.022003173828125|unsuper_loss: 0.0
+average reward score: 4.75390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.46%) |Training time=0.47s (21.87%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.49
+[2023-04-14 12:03:17,417] [INFO] [logging.py:96:log_dist] [Rank 0] step=5320, skipped=70, lr=[2.89308895321352e-06, 2.89308895321352e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:03:17,435] [INFO] [timer.py:199:stop] epoch=0/micro_step=5320/global_step=5320, RunningAvgSamplesPerSec=105.3698549311855, CurrSamplesPerSec=103.94926211193594, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:03:17,527] [INFO] [logging.py:96:log_dist] [Rank 0] step=5320, skipped=86, lr=[1.5131403590393323e-06, 1.5131403590393323e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5319|ppo_ep: 1|act_loss: 0.01412200927734375|cri_loss: 0.007457733154296875|unsuper_loss: 0.0
+average reward score: 5.53125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.48%) |Training time=0.47s (21.90%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5320|ppo_ep: 1|act_loss: 0.02178955078125|cri_loss: 0.01132965087890625|unsuper_loss: 0.0
+average reward score: 5.10546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.48s (22.15%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5321|ppo_ep: 1|act_loss: 0.0303497314453125|cri_loss: 0.0167083740234375|unsuper_loss: 0.0
+average reward score: 5.171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.42%) |Training time=0.47s (21.92%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5322|ppo_ep: 1|act_loss: -0.023895263671875|cri_loss: -0.0081329345703125|unsuper_loss: 0.0
+average reward score: 4.63671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.54%) |Training time=0.50s (22.91%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5323|ppo_ep: 1|act_loss: -0.0027008056640625|cri_loss: -0.001178741455078125|unsuper_loss: 0.0
+average reward score: 5.265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.68s (73.67%) |Training time=0.50s (21.88%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.02 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5324|ppo_ep: 1|act_loss: 0.0748291015625|cri_loss: 0.04034423828125|unsuper_loss: 0.0
+average reward score: 5.2265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.33%) |Training time=0.47s (21.99%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5325|ppo_ep: 1|act_loss: -0.0117340087890625|cri_loss: -0.00556182861328125|unsuper_loss: 0.0
+average reward score: 5.19921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.41%) |Training time=0.45s (20.92%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5326|ppo_ep: 1|act_loss: 0.0284271240234375|cri_loss: 0.0148162841796875|unsuper_loss: 0.0
+average reward score: 4.53515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.49%) |Training time=0.45s (20.80%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5327|ppo_ep: 1|act_loss: 0.02947998046875|cri_loss: 0.01580810546875|unsuper_loss: 0.0
+average reward score: 5.3125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.52%) |Training time=0.47s (21.84%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5328|ppo_ep: 1|act_loss: 0.0015621185302734375|cri_loss: 0.0009093284606933594|unsuper_loss: 0.0
+average reward score: 4.6015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.74%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.49
+[2023-04-14 12:03:39,193] [INFO] [logging.py:96:log_dist] [Rank 0] step=5330, skipped=70, lr=[2.8760811514931076e-06, 2.8760811514931076e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:03:39,211] [INFO] [timer.py:199:stop] epoch=0/micro_step=5330/global_step=5330, RunningAvgSamplesPerSec=105.36426483738646, CurrSamplesPerSec=100.47936851593158, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:03:39,303] [INFO] [logging.py:96:log_dist] [Rank 0] step=5330, skipped=86, lr=[1.5043043499881378e-06, 1.5043043499881378e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5329|ppo_ep: 1|act_loss: 0.021636962890625|cri_loss: 0.0137786865234375|unsuper_loss: 0.0
+average reward score: 5.13671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.29%) |Training time=0.48s (22.12%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5330|ppo_ep: 1|act_loss: 0.02362060546875|cri_loss: 0.013427734375|unsuper_loss: 0.0
+average reward score: 5.1953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.20%) |Training time=0.48s (20.50%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.65 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5331|ppo_ep: 1|act_loss: 0.01336669921875|cri_loss: 0.00736236572265625|unsuper_loss: 0.0
+average reward score: 5.44921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.37%) |Training time=0.45s (20.97%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5332|ppo_ep: 1|act_loss: 0.004276275634765625|cri_loss: 0.0023345947265625|unsuper_loss: 0.0
+average reward score: 5.2578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.46s (21.40%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5333|ppo_ep: 1|act_loss: -0.00860595703125|cri_loss: -0.003955841064453125|unsuper_loss: 0.0
+average reward score: 4.453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.86%) |Training time=0.46s (21.50%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5334|ppo_ep: 1|act_loss: -0.0253448486328125|cri_loss: -0.0120697021484375|unsuper_loss: 0.0
+average reward score: 4.625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.19%) |Training time=0.46s (21.16%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5335|ppo_ep: 1|act_loss: -0.0122528076171875|cri_loss: -0.005382537841796875|unsuper_loss: 0.0
+average reward score: 6.08984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.15%) |Training time=0.46s (21.22%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5336|ppo_ep: 1|act_loss: -0.0132598876953125|cri_loss: -0.005645751953125|unsuper_loss: 0.0
+average reward score: 4.734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.55%) |Training time=0.46s (21.13%) |Others=0.12 (5.32%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5337|ppo_ep: 1|act_loss: 0.0557861328125|cri_loss: 0.02984619140625|unsuper_loss: 0.0
+average reward score: 4.90234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.13%) |Training time=0.46s (21.22%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5338|ppo_ep: 1|act_loss: 0.036956787109375|cri_loss: 0.01953125|unsuper_loss: 0.0
+average reward score: 4.61328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.21%) |Training time=0.46s (21.16%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.49
+[2023-04-14 12:04:00,950] [INFO] [logging.py:96:log_dist] [Rank 0] step=5340, skipped=70, lr=[2.8591022374515417e-06, 2.8591022374515417e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:04:00,968] [INFO] [timer.py:199:stop] epoch=0/micro_step=5340/global_step=5340, RunningAvgSamplesPerSec=105.36797444530119, CurrSamplesPerSec=107.08180623625253, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:04:01,061] [INFO] [logging.py:96:log_dist] [Rank 0] step=5340, skipped=86, lr=[1.495483099548462e-06, 1.495483099548462e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5339|ppo_ep: 1|act_loss: 0.0024166107177734375|cri_loss: 0.0014972686767578125|unsuper_loss: 0.0
+average reward score: 5.10546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.99%) |Training time=0.46s (21.39%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5340|ppo_ep: 1|act_loss: 0.0214385986328125|cri_loss: 0.013397216796875|unsuper_loss: 0.0
+average reward score: 4.5234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.30%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5341|ppo_ep: 1|act_loss: 0.0792236328125|cri_loss: 0.044647216796875|unsuper_loss: 0.0
+average reward score: 4.71484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.14%) |Training time=0.46s (21.24%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5342|ppo_ep: 1|act_loss: -0.01378631591796875|cri_loss: -0.006000518798828125|unsuper_loss: 0.0
+average reward score: 4.6953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.45s (21.06%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5343|ppo_ep: 1|act_loss: 0.0090484619140625|cri_loss: 0.005382537841796875|unsuper_loss: 0.0
+average reward score: 4.8984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.87%) |Training time=0.44s (20.43%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5344|ppo_ep: 1|act_loss: -0.015045166015625|cri_loss: -0.00681304931640625|unsuper_loss: 0.0
+average reward score: 5.390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.14%) |Training time=0.49s (22.28%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5345|ppo_ep: 1|act_loss: -0.0557861328125|cri_loss: -0.0258026123046875|unsuper_loss: 0.0
+average reward score: 5.00390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.78%) |Training time=0.44s (20.50%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5346|ppo_ep: 1|act_loss: -0.0288238525390625|cri_loss: -0.01389312744140625|unsuper_loss: 0.0
+average reward score: 4.21484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.86%) |Training time=0.44s (20.44%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5347|ppo_ep: 1|act_loss: -0.026397705078125|cri_loss: -0.01287841796875|unsuper_loss: 0.0
+average reward score: 5.21875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.83%) |Training time=0.44s (20.45%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5348|ppo_ep: 1|act_loss: -0.0306396484375|cri_loss: -0.0144195556640625|unsuper_loss: 0.0
+average reward score: 5.140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.61%) |Training time=0.44s (20.63%) |Others=0.10 (4.76%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.49
+[2023-04-14 12:04:22,444] [INFO] [logging.py:96:log_dist] [Rank 0] step=5350, skipped=70, lr=[2.8421524627572866e-06, 2.8421524627572866e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:04:22,463] [INFO] [timer.py:199:stop] epoch=0/micro_step=5350/global_step=5350, RunningAvgSamplesPerSec=105.38047175709768, CurrSamplesPerSec=112.89862840396556, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:04:22,555] [INFO] [logging.py:96:log_dist] [Rank 0] step=5350, skipped=86, lr=[1.486676738472515e-06, 1.486676738472515e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5349|ppo_ep: 1|act_loss: 0.0217742919921875|cri_loss: 0.01186370849609375|unsuper_loss: 0.0
+average reward score: 4.67578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.53%) |Training time=0.45s (20.81%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5350|ppo_ep: 1|act_loss: 0.1251220703125|cri_loss: 0.07086181640625|unsuper_loss: 0.0
+average reward score: 4.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.77%) |Training time=0.44s (20.55%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5351|ppo_ep: 1|act_loss: 0.006988525390625|cri_loss: 0.003627777099609375|unsuper_loss: 0.0
+average reward score: 6.07421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.70%) |Training time=0.44s (20.65%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5352|ppo_ep: 1|act_loss: 0.0302734375|cri_loss: 0.0184783935546875|unsuper_loss: 0.0
+average reward score: 4.58984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.89%) |Training time=0.45s (20.49%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5353|ppo_ep: 1|act_loss: 0.1055908203125|cri_loss: 0.05938720703125|unsuper_loss: 0.0
+average reward score: 4.58203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.76%) |Training time=0.44s (19.74%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.28 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5354|ppo_ep: 1|act_loss: -0.09637451171875|cri_loss: -0.041259765625|unsuper_loss: 0.0
+average reward score: 4.77734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.59%) |Training time=0.45s (20.76%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5355|ppo_ep: 1|act_loss: -0.01036834716796875|cri_loss: -0.00457000732421875|unsuper_loss: 0.0
+average reward score: 5.7890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.89%) |Training time=0.44s (20.39%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5356|ppo_ep: 1|act_loss: -0.0211181640625|cri_loss: -0.0093994140625|unsuper_loss: 0.0
+average reward score: 4.48046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.31%) |Training time=0.45s (21.05%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5357|ppo_ep: 1|act_loss: 0.004425048828125|cri_loss: 0.0028228759765625|unsuper_loss: 0.0
+average reward score: 4.546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.54%) |Training time=0.44s (20.79%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5358|ppo_ep: 1|act_loss: 0.0533447265625|cri_loss: 0.0287322998046875|unsuper_loss: 0.0
+average reward score: 5.3515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.56%) |Training time=0.44s (20.71%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.49
+[2023-04-14 12:04:44,014] [INFO] [logging.py:96:log_dist] [Rank 0] step=5360, skipped=70, lr=[2.8252320786468936e-06, 2.8252320786468936e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:04:44,033] [INFO] [timer.py:199:stop] epoch=0/micro_step=5360/global_step=5360, RunningAvgSamplesPerSec=105.39471890271973, CurrSamplesPerSec=112.58941413422879, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:04:44,125] [INFO] [logging.py:96:log_dist] [Rank 0] step=5360, skipped=86, lr=[1.477885397291814e-06, 1.477885397291814e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5359|ppo_ep: 1|act_loss: 0.03546142578125|cri_loss: 0.018402099609375|unsuper_loss: 0.0
+average reward score: 4.31640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.50%) |Training time=0.45s (20.85%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5360|ppo_ep: 1|act_loss: -0.01268768310546875|cri_loss: -0.005405426025390625|unsuper_loss: 0.0
+average reward score: 4.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.74s (75.74%) |Training time=0.46s (19.90%) |Others=0.10 (4.36%)|CurSamplesPerSec=13.89 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5361|ppo_ep: 1|act_loss: 0.218994140625|cri_loss: 0.1324462890625|unsuper_loss: 0.0
+average reward score: 4.7578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.71%) |Training time=0.46s (21.61%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5362|ppo_ep: 1|act_loss: -0.01126861572265625|cri_loss: -0.0054168701171875|unsuper_loss: 0.0
+average reward score: 5.40625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.68%) |Training time=0.46s (21.64%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5363|ppo_ep: 1|act_loss: 0.004543304443359375|cri_loss: 0.0038318634033203125|unsuper_loss: 0.0
+average reward score: 4.3125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.77%) |Training time=0.46s (21.55%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5364|ppo_ep: 1|act_loss: 0.0152435302734375|cri_loss: 0.0079498291015625|unsuper_loss: 0.0
+average reward score: 4.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.62%) |Training time=0.46s (21.70%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5365|ppo_ep: 1|act_loss: 0.0078277587890625|cri_loss: 0.00411224365234375|unsuper_loss: 0.0
+average reward score: 5.47265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.76%) |Training time=0.46s (21.54%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5366|ppo_ep: 1|act_loss: 0.002910614013671875|cri_loss: 0.0019168853759765625|unsuper_loss: 0.0
+average reward score: 4.17578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.55%) |Training time=0.47s (21.76%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5367|ppo_ep: 1|act_loss: -0.0033702850341796875|cri_loss: -0.0005016326904296875|unsuper_loss: 0.0
+average reward score: 4.6875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.72%) |Training time=0.46s (21.60%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5368|ppo_ep: 1|act_loss: 0.004711151123046875|cri_loss: 0.0026111602783203125|unsuper_loss: 0.0
+average reward score: 4.46484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.81%) |Training time=0.46s (21.50%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.49
+[2023-04-14 12:05:05,578] [INFO] [logging.py:96:log_dist] [Rank 0] step=5370, skipped=70, lr=[2.808341335921272e-06, 2.808341335921272e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:05:05,596] [INFO] [timer.py:199:stop] epoch=0/micro_step=5370/global_step=5370, RunningAvgSamplesPerSec=105.39691619380453, CurrSamplesPerSec=104.9207163684404, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:05:05,689] [INFO] [logging.py:96:log_dist] [Rank 0] step=5370, skipped=86, lr=[1.4691092063152417e-06, 1.4691092063152417e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5369|ppo_ep: 1|act_loss: -0.0125885009765625|cri_loss: -0.006072998046875|unsuper_loss: 0.0
+average reward score: 4.87890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.57%) |Training time=0.47s (21.79%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5370|ppo_ep: 1|act_loss: -0.005306243896484375|cri_loss: -0.002552032470703125|unsuper_loss: 0.0
+average reward score: 4.66796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.59%) |Training time=0.47s (21.74%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5371|ppo_ep: 1|act_loss: -0.149658203125|cri_loss: -0.0469970703125|unsuper_loss: 0.0
+average reward score: 4.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.23%) |Training time=0.48s (22.12%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5372|ppo_ep: 1|act_loss: -0.0189666748046875|cri_loss: -0.009185791015625|unsuper_loss: 0.0
+average reward score: 4.51953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.37%) |Training time=0.45s (20.93%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.49
+[2023-04-14 12:05:14,283] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 5373|ppo_ep: 1|act_loss: 0.0158843994140625|cri_loss: 0.0085906982421875|unsuper_loss: 0.0
+average reward score: 5.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.09%) |Training time=0.46s (21.71%) |Others=0.09 (4.19%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.49
+[2023-04-14 12:05:16,416] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 5374|ppo_ep: 1|act_loss: -0.03131103515625|cri_loss: -0.0146484375|unsuper_loss: 0.0
+average reward score: 4.73046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.21%) |Training time=0.46s (21.59%) |Others=0.09 (4.19%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5375|ppo_ep: 1|act_loss: -0.0276336669921875|cri_loss: -0.0133819580078125|unsuper_loss: 0.0
+average reward score: 4.17578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.90%) |Training time=0.45s (19.74%) |Others=0.10 (4.36%)|CurSamplesPerSec=13.88 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5376|ppo_ep: 1|act_loss: -0.02777099609375|cri_loss: -0.01239776611328125|unsuper_loss: 0.0
+average reward score: 4.32421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.54%) |Training time=0.47s (21.74%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5377|ppo_ep: 1|act_loss: -0.025543212890625|cri_loss: -0.00637054443359375|unsuper_loss: 0.0
+average reward score: 4.20703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.72%) |Training time=0.46s (21.62%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5378|ppo_ep: 1|act_loss: -0.023193359375|cri_loss: -0.011199951171875|unsuper_loss: 0.0
+average reward score: 4.1875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.79%) |Training time=0.46s (21.51%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.49
+[2023-04-14 12:05:27,179] [INFO] [logging.py:96:log_dist] [Rank 0] step=5380, skipped=70, lr=[2.7914804849419754e-06, 2.7914804849419754e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:05:27,197] [INFO] [timer.py:199:stop] epoch=0/micro_step=5380/global_step=5380, RunningAvgSamplesPerSec=105.39935198633347, CurrSamplesPerSec=106.7170614225843, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:05:27,294] [INFO] [logging.py:96:log_dist] [Rank 0] step=5380, skipped=88, lr=[1.462099249105216e-06, 1.462099249105216e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5379|ppo_ep: 1|act_loss: 0.0011959075927734375|cri_loss: 0.00070953369140625|unsuper_loss: 0.0
+average reward score: 4.671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.70%) |Others=0.10 (4.82%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5380|ppo_ep: 1|act_loss: -0.0013675689697265625|cri_loss: 0.0005321502685546875|unsuper_loss: 0.0
+average reward score: 5.3046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.72%) |Training time=0.46s (21.61%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5381|ppo_ep: 1|act_loss: 0.01001739501953125|cri_loss: 0.0052490234375|unsuper_loss: 0.0
+average reward score: 5.57421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.84%) |Training time=0.46s (21.13%) |Others=0.11 (5.03%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5382|ppo_ep: 1|act_loss: 0.0014362335205078125|cri_loss: 0.0013036727905273438|unsuper_loss: 0.0
+average reward score: 4.578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.42%) |Training time=0.48s (22.01%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5383|ppo_ep: 1|act_loss: 0.016265869140625|cri_loss: 0.00885772705078125|unsuper_loss: 0.0
+average reward score: 5.296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.61%) |Training time=0.47s (21.73%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5384|ppo_ep: 1|act_loss: 0.021728515625|cri_loss: 0.01099395751953125|unsuper_loss: 0.0
+average reward score: 4.4609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.82%) |Training time=0.46s (21.51%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5385|ppo_ep: 1|act_loss: 0.01617431640625|cri_loss: 0.008880615234375|unsuper_loss: 0.0
+average reward score: 4.75
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.90%) |Training time=0.46s (21.43%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5386|ppo_ep: 1|act_loss: 0.01033782958984375|cri_loss: 0.005344390869140625|unsuper_loss: 0.0
+average reward score: 4.85546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.48s (22.00%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5387|ppo_ep: 1|act_loss: 0.0189056396484375|cri_loss: 0.0101318359375|unsuper_loss: 0.0
+average reward score: 5.10546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.06%) |Training time=0.46s (21.26%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5388|ppo_ep: 1|act_loss: -0.087646484375|cri_loss: -0.0062255859375|unsuper_loss: 0.0
+average reward score: 4.29296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.74%) |Training time=0.46s (21.53%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.49
+[2023-04-14 12:05:48,661] [INFO] [logging.py:96:log_dist] [Rank 0] step=5390, skipped=70, lr=[2.774649775627491e-06, 2.774649775627491e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:05:48,679] [INFO] [timer.py:199:stop] epoch=0/micro_step=5390/global_step=5390, RunningAvgSamplesPerSec=105.40158110513609, CurrSamplesPerSec=108.0063218002324, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:05:48,771] [INFO] [logging.py:96:log_dist] [Rank 0] step=5390, skipped=88, lr=[1.4533506561564305e-06, 1.4533506561564305e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5389|ppo_ep: 1|act_loss: -0.0211029052734375|cri_loss: -0.0100860595703125|unsuper_loss: 0.0
+average reward score: 3.73828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.64%) |Training time=0.46s (21.40%) |Others=0.11 (4.95%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5390|ppo_ep: 1|act_loss: 0.06304931640625|cri_loss: 0.03387451171875|unsuper_loss: 0.0
+average reward score: 4.76171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.72s (75.20%) |Training time=0.46s (20.09%) |Others=0.11 (4.71%)|CurSamplesPerSec=14.00 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5391|ppo_ep: 1|act_loss: -0.00705718994140625|cri_loss: -0.00333404541015625|unsuper_loss: 0.0
+average reward score: 4.0703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.79%) |Training time=0.46s (21.55%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5392|ppo_ep: 1|act_loss: 0.036102294921875|cri_loss: 0.0203704833984375|unsuper_loss: 0.0
+average reward score: 5.90234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.04%) |Training time=0.48s (22.32%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5393|ppo_ep: 1|act_loss: -0.0137176513671875|cri_loss: -0.0059967041015625|unsuper_loss: 0.0
+average reward score: 5.15234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.58%) |Training time=0.47s (21.74%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5394|ppo_ep: 1|act_loss: 0.02825927734375|cri_loss: 0.0151519775390625|unsuper_loss: 0.0
+average reward score: 5.45703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.93%) |Training time=0.46s (21.40%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5395|ppo_ep: 1|act_loss: 0.01000213623046875|cri_loss: 0.00513458251953125|unsuper_loss: 0.0
+average reward score: 4.45703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.70%) |Training time=0.46s (21.51%) |Others=0.10 (4.78%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5396|ppo_ep: 1|act_loss: -0.00942230224609375|cri_loss: -0.00244140625|unsuper_loss: 0.0
+average reward score: 5.80078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.76%) |Training time=0.46s (21.54%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5397|ppo_ep: 1|act_loss: -0.0196685791015625|cri_loss: -0.0056610107421875|unsuper_loss: 0.0
+average reward score: 3.70703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.95%) |Training time=0.46s (21.39%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5398|ppo_ep: 1|act_loss: -0.02227783203125|cri_loss: -0.0088958740234375|unsuper_loss: 0.0
+average reward score: 4.4453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.66%) |Training time=0.46s (21.68%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.49
+[2023-04-14 12:06:10,231] [INFO] [logging.py:96:log_dist] [Rank 0] step=5400, skipped=70, lr=[2.757849457449532e-06, 2.757849457449532e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:06:10,249] [INFO] [timer.py:199:stop] epoch=0/micro_step=5400/global_step=5400, RunningAvgSamplesPerSec=105.40350440139551, CurrSamplesPerSec=107.39463033761628, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:06:10,342] [INFO] [logging.py:96:log_dist] [Rank 0] step=5400, skipped=88, lr=[1.4446175770758245e-06, 1.4446175770758245e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5399|ppo_ep: 1|act_loss: -0.012451171875|cri_loss: -0.00595855712890625|unsuper_loss: 0.0
+average reward score: 4.16015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.82%) |Training time=0.46s (21.52%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5400|ppo_ep: 1|act_loss: -0.0237579345703125|cri_loss: -0.011627197265625|unsuper_loss: 0.0
+average reward score: 4.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.45%) |Training time=0.47s (21.91%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5401|ppo_ep: 1|act_loss: -0.0233154296875|cri_loss: -0.0113372802734375|unsuper_loss: 0.0
+average reward score: 4.359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.63%) |Training time=0.46s (21.64%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5402|ppo_ep: 1|act_loss: -0.0163421630859375|cri_loss: -0.00795745849609375|unsuper_loss: 0.0
+average reward score: 4.75390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.48s (22.00%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5403|ppo_ep: 1|act_loss: -0.006744384765625|cri_loss: -0.0031108856201171875|unsuper_loss: 0.0
+average reward score: 4.984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.76%) |Training time=0.49s (22.64%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5404|ppo_ep: 1|act_loss: -0.060516357421875|cri_loss: -0.02740478515625|unsuper_loss: 0.0
+average reward score: 4.55859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.88%) |Training time=0.49s (22.52%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5405|ppo_ep: 1|act_loss: -0.004638671875|cri_loss: -0.0005283355712890625|unsuper_loss: 0.0
+average reward score: 4.53515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.57%) |Training time=0.50s (21.16%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.59 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5406|ppo_ep: 1|act_loss: 0.019073486328125|cri_loss: 0.0097198486328125|unsuper_loss: 0.0
+average reward score: 5.9375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.66%) |Training time=0.47s (21.69%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5407|ppo_ep: 1|act_loss: 0.024169921875|cri_loss: 0.012420654296875|unsuper_loss: 0.0
+average reward score: 4.74609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.50%) |Training time=0.47s (21.87%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5408|ppo_ep: 1|act_loss: 0.007778167724609375|cri_loss: 0.0053558349609375|unsuper_loss: 0.0
+average reward score: 5.26953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.33%) |Training time=0.48s (22.02%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.49
+[2023-04-14 12:06:31,997] [INFO] [logging.py:96:log_dist] [Rank 0] step=5410, skipped=70, lr=[2.7410797794293427e-06, 2.7410797794293427e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:06:32,015] [INFO] [timer.py:199:stop] epoch=0/micro_step=5410/global_step=5410, RunningAvgSamplesPerSec=105.39676190547151, CurrSamplesPerSec=106.50273442583405, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:06:32,108] [INFO] [logging.py:96:log_dist] [Rank 0] step=5410, skipped=88, lr=[1.4359001413086976e-06, 1.4359001413086976e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5409|ppo_ep: 1|act_loss: -0.00499725341796875|cri_loss: -0.002079010009765625|unsuper_loss: 0.0
+average reward score: 5.578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.84%) |Training time=0.46s (21.52%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5410|ppo_ep: 1|act_loss: -0.00609588623046875|cri_loss: -0.002689361572265625|unsuper_loss: 0.0
+average reward score: 4.4453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.46s (21.41%) |Others=0.11 (4.88%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5411|ppo_ep: 1|act_loss: 0.01546478271484375|cri_loss: 0.00794219970703125|unsuper_loss: 0.0
+average reward score: 5.4375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.50%) |Training time=0.49s (22.34%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5412|ppo_ep: 1|act_loss: -0.0040435791015625|cri_loss: -0.0018091201782226562|unsuper_loss: 0.0
+average reward score: 4.921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.68s (73.97%) |Training time=0.49s (21.62%) |Others=0.10 (4.41%)|CurSamplesPerSec=14.05 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5413|ppo_ep: 1|act_loss: 0.0044708251953125|cri_loss: 0.0024280548095703125|unsuper_loss: 0.0
+average reward score: 5.359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.81%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.49
+epoch: 0|step: 5414|ppo_ep: 1|act_loss: -0.035888671875|cri_loss: -0.0156097412109375|unsuper_loss: 0.0
+average reward score: 5.58984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.52%) |Training time=0.47s (21.80%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5415|ppo_ep: 1|act_loss: 0.0011444091796875|cri_loss: 0.0019741058349609375|unsuper_loss: 0.0
+average reward score: 6.15625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.42%) |Training time=0.47s (21.96%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5416|ppo_ep: 1|act_loss: -0.031219482421875|cri_loss: -0.01428985595703125|unsuper_loss: 0.0
+average reward score: 5.03515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.37%) |Training time=0.47s (21.97%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5417|ppo_ep: 1|act_loss: -0.01873779296875|cri_loss: -0.005611419677734375|unsuper_loss: 0.0
+average reward score: 5.0390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.59%) |Training time=0.47s (21.79%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5418|ppo_ep: 1|act_loss: 0.0180816650390625|cri_loss: 0.0113677978515625|unsuper_loss: 0.0
+average reward score: 4.5703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.16%) |Training time=0.48s (22.21%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.50
+[2023-04-14 12:06:53,724] [INFO] [logging.py:96:log_dist] [Rank 0] step=5420, skipped=70, lr=[2.724340990134006e-06, 2.724340990134006e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:06:53,742] [INFO] [timer.py:199:stop] epoch=0/micro_step=5420/global_step=5420, RunningAvgSamplesPerSec=105.39063099074835, CurrSamplesPerSec=103.22773090500765, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:06:53,834] [INFO] [logging.py:96:log_dist] [Rank 0] step=5420, skipped=88, lr=[1.4271984780684778e-06, 1.4271984780684778e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5419|ppo_ep: 1|act_loss: 1.4781951904296875e-05|cri_loss: 0.00043654441833496094|unsuper_loss: 0.0
+average reward score: 5.55859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.47s (21.99%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5420|ppo_ep: 1|act_loss: -0.0020809173583984375|cri_loss: -0.0009274482727050781|unsuper_loss: 0.0
+average reward score: 4.046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.58%) |Training time=0.50s (21.18%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.57 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5421|ppo_ep: 1|act_loss: 0.0184173583984375|cri_loss: 0.00998687744140625|unsuper_loss: 0.0
+average reward score: 4.9921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.58%) |Training time=0.47s (21.68%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5422|ppo_ep: 1|act_loss: 0.0377197265625|cri_loss: 0.019622802734375|unsuper_loss: 0.0
+average reward score: 6.3125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.07%) |Training time=0.48s (22.14%) |Others=0.10 (4.80%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5423|ppo_ep: 1|act_loss: 0.007061004638671875|cri_loss: 0.0047607421875|unsuper_loss: 0.0
+average reward score: 5.26171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.13%) |Training time=0.48s (22.23%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5424|ppo_ep: 1|act_loss: -0.003932952880859375|cri_loss: -0.0017566680908203125|unsuper_loss: 0.0
+average reward score: 4.0078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.37%) |Training time=0.47s (21.97%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5425|ppo_ep: 1|act_loss: 0.0092010498046875|cri_loss: 0.004894256591796875|unsuper_loss: 0.0
+average reward score: 5.08984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.17%) |Training time=0.48s (22.21%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5426|ppo_ep: 1|act_loss: 0.0012369155883789062|cri_loss: 0.0012493133544921875|unsuper_loss: 0.0
+average reward score: 4.671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.31%) |Training time=0.47s (22.03%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5427|ppo_ep: 1|act_loss: 0.018707275390625|cri_loss: 0.01065826416015625|unsuper_loss: 0.0
+average reward score: 5.1875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.21%) |Training time=0.48s (22.14%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5428|ppo_ep: 1|act_loss: -0.0004367828369140625|cri_loss: 4.744529724121094e-05|unsuper_loss: 0.0
+average reward score: 6.0
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.10%) |Training time=0.48s (22.26%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.50
+[2023-04-14 12:07:15,439] [INFO] [logging.py:96:log_dist] [Rank 0] step=5430, skipped=70, lr=[2.7076333376727596e-06, 2.7076333376727596e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:07:15,458] [INFO] [timer.py:199:stop] epoch=0/micro_step=5430/global_step=5430, RunningAvgSamplesPerSec=105.38353117169935, CurrSamplesPerSec=104.13257693310327, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:07:15,550] [INFO] [logging.py:96:log_dist] [Rank 0] step=5430, skipped=88, lr=[1.4185127163348057e-06, 1.4185127163348057e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5429|ppo_ep: 1|act_loss: -0.0153350830078125|cri_loss: -0.007526397705078125|unsuper_loss: 0.0
+average reward score: 5.08984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.87%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5430|ppo_ep: 1|act_loss: -0.00426483154296875|cri_loss: -0.002044677734375|unsuper_loss: 0.0
+average reward score: 5.65234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.30%) |Training time=0.47s (22.05%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5431|ppo_ep: 1|act_loss: 0.0103607177734375|cri_loss: 0.0079803466796875|unsuper_loss: 0.0
+average reward score: 5.43359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.25%) |Training time=0.48s (22.08%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5432|ppo_ep: 1|act_loss: 0.003936767578125|cri_loss: 0.00244903564453125|unsuper_loss: 0.0
+average reward score: 5.59765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.48s (21.94%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5433|ppo_ep: 1|act_loss: -0.005859375|cri_loss: -0.00262451171875|unsuper_loss: 0.0
+average reward score: 5.69921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.47s (21.79%) |Others=0.10 (4.78%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5434|ppo_ep: 1|act_loss: 0.02886962890625|cri_loss: 0.01512908935546875|unsuper_loss: 0.0
+average reward score: 4.7890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.96%) |Training time=0.49s (22.45%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5435|ppo_ep: 1|act_loss: 0.0050048828125|cri_loss: 0.0025997161865234375|unsuper_loss: 0.0
+average reward score: 4.9765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.60s (71.87%) |Training time=0.49s (21.83%) |Others=0.14 (6.30%)|CurSamplesPerSec=14.40 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5436|ppo_ep: 1|act_loss: -0.01375579833984375|cri_loss: -0.006671905517578125|unsuper_loss: 0.0
+average reward score: 5.58984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.68s (74.51%) |Training time=0.47s (20.98%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.19 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5437|ppo_ep: 1|act_loss: 0.007053375244140625|cri_loss: 0.00396728515625|unsuper_loss: 0.0
+average reward score: 5.4375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.48%) |Training time=0.48s (21.94%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5438|ppo_ep: 1|act_loss: -0.014312744140625|cri_loss: -0.0068359375|unsuper_loss: 0.0
+average reward score: 5.15625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.68%) |Training time=0.47s (21.71%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.50
+[2023-04-14 12:07:37,238] [INFO] [logging.py:96:log_dist] [Rank 0] step=5440, skipped=70, lr=[2.69095706969332e-06, 2.69095706969332e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:07:37,257] [INFO] [timer.py:199:stop] epoch=0/micro_step=5440/global_step=5440, RunningAvgSamplesPerSec=105.37736171331294, CurrSamplesPerSec=105.67992189222387, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:07:37,349] [INFO] [logging.py:96:log_dist] [Rank 0] step=5440, skipped=88, lr=[1.4098429848516231e-06, 1.4098429848516231e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5439|ppo_ep: 1|act_loss: -0.002811431884765625|cri_loss: -0.0012006759643554688|unsuper_loss: 0.0
+average reward score: 4.47265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.88%) |Training time=0.47s (21.51%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5440|ppo_ep: 1|act_loss: -0.0201873779296875|cri_loss: -0.00963592529296875|unsuper_loss: 0.0
+average reward score: 5.0546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.59%) |Training time=0.48s (21.83%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5441|ppo_ep: 1|act_loss: -0.0162506103515625|cri_loss: -0.007709503173828125|unsuper_loss: 0.0
+average reward score: 4.734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.19%) |Training time=0.47s (21.26%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5442|ppo_ep: 1|act_loss: 0.10546875|cri_loss: 0.05682373046875|unsuper_loss: 0.0
+average reward score: 5.04296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.90%) |Training time=0.47s (20.68%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.08 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5443|ppo_ep: 1|act_loss: 0.0166015625|cri_loss: 0.0086212158203125|unsuper_loss: 0.0
+average reward score: 5.61328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.87%) |Training time=0.46s (21.49%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5444|ppo_ep: 1|act_loss: -0.0185394287109375|cri_loss: -0.0087432861328125|unsuper_loss: 0.0
+average reward score: 4.95703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.70%) |Training time=0.47s (21.67%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5445|ppo_ep: 1|act_loss: 0.0125274658203125|cri_loss: 0.00646209716796875|unsuper_loss: 0.0
+average reward score: 6.6328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.80%) |Training time=0.47s (21.55%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5446|ppo_ep: 1|act_loss: 0.0301513671875|cri_loss: 0.0159759521484375|unsuper_loss: 0.0
+average reward score: 5.2578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.79%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5447|ppo_ep: 1|act_loss: 0.0225982666015625|cri_loss: 0.01230621337890625|unsuper_loss: 0.0
+average reward score: 5.01953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.62%) |Training time=0.47s (21.69%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5448|ppo_ep: 1|act_loss: -0.020538330078125|cri_loss: -0.009857177734375|unsuper_loss: 0.0
+average reward score: 5.35546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.44%) |Training time=0.48s (21.91%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.50
+[2023-04-14 12:07:59,090] [INFO] [logging.py:96:log_dist] [Rank 0] step=5450, skipped=70, lr=[2.6743124333782032e-06, 2.6743124333782032e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:07:59,109] [INFO] [timer.py:199:stop] epoch=0/micro_step=5450/global_step=5450, RunningAvgSamplesPerSec=105.37402939249111, CurrSamplesPerSec=102.53619653180074, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:07:59,202] [INFO] [logging.py:96:log_dist] [Rank 0] step=5450, skipped=88, lr=[1.4011894121252672e-06, 1.4011894121252672e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5449|ppo_ep: 1|act_loss: -0.00958251953125|cri_loss: -0.00408935546875|unsuper_loss: 0.0
+average reward score: 5.09375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.47s (21.90%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5450|ppo_ep: 1|act_loss: 0.00853729248046875|cri_loss: 0.004505157470703125|unsuper_loss: 0.0
+average reward score: 4.96875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.04%) |Training time=0.49s (22.37%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5451|ppo_ep: 1|act_loss: -0.00821685791015625|cri_loss: -0.0039215087890625|unsuper_loss: 0.0
+average reward score: 4.9140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.30%) |Training time=0.48s (20.40%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5452|ppo_ep: 1|act_loss: -0.01424407958984375|cri_loss: -0.006511688232421875|unsuper_loss: 0.0
+average reward score: 5.15625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.13%) |Training time=0.48s (22.23%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5453|ppo_ep: 1|act_loss: -0.027374267578125|cri_loss: -0.0124053955078125|unsuper_loss: 0.0
+average reward score: 4.53515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.52%) |Training time=0.47s (21.87%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5454|ppo_ep: 1|act_loss: -0.0054931640625|cri_loss: -0.002529144287109375|unsuper_loss: 0.0
+average reward score: 5.34765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.49%) |Training time=0.47s (21.88%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5455|ppo_ep: 1|act_loss: -0.007137298583984375|cri_loss: -0.003376007080078125|unsuper_loss: 0.0
+average reward score: 5.12109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.67%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5456|ppo_ep: 1|act_loss: 0.020599365234375|cri_loss: 0.0107574462890625|unsuper_loss: 0.0
+average reward score: 5.07421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.81%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5457|ppo_ep: 1|act_loss: -0.03179931640625|cri_loss: -0.01404571533203125|unsuper_loss: 0.0
+average reward score: 4.5625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.82%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5458|ppo_ep: 1|act_loss: 0.0020694732666015625|cri_loss: 0.0013256072998046875|unsuper_loss: 0.0
+average reward score: 4.359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.79%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50
+[2023-04-14 12:08:20,904] [INFO] [logging.py:96:log_dist] [Rank 0] step=5460, skipped=70, lr=[2.6576996754410752e-06, 2.6576996754410752e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:08:20,922] [INFO] [timer.py:199:stop] epoch=0/micro_step=5460/global_step=5460, RunningAvgSamplesPerSec=105.36840179701572, CurrSamplesPerSec=102.48804438627545, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:08:21,015] [INFO] [logging.py:96:log_dist] [Rank 0] step=5460, skipped=88, lr=[1.3925521264225587e-06, 1.3925521264225587e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5459|ppo_ep: 1|act_loss: -0.00409698486328125|cri_loss: -0.0016803741455078125|unsuper_loss: 0.0
+average reward score: 5.59375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.49%) |Training time=0.47s (21.93%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5460|ppo_ep: 1|act_loss: 0.01171875|cri_loss: 0.0059967041015625|unsuper_loss: 0.0
+average reward score: 4.5625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.62%) |Training time=0.47s (21.73%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5461|ppo_ep: 1|act_loss: -0.004016876220703125|cri_loss: 0.001621246337890625|unsuper_loss: 0.0
+average reward score: 5.6953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.93%) |Training time=0.46s (21.44%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5462|ppo_ep: 1|act_loss: -0.0219879150390625|cri_loss: -0.0105743408203125|unsuper_loss: 0.0
+average reward score: 4.41015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.87%) |Training time=0.47s (21.52%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5463|ppo_ep: 1|act_loss: 0.010955810546875|cri_loss: 0.0068206787109375|unsuper_loss: 0.0
+average reward score: 4.73828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.75%) |Training time=0.47s (21.62%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5464|ppo_ep: 1|act_loss: 0.005199432373046875|cri_loss: 0.0028209686279296875|unsuper_loss: 0.0
+average reward score: 4.35546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.90%) |Training time=0.46s (21.47%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5465|ppo_ep: 1|act_loss: 0.00040841102600097656|cri_loss: 0.0008087158203125|unsuper_loss: 0.0
+average reward score: 5.42578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.86%) |Training time=0.46s (21.49%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5466|ppo_ep: 1|act_loss: -0.018341064453125|cri_loss: -0.00862884521484375|unsuper_loss: 0.0
+average reward score: 5.0546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.74s (74.94%) |Training time=0.48s (20.75%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5467|ppo_ep: 1|act_loss: 0.006927490234375|cri_loss: 0.0036773681640625|unsuper_loss: 0.0
+average reward score: 5.17578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.06%) |Training time=0.48s (22.30%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5468|ppo_ep: 1|act_loss: 0.01983642578125|cri_loss: 0.01010894775390625|unsuper_loss: 0.0
+average reward score: 5.78125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.23%) |Training time=0.48s (22.14%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50
+[2023-04-14 12:08:42,709] [INFO] [logging.py:96:log_dist] [Rank 0] step=5470, skipped=70, lr=[2.641119042123085e-06, 2.641119042123085e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:08:42,727] [INFO] [timer.py:199:stop] epoch=0/micro_step=5470/global_step=5470, RunningAvgSamplesPerSec=105.36424124670562, CurrSamplesPerSec=99.5353366808979, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:08:42,821] [INFO] [logging.py:96:log_dist] [Rank 0] step=5470, skipped=88, lr=[1.383931255768909e-06, 1.383931255768909e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5469|ppo_ep: 1|act_loss: 0.00701904296875|cri_loss: 0.00399017333984375|unsuper_loss: 0.0
+average reward score: 5.46484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.97%) |Training time=0.48s (22.37%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5470|ppo_ep: 1|act_loss: -0.0755615234375|cri_loss: -0.035247802734375|unsuper_loss: 0.0
+average reward score: 5.08203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.50%) |Training time=0.49s (22.24%) |Others=0.12 (5.27%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5471|ppo_ep: 1|act_loss: -0.0297088623046875|cri_loss: -0.013824462890625|unsuper_loss: 0.0
+average reward score: 7.29296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.68s (74.39%) |Training time=0.48s (21.17%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.15 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5472|ppo_ep: 1|act_loss: -0.00472259521484375|cri_loss: -0.0017681121826171875|unsuper_loss: 0.0
+average reward score: 5.78125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.72%) |Training time=0.49s (22.65%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5473|ppo_ep: 1|act_loss: -0.00201416015625|cri_loss: -0.00042247772216796875|unsuper_loss: 0.0
+average reward score: 5.02734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.74%) |Training time=0.49s (22.47%) |Others=0.10 (4.79%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5474|ppo_ep: 1|act_loss: 0.040985107421875|cri_loss: 0.02191162109375|unsuper_loss: 0.0
+average reward score: 4.43359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.97%) |Training time=0.48s (22.40%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50
+[2023-04-14 12:08:55,918] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 5475|ppo_ep: 1|act_loss: -0.01708984375|cri_loss: -0.0081634521484375|unsuper_loss: 0.0
+average reward score: 5.72265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.26%) |Training time=0.49s (22.58%) |Others=0.09 (4.16%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.50
+[2023-04-14 12:08:58,069] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 5476|ppo_ep: 1|act_loss: 0.01763916015625|cri_loss: 0.0101776123046875|unsuper_loss: 0.0
+average reward score: 5.32421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.17%) |Training time=0.49s (22.68%) |Others=0.09 (4.15%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5477|ppo_ep: 1|act_loss: -0.0136566162109375|cri_loss: -0.006420135498046875|unsuper_loss: 0.0
+average reward score: 4.8203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.61%) |Training time=0.47s (21.73%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5478|ppo_ep: 1|act_loss: -0.00641632080078125|cri_loss: -0.0027980804443359375|unsuper_loss: 0.0
+average reward score: 4.64453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.76%) |Training time=0.47s (21.60%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.50
+[2023-04-14 12:09:04,432] [INFO] [logging.py:96:log_dist] [Rank 0] step=5480, skipped=70, lr=[2.624570779189218e-06, 2.624570779189218e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:09:04,450] [INFO] [timer.py:199:stop] epoch=0/micro_step=5480/global_step=5480, RunningAvgSamplesPerSec=105.35506986444176, CurrSamplesPerSec=103.88135870015302, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:09:04,543] [INFO] [logging.py:96:log_dist] [Rank 0] step=5480, skipped=90, lr=[1.3770464639591064e-06, 1.3770464639591064e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5479|ppo_ep: 1|act_loss: 0.081298828125|cri_loss: 0.0440673828125|unsuper_loss: 0.0
+average reward score: 4.8359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.77%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5480|ppo_ep: 1|act_loss: 0.0038700103759765625|cri_loss: 0.0020809173583984375|unsuper_loss: 0.0
+average reward score: 5.1484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.13%) |Training time=0.48s (22.23%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5481|ppo_ep: 1|act_loss: 0.002227783203125|cri_loss: 0.0013637542724609375|unsuper_loss: 0.0
+average reward score: 5.58203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.74s (74.94%) |Training time=0.48s (20.76%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5482|ppo_ep: 1|act_loss: 0.04876708984375|cri_loss: 0.02801513671875|unsuper_loss: 0.0
+average reward score: 4.67578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.93%) |Training time=0.49s (22.46%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5483|ppo_ep: 1|act_loss: -0.0153961181640625|cri_loss: -0.0074615478515625|unsuper_loss: 0.0
+average reward score: 4.8984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.18%) |Training time=0.48s (22.18%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5484|ppo_ep: 1|act_loss: -0.034210205078125|cri_loss: -0.0165557861328125|unsuper_loss: 0.0
+average reward score: 4.9453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.00%) |Training time=0.48s (22.35%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5485|ppo_ep: 1|act_loss: -0.01233673095703125|cri_loss: -0.005908966064453125|unsuper_loss: 0.0
+average reward score: 4.61328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.97%) |Training time=0.48s (22.42%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5486|ppo_ep: 1|act_loss: -0.01239013671875|cri_loss: -0.005908966064453125|unsuper_loss: 0.0
+average reward score: 5.265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.92%) |Training time=0.49s (22.47%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5487|ppo_ep: 1|act_loss: 0.019683837890625|cri_loss: 0.01030731201171875|unsuper_loss: 0.0
+average reward score: 4.40234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.03%) |Training time=0.48s (22.35%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5488|ppo_ep: 1|act_loss: -0.0013446807861328125|cri_loss: -0.0006318092346191406|unsuper_loss: 0.0
+average reward score: 4.875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.03%) |Training time=0.48s (22.34%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50
+[2023-04-14 12:09:26,196] [INFO] [logging.py:96:log_dist] [Rank 0] step=5490, skipped=70, lr=[2.6080551319246483e-06, 2.6080551319246483e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:09:26,214] [INFO] [timer.py:199:stop] epoch=0/micro_step=5490/global_step=5490, RunningAvgSamplesPerSec=105.34444744793811, CurrSamplesPerSec=99.1912971502897, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:09:26,306] [INFO] [logging.py:96:log_dist] [Rank 0] step=5490, skipped=90, lr=[1.3684554622399404e-06, 1.3684554622399404e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5489|ppo_ep: 1|act_loss: 0.0125579833984375|cri_loss: 0.006900787353515625|unsuper_loss: 0.0
+average reward score: 5.0546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.96%) |Training time=0.49s (22.43%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5490|ppo_ep: 1|act_loss: 0.0027523040771484375|cri_loss: 0.0015411376953125|unsuper_loss: 0.0
+average reward score: 4.28125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.18%) |Training time=0.48s (22.20%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5491|ppo_ep: 1|act_loss: 0.029052734375|cri_loss: 0.014892578125|unsuper_loss: 0.0
+average reward score: 5.37109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.43%) |Training time=0.47s (21.91%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5492|ppo_ep: 1|act_loss: -0.00307464599609375|cri_loss: -0.00016021728515625|unsuper_loss: 0.0
+average reward score: 5.15234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.72%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5493|ppo_ep: 1|act_loss: 0.0474853515625|cri_loss: 0.025177001953125|unsuper_loss: 0.0
+average reward score: 4.61328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.51%) |Training time=0.45s (20.85%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5494|ppo_ep: 1|act_loss: -0.020263671875|cri_loss: -0.0094757080078125|unsuper_loss: 0.0
+average reward score: 4.640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.28%) |Training time=0.48s (22.12%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5495|ppo_ep: 1|act_loss: -0.0246429443359375|cri_loss: -0.0103759765625|unsuper_loss: 0.0
+average reward score: 5.4375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.97%) |Training time=0.46s (21.33%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5496|ppo_ep: 1|act_loss: -0.0308074951171875|cri_loss: -0.0125885009765625|unsuper_loss: 0.0
+average reward score: 4.64453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.70s (72.86%) |Training time=0.51s (21.72%) |Others=0.13 (5.43%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5497|ppo_ep: 1|act_loss: -0.038238525390625|cri_loss: -0.0164947509765625|unsuper_loss: 0.0
+average reward score: 5.1015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.01%) |Training time=0.46s (21.30%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5498|ppo_ep: 1|act_loss: -0.0411376953125|cri_loss: -0.019500732421875|unsuper_loss: 0.0
+average reward score: 4.7265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.82%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.50
+[2023-04-14 12:09:48,028] [INFO] [logging.py:96:log_dist] [Rank 0] step=5500, skipped=70, lr=[2.591572345131111e-06, 2.591572345131111e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:09:48,046] [INFO] [timer.py:199:stop] epoch=0/micro_step=5500/global_step=5500, RunningAvgSamplesPerSec=105.3416564703474, CurrSamplesPerSec=104.90333129598685, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:09:48,139] [INFO] [logging.py:96:log_dist] [Rank 0] step=5500, skipped=90, lr=[1.3598812327405274e-06, 1.3598812327405274e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5499|ppo_ep: 1|act_loss: -0.023468017578125|cri_loss: -0.0111083984375|unsuper_loss: 0.0
+average reward score: 4.46484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.68%) |Training time=0.47s (21.65%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5500|ppo_ep: 1|act_loss: -0.010467529296875|cri_loss: -0.003673553466796875|unsuper_loss: 0.0
+average reward score: 5.546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.86%) |Training time=0.45s (20.52%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5501|ppo_ep: 1|act_loss: -0.0016307830810546875|cri_loss: -0.0007171630859375|unsuper_loss: 0.0
+average reward score: 5.59375
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.10%) |Training time=0.46s (20.46%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.08 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5502|ppo_ep: 1|act_loss: 0.020538330078125|cri_loss: 0.010650634765625|unsuper_loss: 0.0
+average reward score: 3.98828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.34%) |Training time=0.48s (22.00%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5503|ppo_ep: 1|act_loss: 0.0221099853515625|cri_loss: 0.01146697998046875|unsuper_loss: 0.0
+average reward score: 5.8046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.48%) |Training time=0.48s (21.92%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5504|ppo_ep: 1|act_loss: 0.015380859375|cri_loss: 0.008544921875|unsuper_loss: 0.0
+average reward score: 5.2265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.11%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5505|ppo_ep: 1|act_loss: -0.045379638671875|cri_loss: -0.021820068359375|unsuper_loss: 0.0
+average reward score: 6.23046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.37%) |Training time=0.48s (21.98%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5506|ppo_ep: 1|act_loss: 0.0024051666259765625|cri_loss: 0.0026035308837890625|unsuper_loss: 0.0
+average reward score: 5.1796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.37%) |Training time=0.48s (22.02%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5507|ppo_ep: 1|act_loss: 0.017333984375|cri_loss: 0.00897979736328125|unsuper_loss: 0.0
+average reward score: 5.5625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.49%) |Training time=0.48s (21.87%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5508|ppo_ep: 1|act_loss: 0.03814697265625|cri_loss: 0.0200347900390625|unsuper_loss: 0.0
+average reward score: 4.98828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.49%) |Training time=0.48s (21.91%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.50
+[2023-04-14 12:10:09,877] [INFO] [logging.py:96:log_dist] [Rank 0] step=5510, skipped=70, lr=[2.5751226631232703e-06, 2.5751226631232703e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:10:09,895] [INFO] [timer.py:199:stop] epoch=0/micro_step=5510/global_step=5510, RunningAvgSamplesPerSec=105.33779970588965, CurrSamplesPerSec=105.1696580948784, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:10:09,987] [INFO] [logging.py:96:log_dist] [Rank 0] step=5510, skipped=90, lr=[1.3513239025516312e-06, 1.3513239025516312e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5509|ppo_ep: 1|act_loss: 0.01238250732421875|cri_loss: 0.00702667236328125|unsuper_loss: 0.0
+average reward score: 4.78125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.47s (21.58%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5510|ppo_ep: 1|act_loss: -0.0090484619140625|cri_loss: -0.0040740966796875|unsuper_loss: 0.0
+average reward score: 4.44140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.84%) |Training time=0.46s (21.50%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5511|ppo_ep: 1|act_loss: 0.001495361328125|cri_loss: 0.00122833251953125|unsuper_loss: 0.0
+average reward score: 4.609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.76%) |Training time=0.46s (19.92%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.74 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5512|ppo_ep: 1|act_loss: -0.004703521728515625|cri_loss: -0.0022373199462890625|unsuper_loss: 0.0
+average reward score: 5.6484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.91%) |Training time=0.46s (21.46%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5513|ppo_ep: 1|act_loss: -0.0171966552734375|cri_loss: -0.00771331787109375|unsuper_loss: 0.0
+average reward score: 5.3359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.81%) |Training time=0.47s (21.56%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5514|ppo_ep: 1|act_loss: -0.035400390625|cri_loss: -0.017181396484375|unsuper_loss: 0.0
+average reward score: 4.69921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.72%) |Training time=0.47s (21.63%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5515|ppo_ep: 1|act_loss: -0.051422119140625|cri_loss: -0.0250396728515625|unsuper_loss: 0.0
+average reward score: 5.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.80%) |Training time=0.47s (21.57%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5516|ppo_ep: 1|act_loss: -0.009002685546875|cri_loss: -0.00354766845703125|unsuper_loss: 0.0
+average reward score: 4.46484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.46s (21.53%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5517|ppo_ep: 1|act_loss: -0.044677734375|cri_loss: -0.021240234375|unsuper_loss: 0.0
+average reward score: 4.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.46s (21.54%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5518|ppo_ep: 1|act_loss: 0.028076171875|cri_loss: 0.01453399658203125|unsuper_loss: 0.0
+average reward score: 5.515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.70%) |Training time=0.47s (21.66%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50
+[2023-04-14 12:10:31,647] [INFO] [logging.py:96:log_dist] [Rank 0] step=5520, skipped=70, lr=[2.5587063297250976e-06, 2.5587063297250976e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:10:31,665] [INFO] [timer.py:199:stop] epoch=0/micro_step=5520/global_step=5520, RunningAvgSamplesPerSec=105.33906190165855, CurrSamplesPerSec=109.92073812427265, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:10:31,758] [INFO] [logging.py:96:log_dist] [Rank 0] step=5520, skipped=90, lr=[1.342783598513529e-06, 1.342783598513529e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5519|ppo_ep: 1|act_loss: 0.0145111083984375|cri_loss: 0.00746917724609375|unsuper_loss: 0.0
+average reward score: 4.609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.42%) |Training time=0.45s (20.96%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5520|ppo_ep: 1|act_loss: 0.0042877197265625|cri_loss: 0.00226593017578125|unsuper_loss: 0.0
+average reward score: 4.6484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.67%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5521|ppo_ep: 1|act_loss: -0.011566162109375|cri_loss: -0.00533294677734375|unsuper_loss: 0.0
+average reward score: 5.86328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.74%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5522|ppo_ep: 1|act_loss: 0.009521484375|cri_loss: 0.004878997802734375|unsuper_loss: 0.0
+average reward score: 5.0078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.72%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5523|ppo_ep: 1|act_loss: 0.01145172119140625|cri_loss: 0.0062408447265625|unsuper_loss: 0.0
+average reward score: 4.734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.58%) |Training time=0.45s (20.75%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5524|ppo_ep: 1|act_loss: -0.00016379356384277344|cri_loss: 0.00010895729064941406|unsuper_loss: 0.0
+average reward score: 5.171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.18%) |Training time=0.46s (21.17%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5525|ppo_ep: 1|act_loss: -0.0079498291015625|cri_loss: -0.003627777099609375|unsuper_loss: 0.0
+average reward score: 4.75
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.60s (71.96%) |Training time=0.52s (23.35%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.42 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5526|ppo_ep: 1|act_loss: -0.0076751708984375|cri_loss: -0.0037555694580078125|unsuper_loss: 0.0
+average reward score: 4.5859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.14%) |Training time=0.46s (21.23%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5527|ppo_ep: 1|act_loss: 0.01256561279296875|cri_loss: 0.00673675537109375|unsuper_loss: 0.0
+average reward score: 5.83984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.33%) |Training time=0.45s (21.03%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5528|ppo_ep: 1|act_loss: 0.04022216796875|cri_loss: 0.024658203125|unsuper_loss: 0.0
+average reward score: 4.41015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.00%) |Training time=0.46s (21.35%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50
+[2023-04-14 12:10:53,298] [INFO] [logging.py:96:log_dist] [Rank 0] step=5530, skipped=70, lr=[2.5423235882662523e-06, 2.5423235882662523e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:10:53,316] [INFO] [timer.py:199:stop] epoch=0/micro_step=5530/global_step=5530, RunningAvgSamplesPerSec=105.33915297205107, CurrSamplesPerSec=106.70993440034474, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:10:53,409] [INFO] [logging.py:96:log_dist] [Rank 0] step=5530, skipped=90, lr=[1.3342604472141296e-06, 1.3342604472141296e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5529|ppo_ep: 1|act_loss: -0.0207672119140625|cri_loss: -0.00991058349609375|unsuper_loss: 0.0
+average reward score: 4.6953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.07%) |Training time=0.46s (21.33%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5530|ppo_ep: 1|act_loss: -0.0274200439453125|cri_loss: -0.01328277587890625|unsuper_loss: 0.0
+average reward score: 5.359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.72s (75.50%) |Training time=0.46s (20.08%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.08 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5531|ppo_ep: 1|act_loss: 0.009490966796875|cri_loss: 0.005069732666015625|unsuper_loss: 0.0
+average reward score: 4.8203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.11%) |Training time=0.46s (21.30%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5532|ppo_ep: 1|act_loss: -0.006946563720703125|cri_loss: -0.00295257568359375|unsuper_loss: 0.0
+average reward score: 5.43359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.44%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5533|ppo_ep: 1|act_loss: 0.03204345703125|cri_loss: 0.016845703125|unsuper_loss: 0.0
+average reward score: 5.41015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.84%) |Training time=0.46s (21.53%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5534|ppo_ep: 1|act_loss: -0.0038604736328125|cri_loss: -0.001781463623046875|unsuper_loss: 0.0
+average reward score: 5.5390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.29%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5535|ppo_ep: 1|act_loss: -0.01529693603515625|cri_loss: -0.0073699951171875|unsuper_loss: 0.0
+average reward score: 4.7265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.12%) |Training time=0.46s (21.25%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5536|ppo_ep: 1|act_loss: -0.0262298583984375|cri_loss: -0.0125732421875|unsuper_loss: 0.0
+average reward score: 5.5390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.72%) |Training time=0.47s (21.65%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5537|ppo_ep: 1|act_loss: 0.0080108642578125|cri_loss: 0.0042877197265625|unsuper_loss: 0.0
+average reward score: 5.44140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.93%) |Training time=0.46s (21.41%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5538|ppo_ep: 1|act_loss: -0.0191802978515625|cri_loss: -0.00933837890625|unsuper_loss: 0.0
+average reward score: 5.68359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.05%) |Training time=0.44s (20.28%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.50
+[2023-04-14 12:11:14,996] [INFO] [logging.py:96:log_dist] [Rank 0] step=5540, skipped=70, lr=[2.5259746815784854e-06, 2.5259746815784854e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:11:15,014] [INFO] [timer.py:199:stop] epoch=0/micro_step=5540/global_step=5540, RunningAvgSamplesPerSec=105.344337416235, CurrSamplesPerSec=110.70370709457649, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:11:15,107] [INFO] [logging.py:96:log_dist] [Rank 0] step=5540, skipped=90, lr=[1.3257545749870973e-06, 1.3257545749870973e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5539|ppo_ep: 1|act_loss: -0.007534027099609375|cri_loss: -0.00356292724609375|unsuper_loss: 0.0
+average reward score: 5.08984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.41%) |Training time=0.45s (20.96%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5540|ppo_ep: 1|act_loss: 0.03045654296875|cri_loss: 0.0161895751953125|unsuper_loss: 0.0
+average reward score: 5.3828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.76%) |Training time=0.46s (19.92%) |Others=0.10 (4.33%)|CurSamplesPerSec=13.77 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5541|ppo_ep: 1|act_loss: -0.0088043212890625|cri_loss: -0.003543853759765625|unsuper_loss: 0.0
+average reward score: 4.87109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.03%) |Training time=0.46s (21.32%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5542|ppo_ep: 1|act_loss: -0.0079803466796875|cri_loss: -0.003627777099609375|unsuper_loss: 0.0
+average reward score: 5.8046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.84%) |Training time=0.46s (21.54%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5543|ppo_ep: 1|act_loss: 0.0159912109375|cri_loss: 0.0084991455078125|unsuper_loss: 0.0
+average reward score: 5.6328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.46s (21.53%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5544|ppo_ep: 1|act_loss: -0.01409912109375|cri_loss: -0.00643157958984375|unsuper_loss: 0.0
+average reward score: 5.3203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.85%) |Training time=0.46s (21.51%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5545|ppo_ep: 1|act_loss: 0.0128326416015625|cri_loss: 0.00830841064453125|unsuper_loss: 0.0
+average reward score: 5.33984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.81%) |Training time=0.47s (21.56%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5546|ppo_ep: 1|act_loss: 0.002899169921875|cri_loss: 0.0016632080078125|unsuper_loss: 0.0
+average reward score: 5.765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.83%) |Training time=0.49s (22.54%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5547|ppo_ep: 1|act_loss: 0.00019216537475585938|cri_loss: 0.00024962425231933594|unsuper_loss: 0.0
+average reward score: 4.4296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.49%) |Training time=0.47s (21.87%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5548|ppo_ep: 1|act_loss: -0.00588226318359375|cri_loss: -0.002727508544921875|unsuper_loss: 0.0
+average reward score: 5.62890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.99%) |Training time=0.48s (22.40%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50
+[2023-04-14 12:11:36,745] [INFO] [logging.py:96:log_dist] [Rank 0] step=5550, skipped=70, lr=[2.5096598519920344e-06, 2.5096598519920344e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:11:36,763] [INFO] [timer.py:199:stop] epoch=0/micro_step=5550/global_step=5550, RunningAvgSamplesPerSec=105.34174265021153, CurrSamplesPerSec=101.51943477330195, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:11:36,856] [INFO] [logging.py:96:log_dist] [Rank 0] step=5550, skipped=90, lr=[1.3172661079099752e-06, 1.3172661079099752e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5549|ppo_ep: 1|act_loss: 0.0240631103515625|cri_loss: 0.01264190673828125|unsuper_loss: 0.0
+average reward score: 6.34765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.21%) |Training time=0.48s (22.16%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5550|ppo_ep: 1|act_loss: 0.01739501953125|cri_loss: 0.00885772705078125|unsuper_loss: 0.0
+average reward score: 4.58203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.33%) |Training time=0.48s (22.03%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5551|ppo_ep: 1|act_loss: 0.00698089599609375|cri_loss: 0.00400543212890625|unsuper_loss: 0.0
+average reward score: 5.34375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.08%) |Training time=0.48s (22.30%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5552|ppo_ep: 1|act_loss: 0.049072265625|cri_loss: 0.025238037109375|unsuper_loss: 0.0
+average reward score: 4.9140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.47s (22.03%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5553|ppo_ep: 1|act_loss: -0.00720977783203125|cri_loss: -0.00344085693359375|unsuper_loss: 0.0
+average reward score: 5.22265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.43%) |Training time=0.45s (20.93%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5554|ppo_ep: 1|act_loss: 0.0136260986328125|cri_loss: 0.007171630859375|unsuper_loss: 0.0
+average reward score: 4.88671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.45s (21.09%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5555|ppo_ep: 1|act_loss: -0.016937255859375|cri_loss: -0.0080108642578125|unsuper_loss: 0.0
+average reward score: 5.74609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.92%) |Training time=0.46s (19.76%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5556|ppo_ep: 1|act_loss: 0.0487060546875|cri_loss: 0.025634765625|unsuper_loss: 0.0
+average reward score: 5.3828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.86%) |Training time=0.47s (21.51%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5557|ppo_ep: 1|act_loss: -0.00664520263671875|cri_loss: -0.002841949462890625|unsuper_loss: 0.0
+average reward score: 4.62109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.15%) |Training time=0.46s (21.21%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5558|ppo_ep: 1|act_loss: -0.01214599609375|cri_loss: -0.005767822265625|unsuper_loss: 0.0
+average reward score: 5.19921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.71%) |Training time=0.47s (21.69%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.50
+[2023-04-14 12:11:58,543] [INFO] [logging.py:96:log_dist] [Rank 0] step=5560, skipped=70, lr=[2.4933793413320327e-06, 2.4933793413320327e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:11:58,561] [INFO] [timer.py:199:stop] epoch=0/micro_step=5560/global_step=5560, RunningAvgSamplesPerSec=105.34322717762855, CurrSamplesPerSec=109.43845430039579, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:11:58,654] [INFO] [logging.py:96:log_dist] [Rank 0] step=5560, skipped=90, lr=[1.308795171802324e-06, 1.308795171802324e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5559|ppo_ep: 1|act_loss: -0.021575927734375|cri_loss: -0.0096282958984375|unsuper_loss: 0.0
+average reward score: 5.1015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.71%) |Training time=0.46s (20.74%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5560|ppo_ep: 1|act_loss: 0.004123687744140625|cri_loss: 0.0024566650390625|unsuper_loss: 0.0
+average reward score: 5.58984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.41%) |Training time=0.45s (20.14%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.19 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5561|ppo_ep: 1|act_loss: 0.0013027191162109375|cri_loss: 0.0007872581481933594|unsuper_loss: 0.0
+average reward score: 5.375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.12%) |Training time=0.46s (21.25%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5562|ppo_ep: 1|act_loss: -0.004364013671875|cri_loss: -0.001819610595703125|unsuper_loss: 0.0
+average reward score: 5.73046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.05%) |Training time=0.46s (21.30%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5563|ppo_ep: 1|act_loss: -0.0013980865478515625|cri_loss: -0.0005540847778320312|unsuper_loss: 0.0
+average reward score: 5.4609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.07%) |Training time=0.46s (21.29%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5564|ppo_ep: 1|act_loss: -0.0019588470458984375|cri_loss: -0.0007309913635253906|unsuper_loss: 0.0
+average reward score: 5.265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.46s (21.39%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5565|ppo_ep: 1|act_loss: 0.0224609375|cri_loss: 0.01166534423828125|unsuper_loss: 0.0
+average reward score: 5.421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.00%) |Training time=0.46s (21.37%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5566|ppo_ep: 1|act_loss: -0.00531768798828125|cri_loss: -0.0025119781494140625|unsuper_loss: 0.0
+average reward score: 5.46875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.11%) |Training time=0.46s (21.25%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5567|ppo_ep: 1|act_loss: 0.0183563232421875|cri_loss: 0.00948333740234375|unsuper_loss: 0.0
+average reward score: 5.140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.83%) |Training time=0.47s (21.54%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5568|ppo_ep: 1|act_loss: 0.04541015625|cri_loss: 0.02337646484375|unsuper_loss: 0.0
+average reward score: 6.4375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.15%) |Training time=0.46s (21.19%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.50
+[2023-04-14 12:12:20,280] [INFO] [logging.py:96:log_dist] [Rank 0] step=5570, skipped=70, lr=[2.477133390914919e-06, 2.477133390914919e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:12:20,299] [INFO] [timer.py:199:stop] epoch=0/micro_step=5570/global_step=5570, RunningAvgSamplesPerSec=105.34633939663574, CurrSamplesPerSec=104.92982122851744, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:12:20,391] [INFO] [logging.py:96:log_dist] [Rank 0] step=5570, skipped=90, lr=[1.300341892223852e-06, 1.300341892223852e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5569|ppo_ep: 1|act_loss: -0.00798797607421875|cri_loss: -0.0038700103759765625|unsuper_loss: 0.0
+average reward score: 5.4609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.82%) |Training time=0.47s (21.59%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5570|ppo_ep: 1|act_loss: -0.003910064697265625|cri_loss: -0.0016078948974609375|unsuper_loss: 0.0
+average reward score: 5.0
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.53%) |Training time=0.47s (20.16%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5571|ppo_ep: 1|act_loss: -0.00772857666015625|cri_loss: -0.0037689208984375|unsuper_loss: 0.0
+average reward score: 4.7265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.66%) |Training time=0.47s (21.73%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5572|ppo_ep: 1|act_loss: 0.011016845703125|cri_loss: 0.005828857421875|unsuper_loss: 0.0
+average reward score: 5.7734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.83%) |Training time=0.46s (21.54%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5573|ppo_ep: 1|act_loss: -0.02130126953125|cri_loss: -0.0103607177734375|unsuper_loss: 0.0
+average reward score: 5.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.65%) |Training time=0.47s (21.73%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5574|ppo_ep: 1|act_loss: -0.018402099609375|cri_loss: -0.00804901123046875|unsuper_loss: 0.0
+average reward score: 5.640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.68%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5575|ppo_ep: 1|act_loss: 0.001041412353515625|cri_loss: 0.0006079673767089844|unsuper_loss: 0.0
+average reward score: 5.65625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.68%) |Training time=0.47s (21.71%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5576|ppo_ep: 1|act_loss: 0.0243072509765625|cri_loss: 0.0126800537109375|unsuper_loss: 0.0
+average reward score: 5.375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.86%) |Training time=0.46s (21.46%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50
+[2023-04-14 12:12:37,824] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 5577|ppo_ep: 1|act_loss: 0.058837890625|cri_loss: 0.03009033203125|unsuper_loss: 0.0
+average reward score: 5.046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.16%) |Training time=0.47s (21.69%) |Others=0.09 (4.15%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.50
+[2023-04-14 12:12:39,972] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 5578|ppo_ep: 1|act_loss: 0.04931640625|cri_loss: 0.025543212890625|unsuper_loss: 0.0
+average reward score: 4.96484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.24%) |Training time=0.46s (21.61%) |Others=0.09 (4.15%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.50
+[2023-04-14 12:12:42,017] [INFO] [logging.py:96:log_dist] [Rank 0] step=5580, skipped=70, lr=[2.4609222415448723e-06, 2.4609222415448723e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:12:42,036] [INFO] [timer.py:199:stop] epoch=0/micro_step=5580/global_step=5580, RunningAvgSamplesPerSec=105.34619836932745, CurrSamplesPerSec=107.6787841445882, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:12:42,128] [INFO] [logging.py:96:log_dist] [Rank 0] step=5580, skipped=92, lr=[1.2935920654711779e-06, 1.2935920654711779e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5579|ppo_ep: 1|act_loss: 0.010833740234375|cri_loss: 0.00600433349609375|unsuper_loss: 0.0
+average reward score: 5.09375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.07%) |Training time=0.46s (21.31%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5580|ppo_ep: 1|act_loss: 0.005214691162109375|cri_loss: 0.002765655517578125|unsuper_loss: 0.0
+average reward score: 5.43359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.59%) |Training time=0.47s (21.80%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5581|ppo_ep: 1|act_loss: 0.001560211181640625|cri_loss: 0.0010395050048828125|unsuper_loss: 0.0
+average reward score: 5.1640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.00%) |Training time=0.46s (21.36%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5582|ppo_ep: 1|act_loss: -0.055084228515625|cri_loss: -0.026885986328125|unsuper_loss: 0.0
+average reward score: 5.5625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.15%) |Training time=0.46s (21.21%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5583|ppo_ep: 1|act_loss: -0.031036376953125|cri_loss: -0.0148162841796875|unsuper_loss: 0.0
+average reward score: 6.046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.46%) |Training time=0.48s (21.92%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5584|ppo_ep: 1|act_loss: 0.0030517578125|cri_loss: 0.0017175674438476562|unsuper_loss: 0.0
+average reward score: 5.640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.00%) |Training time=0.46s (21.37%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5585|ppo_ep: 1|act_loss: -0.010528564453125|cri_loss: -0.00510406494140625|unsuper_loss: 0.0
+average reward score: 5.515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.04%) |Training time=0.46s (19.65%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.77 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5586|ppo_ep: 1|act_loss: -0.0234375|cri_loss: -0.01132965087890625|unsuper_loss: 0.0
+average reward score: 4.91796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.62%) |Training time=0.46s (21.11%) |Others=0.11 (5.27%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5587|ppo_ep: 1|act_loss: 0.04998779296875|cri_loss: 0.0256805419921875|unsuper_loss: 0.0
+average reward score: 5.2421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.01%) |Training time=0.46s (21.37%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5588|ppo_ep: 1|act_loss: 0.00350189208984375|cri_loss: 0.0020236968994140625|unsuper_loss: 0.0
+average reward score: 5.4921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.21%) |Training time=0.46s (21.12%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.50
+[2023-04-14 12:13:03,886] [INFO] [logging.py:96:log_dist] [Rank 0] step=5590, skipped=70, lr=[2.4447461335102328e-06, 2.4447461335102328e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:13:03,904] [INFO] [timer.py:199:stop] epoch=0/micro_step=5590/global_step=5590, RunningAvgSamplesPerSec=105.3472014072174, CurrSamplesPerSec=99.85687682696464, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:13:03,996] [INFO] [logging.py:96:log_dist] [Rank 0] step=5590, skipped=92, lr=[1.2851708832191219e-06, 1.2851708832191219e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5589|ppo_ep: 1|act_loss: -0.00678253173828125|cri_loss: -0.00315093994140625|unsuper_loss: 0.0
+average reward score: 5.7421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.11%) |Training time=0.50s (22.40%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.35 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5590|ppo_ep: 1|act_loss: 0.05072021484375|cri_loss: 0.02679443359375|unsuper_loss: 0.0
+average reward score: 5.62890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.83%) |Training time=0.47s (21.55%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5591|ppo_ep: 1|act_loss: -0.0042266845703125|cri_loss: -0.0016841888427734375|unsuper_loss: 0.0
+average reward score: 5.609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.00%) |Training time=0.46s (21.35%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5592|ppo_ep: 1|act_loss: 0.0079345703125|cri_loss: 0.0050506591796875|unsuper_loss: 0.0
+average reward score: 5.26953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.24%) |Training time=0.45s (21.08%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5593|ppo_ep: 1|act_loss: -0.07049560546875|cri_loss: -0.034515380859375|unsuper_loss: 0.0
+average reward score: 5.80859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.98%) |Training time=0.49s (22.46%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5594|ppo_ep: 1|act_loss: 0.0115966796875|cri_loss: 0.00616455078125|unsuper_loss: 0.0
+average reward score: 5.9453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.26%) |Training time=0.48s (22.13%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5595|ppo_ep: 1|act_loss: -0.01904296875|cri_loss: -0.0092620849609375|unsuper_loss: 0.0
+average reward score: 5.703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.34%) |Training time=0.48s (22.07%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5596|ppo_ep: 1|act_loss: -0.025482177734375|cri_loss: -0.012420654296875|unsuper_loss: 0.0
+average reward score: 5.8203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.34%) |Training time=0.48s (22.06%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5597|ppo_ep: 1|act_loss: -0.030914306640625|cri_loss: -0.01505279541015625|unsuper_loss: 0.0
+average reward score: 5.28125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.10%) |Training time=0.49s (22.30%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5598|ppo_ep: 1|act_loss: -0.008148193359375|cri_loss: -0.0037078857421875|unsuper_loss: 0.0
+average reward score: 4.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.19%) |Training time=0.48s (22.19%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.50
+[2023-04-14 12:13:25,592] [INFO] [logging.py:96:log_dist] [Rank 0] step=5600, skipped=70, lr=[2.4286053065799456e-06, 2.4286053065799456e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:13:25,611] [INFO] [timer.py:199:stop] epoch=0/micro_step=5600/global_step=5600, RunningAvgSamplesPerSec=105.3410928350194, CurrSamplesPerSec=100.1417826315008, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:13:25,703] [INFO] [logging.py:96:log_dist] [Rank 0] step=5600, skipped=92, lr=[1.2767677076651913e-06, 1.2767677076651913e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5599|ppo_ep: 1|act_loss: -0.0034942626953125|cri_loss: -0.0016145706176757812|unsuper_loss: 0.0
+average reward score: 5.06640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.27%) |Training time=0.48s (22.07%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5600|ppo_ep: 1|act_loss: 0.0264892578125|cri_loss: 0.01355743408203125|unsuper_loss: 0.0
+average reward score: 5.984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.56%) |Training time=0.50s (21.20%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.55 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5601|ppo_ep: 1|act_loss: 0.016510009765625|cri_loss: 0.0084381103515625|unsuper_loss: 0.0
+average reward score: 4.64453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.85%) |Training time=0.47s (21.54%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5602|ppo_ep: 1|act_loss: -0.00408935546875|cri_loss: -0.0019178390502929688|unsuper_loss: 0.0
+average reward score: 6.00390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.55%) |Training time=0.47s (21.84%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5603|ppo_ep: 1|act_loss: -0.001995086669921875|cri_loss: -0.000728607177734375|unsuper_loss: 0.0
+average reward score: 6.02734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.06%) |Training time=0.46s (21.30%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5604|ppo_ep: 1|act_loss: -0.00925445556640625|cri_loss: -0.00435638427734375|unsuper_loss: 0.0
+average reward score: 4.9609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.61%) |Training time=0.47s (21.78%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5605|ppo_ep: 1|act_loss: 0.007480621337890625|cri_loss: 0.004024505615234375|unsuper_loss: 0.0
+average reward score: 4.8984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.91%) |Training time=0.46s (21.46%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5606|ppo_ep: 1|act_loss: -0.0097808837890625|cri_loss: -0.00438690185546875|unsuper_loss: 0.0
+average reward score: 4.73046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.81%) |Training time=0.47s (21.58%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5607|ppo_ep: 1|act_loss: -0.001796722412109375|cri_loss: -0.0005803108215332031|unsuper_loss: 0.0
+average reward score: 5.6640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.73%) |Training time=0.47s (21.65%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5608|ppo_ep: 1|act_loss: 0.003429412841796875|cri_loss: 0.0020198822021484375|unsuper_loss: 0.0
+average reward score: 5.40234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.96%) |Training time=0.46s (21.42%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50
+[2023-04-14 12:13:47,414] [INFO] [logging.py:96:log_dist] [Rank 0] step=5610, skipped=70, lr=[2.4125000000000015e-06, 2.4125000000000015e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:13:47,432] [INFO] [timer.py:199:stop] epoch=0/micro_step=5610/global_step=5610, RunningAvgSamplesPerSec=105.33799128956171, CurrSamplesPerSec=100.43267746437243, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:13:47,524] [INFO] [logging.py:96:log_dist] [Rank 0] step=5610, skipped=92, lr=[1.2683826633647206e-06, 1.2683826633647206e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5609|ppo_ep: 1|act_loss: 0.01202392578125|cri_loss: 0.006450653076171875|unsuper_loss: 0.0
+average reward score: 5.16796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.14%) |Training time=0.48s (22.26%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5610|ppo_ep: 1|act_loss: -0.012664794921875|cri_loss: -0.006195068359375|unsuper_loss: 0.0
+average reward score: 5.33984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.01%) |Training time=0.48s (22.37%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5611|ppo_ep: 1|act_loss: 0.043304443359375|cri_loss: 0.0238037109375|unsuper_loss: 0.0
+average reward score: 5.3359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.47s (21.91%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5612|ppo_ep: 1|act_loss: 0.00029969215393066406|cri_loss: 0.00031495094299316406|unsuper_loss: 0.0
+average reward score: 5.3359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.09%) |Training time=0.48s (22.31%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5613|ppo_ep: 1|act_loss: 0.019256591796875|cri_loss: 0.0101470947265625|unsuper_loss: 0.0
+average reward score: 4.8125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.24%) |Training time=0.48s (22.13%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5614|ppo_ep: 1|act_loss: 0.002948760986328125|cri_loss: 0.0017547607421875|unsuper_loss: 0.0
+average reward score: 4.76171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.64%) |Training time=0.48s (21.77%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5615|ppo_ep: 1|act_loss: 0.0222320556640625|cri_loss: 0.01169586181640625|unsuper_loss: 0.0
+average reward score: 5.1328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.10%) |Training time=0.48s (22.19%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5616|ppo_ep: 1|act_loss: -0.0018444061279296875|cri_loss: -0.0008955001831054688|unsuper_loss: 0.0
+average reward score: 4.73828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.64%) |Training time=0.49s (21.07%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.68 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5617|ppo_ep: 1|act_loss: 0.031494140625|cri_loss: 0.0165557861328125|unsuper_loss: 0.0
+average reward score: 5.58203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.10%) |Training time=0.48s (22.24%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5618|ppo_ep: 1|act_loss: 0.0479736328125|cri_loss: 0.0246734619140625|unsuper_loss: 0.0
+average reward score: 5.19140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.79%) |Training time=0.47s (21.61%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.50
+[2023-04-14 12:14:09,378] [INFO] [logging.py:96:log_dist] [Rank 0] step=5620, skipped=70, lr=[2.396430452489896e-06, 2.396430452489896e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:14:09,396] [INFO] [timer.py:199:stop] epoch=0/micro_step=5620/global_step=5620, RunningAvgSamplesPerSec=105.32878407607137, CurrSamplesPerSec=98.12636157931868, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:14:09,489] [INFO] [logging.py:96:log_dist] [Rank 0] step=5620, skipped=92, lr=[1.260015874604294e-06, 1.260015874604294e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5619|ppo_ep: 1|act_loss: 0.005878448486328125|cri_loss: 0.00319671630859375|unsuper_loss: 0.0
+average reward score: 4.60546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.68s (74.07%) |Training time=0.49s (21.54%) |Others=0.10 (4.39%)|CurSamplesPerSec=14.11 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5620|ppo_ep: 1|act_loss: -0.0250244140625|cri_loss: -0.01186370849609375|unsuper_loss: 0.0
+average reward score: 5.42578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.57%) |Training time=0.47s (21.76%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5621|ppo_ep: 1|act_loss: 0.03326416015625|cri_loss: 0.0173797607421875|unsuper_loss: 0.0
+average reward score: 5.63671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.69%) |Training time=0.46s (21.65%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5622|ppo_ep: 1|act_loss: -0.02783203125|cri_loss: -0.0135955810546875|unsuper_loss: 0.0
+average reward score: 5.21484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.36%) |Training time=0.47s (21.99%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5623|ppo_ep: 1|act_loss: -0.0118560791015625|cri_loss: -0.005462646484375|unsuper_loss: 0.0
+average reward score: 5.359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.40%) |Training time=0.47s (21.95%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5624|ppo_ep: 1|act_loss: 0.025970458984375|cri_loss: 0.0137176513671875|unsuper_loss: 0.0
+average reward score: 5.0546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.35%) |Training time=0.47s (21.99%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5625|ppo_ep: 1|act_loss: 0.0224609375|cri_loss: 0.01157379150390625|unsuper_loss: 0.0
+average reward score: 5.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.50%) |Training time=0.47s (21.84%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5626|ppo_ep: 1|act_loss: 0.015960693359375|cri_loss: 0.00885009765625|unsuper_loss: 0.0
+average reward score: 5.40234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.36%) |Training time=0.47s (21.98%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5627|ppo_ep: 1|act_loss: -0.0035419464111328125|cri_loss: -0.0016145706176757812|unsuper_loss: 0.0
+average reward score: 5.19921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.35%) |Training time=0.47s (22.00%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5628|ppo_ep: 1|act_loss: -0.01557159423828125|cri_loss: -0.00714874267578125|unsuper_loss: 0.0
+average reward score: 4.58984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.46%) |Training time=0.47s (21.89%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.50
+[2023-04-14 12:14:30,869] [INFO] [logging.py:96:log_dist] [Rank 0] step=5630, skipped=70, lr=[2.3803969022390895e-06, 2.3803969022390895e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:14:30,888] [INFO] [timer.py:199:stop] epoch=0/micro_step=5630/global_step=5630, RunningAvgSamplesPerSec=105.32614334516194, CurrSamplesPerSec=103.03302993013564, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:14:30,980] [INFO] [logging.py:96:log_dist] [Rank 0] step=5630, skipped=92, lr=[1.251667465399905e-06, 1.251667465399905e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5629|ppo_ep: 1|act_loss: 0.0026702880859375|cri_loss: 0.0017681121826171875|unsuper_loss: 0.0
+average reward score: 4.81640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.52%) |Training time=0.47s (21.87%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5630|ppo_ep: 1|act_loss: -0.008331298828125|cri_loss: -0.003772735595703125|unsuper_loss: 0.0
+average reward score: 4.26953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.14%) |Training time=0.46s (21.17%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5631|ppo_ep: 1|act_loss: -0.002307891845703125|cri_loss: -4.9591064453125e-05|unsuper_loss: 0.0
+average reward score: 5.015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.30%) |Training time=0.45s (21.06%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5632|ppo_ep: 1|act_loss: 0.0172119140625|cri_loss: 0.0088653564453125|unsuper_loss: 0.0
+average reward score: 5.7890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.95%) |Training time=0.49s (20.77%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.68 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5633|ppo_ep: 1|act_loss: 0.006427764892578125|cri_loss: 0.003536224365234375|unsuper_loss: 0.0
+average reward score: 5.53125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.25%) |Training time=0.45s (21.09%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5634|ppo_ep: 1|act_loss: -0.0005903244018554688|cri_loss: -9.5367431640625e-06|unsuper_loss: 0.0
+average reward score: 5.359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.08%) |Training time=0.46s (21.30%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5635|ppo_ep: 1|act_loss: 0.02325439453125|cri_loss: 0.0118408203125|unsuper_loss: 0.0
+average reward score: 4.5390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.26%) |Training time=0.45s (21.09%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5636|ppo_ep: 1|act_loss: 0.0030612945556640625|cri_loss: 0.00257110595703125|unsuper_loss: 0.0
+average reward score: 6.2109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.25%) |Training time=0.45s (21.10%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5637|ppo_ep: 1|act_loss: -0.00508880615234375|cri_loss: -0.002368927001953125|unsuper_loss: 0.0
+average reward score: 5.79296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.51%) |Training time=0.45s (20.83%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5638|ppo_ep: 1|act_loss: -0.00368499755859375|cri_loss: -0.0016355514526367188|unsuper_loss: 0.0
+average reward score: 5.5390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.28%) |Training time=0.45s (21.07%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.50
+[2023-04-14 12:14:52,594] [INFO] [logging.py:96:log_dist] [Rank 0] step=5640, skipped=70, lr=[2.3643995869034777e-06, 2.3643995869034777e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:14:52,612] [INFO] [timer.py:199:stop] epoch=0/micro_step=5640/global_step=5640, RunningAvgSamplesPerSec=105.33151498596196, CurrSamplesPerSec=109.83789021262527, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:14:52,705] [INFO] [logging.py:96:log_dist] [Rank 0] step=5640, skipped=92, lr=[1.2433375594951166e-06, 1.2433375594951166e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5639|ppo_ep: 1|act_loss: 0.0013294219970703125|cri_loss: 0.0008649826049804688|unsuper_loss: 0.0
+average reward score: 5.73828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.45s (21.09%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5640|ppo_ep: 1|act_loss: -0.02569580078125|cri_loss: -0.009002685546875|unsuper_loss: 0.0
+average reward score: 5.10546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.38%) |Training time=0.45s (20.96%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5641|ppo_ep: 1|act_loss: -0.0119476318359375|cri_loss: -0.005680084228515625|unsuper_loss: 0.0
+average reward score: 4.98046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.12%) |Training time=0.46s (21.24%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5642|ppo_ep: 1|act_loss: 0.0367431640625|cri_loss: 0.018951416015625|unsuper_loss: 0.0
+average reward score: 5.23828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.01%) |Training time=0.47s (21.40%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5643|ppo_ep: 1|act_loss: 0.0321044921875|cri_loss: 0.016815185546875|unsuper_loss: 0.0
+average reward score: 5.34375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.33%) |Training time=0.45s (21.01%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5644|ppo_ep: 1|act_loss: -0.0030670166015625|cri_loss: -0.0014181137084960938|unsuper_loss: 0.0
+average reward score: 5.51171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.82%) |Training time=0.47s (21.55%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5645|ppo_ep: 1|act_loss: -0.01788330078125|cri_loss: -0.0072021484375|unsuper_loss: 0.0
+average reward score: 5.17578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.25%) |Training time=0.46s (21.13%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5646|ppo_ep: 1|act_loss: -0.0009388923645019531|cri_loss: -6.29425048828125e-05|unsuper_loss: 0.0
+average reward score: 5.125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.44%) |Training time=0.45s (20.91%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5647|ppo_ep: 1|act_loss: 0.03594970703125|cri_loss: 0.0187530517578125|unsuper_loss: 0.0
+average reward score: 6.3203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.78s (75.12%) |Training time=0.49s (20.65%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.54 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5648|ppo_ep: 1|act_loss: 0.00289154052734375|cri_loss: 0.001529693603515625|unsuper_loss: 0.0
+average reward score: 5.6953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.63s (71.66%) |Training time=0.48s (21.32%) |Others=0.16 (7.02%)|CurSamplesPerSec=14.08 |AvgSamplesPerSec=14.50
+[2023-04-14 12:15:14,519] [INFO] [logging.py:96:log_dist] [Rank 0] step=5650, skipped=70, lr=[2.3484387436018617e-06, 2.3484387436018617e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:15:14,537] [INFO] [timer.py:199:stop] epoch=0/micro_step=5650/global_step=5650, RunningAvgSamplesPerSec=105.33311141352057, CurrSamplesPerSec=107.29409808518922, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:15:14,630] [INFO] [logging.py:96:log_dist] [Rank 0] step=5650, skipped=92, lr=[1.2350262803592295e-06, 1.2350262803592295e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5649|ppo_ep: 1|act_loss: -0.032562255859375|cri_loss: -0.01555633544921875|unsuper_loss: 0.0
+average reward score: 5.26171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.04%) |Training time=0.46s (21.32%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5650|ppo_ep: 1|act_loss: -0.030517578125|cri_loss: -0.01496124267578125|unsuper_loss: 0.0
+average reward score: 5.50390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.09%) |Training time=0.46s (21.27%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5651|ppo_ep: 1|act_loss: 0.00797271728515625|cri_loss: 0.004436492919921875|unsuper_loss: 0.0
+average reward score: 5.26953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.99%) |Training time=0.46s (21.37%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5652|ppo_ep: 1|act_loss: -0.0179290771484375|cri_loss: -0.0073394775390625|unsuper_loss: 0.0
+average reward score: 5.73046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.13%) |Training time=0.45s (21.04%) |Others=0.10 (4.83%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5653|ppo_ep: 1|act_loss: 0.01305389404296875|cri_loss: 0.006961822509765625|unsuper_loss: 0.0
+average reward score: 5.44921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.24%) |Training time=0.45s (21.12%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5654|ppo_ep: 1|act_loss: 0.024688720703125|cri_loss: 0.01264190673828125|unsuper_loss: 0.0
+average reward score: 5.8359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.96%) |Training time=0.46s (21.40%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5655|ppo_ep: 1|act_loss: -0.01490020751953125|cri_loss: -0.00708770751953125|unsuper_loss: 0.0
+average reward score: 5.4375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.30%) |Training time=0.45s (21.05%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5656|ppo_ep: 1|act_loss: -0.00908660888671875|cri_loss: -0.00432586669921875|unsuper_loss: 0.0
+average reward score: 5.39453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.31%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5657|ppo_ep: 1|act_loss: 0.010772705078125|cri_loss: 0.005657196044921875|unsuper_loss: 0.0
+average reward score: 5.5625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.13%) |Training time=0.46s (21.24%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5658|ppo_ep: 1|act_loss: 0.020751953125|cri_loss: 0.0118560791015625|unsuper_loss: 0.0
+average reward score: 5.19140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.10%) |Training time=0.46s (21.24%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50
+[2023-04-14 12:15:36,069] [INFO] [logging.py:96:log_dist] [Rank 0] step=5660, skipped=70, lr=[2.3325146089124427e-06, 2.3325146089124427e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:15:36,087] [INFO] [timer.py:199:stop] epoch=0/micro_step=5660/global_step=5660, RunningAvgSamplesPerSec=105.33806343563634, CurrSamplesPerSec=105.81956599209691, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:15:36,180] [INFO] [logging.py:96:log_dist] [Rank 0] step=5660, skipped=92, lr=[1.2267337511854502e-06, 1.2267337511854502e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5659|ppo_ep: 1|act_loss: -0.008026123046875|cri_loss: -0.00384521484375|unsuper_loss: 0.0
+average reward score: 5.99609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.89%) |Training time=0.47s (21.51%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5660|ppo_ep: 1|act_loss: 0.0150604248046875|cri_loss: 0.00789642333984375|unsuper_loss: 0.0
+average reward score: 5.44140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.25%) |Training time=0.49s (22.18%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5661|ppo_ep: 1|act_loss: -0.02191162109375|cri_loss: -0.01050567626953125|unsuper_loss: 0.0
+average reward score: 5.234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.60s (71.17%) |Training time=0.47s (20.79%) |Others=0.18 (8.04%)|CurSamplesPerSec=14.24 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5662|ppo_ep: 1|act_loss: 0.005733489990234375|cri_loss: 0.003093719482421875|unsuper_loss: 0.0
+average reward score: 5.703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.67s (74.59%) |Training time=0.47s (20.92%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.27 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5663|ppo_ep: 1|act_loss: -0.0006504058837890625|cri_loss: 3.337860107421875e-06|unsuper_loss: 0.0
+average reward score: 5.6796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.18%) |Training time=0.46s (21.18%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5664|ppo_ep: 1|act_loss: 0.0027523040771484375|cri_loss: 0.002132415771484375|unsuper_loss: 0.0
+average reward score: 4.9453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.08%) |Training time=0.46s (21.28%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5665|ppo_ep: 1|act_loss: -0.008880615234375|cri_loss: -0.00409698486328125|unsuper_loss: 0.0
+average reward score: 4.65625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.54%) |Training time=0.45s (20.78%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5666|ppo_ep: 1|act_loss: 0.028533935546875|cri_loss: 0.0145721435546875|unsuper_loss: 0.0
+average reward score: 4.81640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.72%) |Training time=0.47s (21.67%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5667|ppo_ep: 1|act_loss: -0.037109375|cri_loss: -0.0174560546875|unsuper_loss: 0.0
+average reward score: 5.640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.36%) |Training time=0.45s (20.98%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5668|ppo_ep: 1|act_loss: -0.022705078125|cri_loss: -0.0110626220703125|unsuper_loss: 0.0
+average reward score: 4.6484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.45%) |Training time=0.45s (20.90%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.50
+[2023-04-14 12:15:57,800] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+[2023-04-14 12:15:57,801] [INFO] [logging.py:96:log_dist] [Rank 0] step=5670, skipped=71, lr=[2.3182144686488735e-06, 2.3182144686488735e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:15:57,801] [INFO] [timer.py:199:stop] epoch=0/micro_step=5670/global_step=5670, RunningAvgSamplesPerSec=105.34351177301964, CurrSamplesPerSec=121.80373494216448, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:15:57,893] [INFO] [logging.py:96:log_dist] [Rank 0] step=5670, skipped=92, lr=[1.218460094889067e-06, 1.218460094889067e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5669|ppo_ep: 1|act_loss: 0.00289154052734375|cri_loss: 0.0015668869018554688|unsuper_loss: 0.0
+average reward score: 5.41796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.29%) |Training time=0.42s (20.05%) |Others=0.10 (4.67%)|CurSamplesPerSec=15.10 |AvgSamplesPerSec=14.50
+[2023-04-14 12:15:59,951] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 5670|ppo_ep: 1|act_loss: 0.0088653564453125|cri_loss: 0.00457000732421875|unsuper_loss: 0.0
+average reward score: 5.22265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.04%) |Training time=0.44s (20.34%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5671|ppo_ep: 1|act_loss: -0.0054931640625|cri_loss: -0.0025272369384765625|unsuper_loss: 0.0
+average reward score: 5.4921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.36%) |Training time=0.45s (20.71%) |Others=0.11 (4.93%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5672|ppo_ep: 1|act_loss: 0.0093536376953125|cri_loss: 0.004886627197265625|unsuper_loss: 0.0
+average reward score: 5.4765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.23%) |Training time=0.45s (21.10%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5673|ppo_ep: 1|act_loss: 0.04095458984375|cri_loss: 0.021514892578125|unsuper_loss: 0.0
+average reward score: 4.921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.99%) |Training time=0.46s (21.37%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5674|ppo_ep: 1|act_loss: 0.0182952880859375|cri_loss: 0.0092620849609375|unsuper_loss: 0.0
+average reward score: 4.65625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.20%) |Training time=0.45s (21.14%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5675|ppo_ep: 1|act_loss: 0.003265380859375|cri_loss: 0.002109527587890625|unsuper_loss: 0.0
+average reward score: 5.7421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.59%) |Training time=0.45s (20.73%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5676|ppo_ep: 1|act_loss: 0.009246826171875|cri_loss: 0.005527496337890625|unsuper_loss: 0.0
+average reward score: 5.3828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.31%) |Training time=0.45s (21.02%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5677|ppo_ep: 1|act_loss: -0.01214599609375|cri_loss: -0.005889892578125|unsuper_loss: 0.0
+average reward score: 5.75
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.78s (75.90%) |Training time=0.46s (19.81%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.68 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5678|ppo_ep: 1|act_loss: -0.0177154541015625|cri_loss: -0.00844573974609375|unsuper_loss: 0.0
+average reward score: 5.3125
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.15%) |Training time=0.47s (20.44%) |Others=0.10 (4.41%)|CurSamplesPerSec=14.06 |AvgSamplesPerSec=14.50
+[2023-04-14 12:16:19,632] [INFO] [logging.py:96:log_dist] [Rank 0] step=5680, skipped=72, lr=[2.3039444252455474e-06, 2.3039444252455474e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:16:19,650] [INFO] [timer.py:199:stop] epoch=0/micro_step=5680/global_step=5680, RunningAvgSamplesPerSec=105.3509397411824, CurrSamplesPerSec=106.0162415650417, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:16:19,734] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+[2023-04-14 12:16:19,734] [INFO] [logging.py:96:log_dist] [Rank 0] step=5680, skipped=93, lr=[1.2110300418965807e-06, 1.2110300418965807e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5679|ppo_ep: 1|act_loss: 0.0119781494140625|cri_loss: 0.00623321533203125|unsuper_loss: 0.0
+average reward score: 5.38671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.19%) |Training time=0.46s (21.67%) |Others=0.09 (4.14%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.50
+[2023-04-14 12:16:21,885] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 5680|ppo_ep: 1|act_loss: 0.0156097412109375|cri_loss: 0.0085296630859375|unsuper_loss: 0.0
+average reward score: 4.7421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.47s (21.98%) |Others=0.09 (4.16%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5681|ppo_ep: 1|act_loss: -0.026214599609375|cri_loss: -0.0122528076171875|unsuper_loss: 0.0
+average reward score: 5.03125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.92%) |Training time=0.46s (21.42%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5682|ppo_ep: 1|act_loss: 0.037353515625|cri_loss: 0.019256591796875|unsuper_loss: 0.0
+average reward score: 4.78515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.77%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5683|ppo_ep: 1|act_loss: 0.008056640625|cri_loss: 0.0044708251953125|unsuper_loss: 0.0
+average reward score: 5.86328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.61%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5684|ppo_ep: 1|act_loss: -0.031463623046875|cri_loss: -0.013458251953125|unsuper_loss: 0.0
+average reward score: 5.88671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.76%) |Training time=0.46s (21.60%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.50
+epoch: 0|step: 5685|ppo_ep: 1|act_loss: 0.0073089599609375|cri_loss: 0.004638671875|unsuper_loss: 0.0
+average reward score: 5.59375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.53%) |Training time=0.47s (21.86%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5686|ppo_ep: 1|act_loss: 0.00563812255859375|cri_loss: 0.0030384063720703125|unsuper_loss: 0.0
+average reward score: 5.37890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.73%) |Training time=0.47s (21.61%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5687|ppo_ep: 1|act_loss: -0.034942626953125|cri_loss: -0.0169219970703125|unsuper_loss: 0.0
+average reward score: 5.71875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.84%) |Training time=0.46s (21.51%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5688|ppo_ep: 1|act_loss: -0.0142364501953125|cri_loss: -0.0067901611328125|unsuper_loss: 0.0
+average reward score: 5.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.67%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.51
+[2023-04-14 12:16:41,180] [INFO] [logging.py:96:log_dist] [Rank 0] step=5690, skipped=72, lr=[2.2881243286214536e-06, 2.2881243286214536e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:16:41,198] [INFO] [timer.py:199:stop] epoch=0/micro_step=5690/global_step=5690, RunningAvgSamplesPerSec=105.35023067784104, CurrSamplesPerSec=105.09620121933077, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:16:41,291] [INFO] [logging.py:96:log_dist] [Rank 0] step=5690, skipped=94, lr=[1.2036154644795697e-06, 1.2036154644795697e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5689|ppo_ep: 1|act_loss: 0.03704833984375|cri_loss: 0.019378662109375|unsuper_loss: 0.0
+average reward score: 5.3671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.67%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5690|ppo_ep: 1|act_loss: 0.0243682861328125|cri_loss: 0.0134124755859375|unsuper_loss: 0.0
+average reward score: 5.9921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.08%) |Training time=0.46s (21.25%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5691|ppo_ep: 1|act_loss: 0.035430908203125|cri_loss: 0.0182647705078125|unsuper_loss: 0.0
+average reward score: 4.703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.60s (71.64%) |Training time=0.46s (20.66%) |Others=0.17 (7.69%)|CurSamplesPerSec=14.34 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5692|ppo_ep: 1|act_loss: 0.0447998046875|cri_loss: 0.0230255126953125|unsuper_loss: 0.0
+average reward score: 5.12109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.67s (74.77%) |Training time=0.46s (20.75%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.32 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5693|ppo_ep: 1|act_loss: 0.035491943359375|cri_loss: 0.0187835693359375|unsuper_loss: 0.0
+average reward score: 5.203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.11%) |Training time=0.46s (21.24%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5694|ppo_ep: 1|act_loss: 0.0137176513671875|cri_loss: 0.00716400146484375|unsuper_loss: 0.0
+average reward score: 5.3671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.12%) |Training time=0.46s (21.24%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5695|ppo_ep: 1|act_loss: -0.01076507568359375|cri_loss: -0.005306243896484375|unsuper_loss: 0.0
+average reward score: 5.6875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.00%) |Training time=0.46s (21.36%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5696|ppo_ep: 1|act_loss: -0.0136566162109375|cri_loss: -0.005901336669921875|unsuper_loss: 0.0
+average reward score: 5.2734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.12%) |Training time=0.46s (21.22%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5697|ppo_ep: 1|act_loss: -0.0341796875|cri_loss: -0.016815185546875|unsuper_loss: 0.0
+average reward score: 5.82421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.81%) |Training time=0.47s (21.56%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5698|ppo_ep: 1|act_loss: 0.04217529296875|cri_loss: 0.0222625732421875|unsuper_loss: 0.0
+average reward score: 5.328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.11%) |Training time=0.46s (21.26%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.51
+[2023-04-14 12:17:02,892] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
+[2023-04-14 12:17:02,893] [INFO] [logging.py:96:log_dist] [Rank 0] step=5700, skipped=73, lr=[2.273918385226065e-06, 2.273918385226065e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:17:02,893] [INFO] [timer.py:199:stop] epoch=0/micro_step=5700/global_step=5700, RunningAvgSamplesPerSec=105.35557904627348, CurrSamplesPerSec=118.39787723532484, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:17:02,985] [INFO] [logging.py:96:log_dist] [Rank 0] step=5700, skipped=94, lr=[1.1953953037610977e-06, 1.1953953037610977e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5699|ppo_ep: 1|act_loss: 0.0312042236328125|cri_loss: 0.0164794921875|unsuper_loss: 0.0
+average reward score: 5.484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.00%) |Training time=0.43s (20.32%) |Others=0.10 (4.68%)|CurSamplesPerSec=15.05 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5700|ppo_ep: 1|act_loss: 0.010406494140625|cri_loss: 0.005855560302734375|unsuper_loss: 0.0
+average reward score: 5.0703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.34%) |Training time=0.45s (21.01%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5701|ppo_ep: 1|act_loss: -0.01861572265625|cri_loss: -0.009185791015625|unsuper_loss: 0.0
+average reward score: 5.77734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.46s (21.50%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5702|ppo_ep: 1|act_loss: 0.02288818359375|cri_loss: 0.011749267578125|unsuper_loss: 0.0
+average reward score: 5.31640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.02%) |Training time=0.46s (21.35%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5703|ppo_ep: 1|act_loss: 0.0144500732421875|cri_loss: 0.00774383544921875|unsuper_loss: 0.0
+average reward score: 5.7265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.17%) |Training time=0.46s (21.17%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5704|ppo_ep: 1|act_loss: 0.020233154296875|cri_loss: 0.01152801513671875|unsuper_loss: 0.0
+average reward score: 4.99609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.12%) |Training time=0.46s (21.25%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5705|ppo_ep: 1|act_loss: -0.01038360595703125|cri_loss: -0.00502777099609375|unsuper_loss: 0.0
+average reward score: 5.2421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.02%) |Training time=0.44s (20.31%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5706|ppo_ep: 1|act_loss: 0.0258941650390625|cri_loss: 0.01401519775390625|unsuper_loss: 0.0
+average reward score: 5.7265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.03%) |Training time=0.44s (20.32%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5707|ppo_ep: 1|act_loss: -0.054718017578125|cri_loss: -0.023895263671875|unsuper_loss: 0.0
+average reward score: 5.21484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.80s (76.29%) |Training time=0.46s (19.46%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.60 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5708|ppo_ep: 1|act_loss: 0.0155029296875|cri_loss: 0.0079498291015625|unsuper_loss: 0.0
+average reward score: 5.7734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.96%) |Training time=0.44s (19.61%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.20 |AvgSamplesPerSec=14.51
+[2023-04-14 12:17:24,721] [INFO] [logging.py:96:log_dist] [Rank 0] step=5710, skipped=73, lr=[2.2581699336049657e-06, 2.2581699336049657e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:17:24,740] [INFO] [timer.py:199:stop] epoch=0/micro_step=5710/global_step=5710, RunningAvgSamplesPerSec=105.36524562432051, CurrSamplesPerSec=114.486349178187, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:17:24,832] [INFO] [logging.py:96:log_dist] [Rank 0] step=5710, skipped=94, lr=[1.1871944804314127e-06, 1.1871944804314127e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5709|ppo_ep: 1|act_loss: 0.01461029052734375|cri_loss: 0.0086212158203125|unsuper_loss: 0.0
+average reward score: 5.1796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.84%) |Training time=0.44s (20.54%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5710|ppo_ep: 1|act_loss: -0.0137786865234375|cri_loss: -0.006793975830078125|unsuper_loss: 0.0
+average reward score: 5.49609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.76%) |Training time=0.44s (20.57%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5711|ppo_ep: 1|act_loss: 0.0221099853515625|cri_loss: 0.01142120361328125|unsuper_loss: 0.0
+average reward score: 4.609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.69%) |Training time=0.45s (20.65%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5712|ppo_ep: 1|act_loss: 0.03350830078125|cri_loss: 0.0171661376953125|unsuper_loss: 0.0
+average reward score: 5.203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.39%) |Training time=0.43s (19.97%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5713|ppo_ep: 1|act_loss: -0.00296783447265625|cri_loss: -0.0011854171752929688|unsuper_loss: 0.0
+average reward score: 5.49609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.69%) |Training time=0.44s (20.65%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5714|ppo_ep: 1|act_loss: 0.0247650146484375|cri_loss: 0.01328277587890625|unsuper_loss: 0.0
+average reward score: 6.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.41%) |Training time=0.45s (20.95%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5715|ppo_ep: 1|act_loss: 0.015167236328125|cri_loss: 0.00841522216796875|unsuper_loss: 0.0
+average reward score: 5.4375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.73%) |Training time=0.44s (20.64%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5716|ppo_ep: 1|act_loss: 0.0017757415771484375|cri_loss: 0.001026153564453125|unsuper_loss: 0.0
+average reward score: 4.88671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.55%) |Training time=0.45s (20.81%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5717|ppo_ep: 1|act_loss: -0.029449462890625|cri_loss: -0.01311492919921875|unsuper_loss: 0.0
+average reward score: 5.26953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.44%) |Training time=0.45s (20.79%) |Others=0.10 (4.77%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5718|ppo_ep: 1|act_loss: 0.0399169921875|cri_loss: 0.022247314453125|unsuper_loss: 0.0
+average reward score: 5.3984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.64%) |Training time=0.45s (20.72%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.51
+[2023-04-14 12:17:46,248] [INFO] [logging.py:96:log_dist] [Rank 0] step=5720, skipped=73, lr=[2.242459528597593e-06, 2.242459528597593e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:17:46,267] [INFO] [timer.py:199:stop] epoch=0/micro_step=5720/global_step=5720, RunningAvgSamplesPerSec=105.37837842461052, CurrSamplesPerSec=112.51268157248816, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:17:46,359] [INFO] [logging.py:96:log_dist] [Rank 0] step=5720, skipped=94, lr=[1.1790131160464974e-06, 1.1790131160464974e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5719|ppo_ep: 1|act_loss: 0.00589752197265625|cri_loss: 0.0037384033203125|unsuper_loss: 0.0
+average reward score: 4.78125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.59%) |Training time=0.45s (20.78%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5720|ppo_ep: 1|act_loss: 0.00400543212890625|cri_loss: 0.002696990966796875|unsuper_loss: 0.0
+average reward score: 5.6640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.57%) |Training time=0.45s (20.78%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5721|ppo_ep: 1|act_loss: 0.0252227783203125|cri_loss: 0.0133514404296875|unsuper_loss: 0.0
+average reward score: 4.71875
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.61s (71.63%) |Training time=0.49s (21.60%) |Others=0.15 (6.77%)|CurSamplesPerSec=14.23 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5722|ppo_ep: 1|act_loss: 0.032135009765625|cri_loss: 0.0175628662109375|unsuper_loss: 0.0
+average reward score: 5.40625
+-------------------------------------------------------------------------------------
+|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.63s (72.89%) |Training time=0.49s (21.99%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.28 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5723|ppo_ep: 1|act_loss: -0.041290283203125|cri_loss: -0.020355224609375|unsuper_loss: 0.0
+average reward score: 5.2734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.11%) |Training time=0.49s (22.33%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5724|ppo_ep: 1|act_loss: -0.04010009765625|cri_loss: -0.01953125|unsuper_loss: 0.0
+average reward score: 5.6015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.35%) |Training time=0.48s (22.04%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5725|ppo_ep: 1|act_loss: 0.017364501953125|cri_loss: 0.0089874267578125|unsuper_loss: 0.0
+average reward score: 5.3125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.02%) |Training time=0.47s (21.32%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5726|ppo_ep: 1|act_loss: -0.039947509765625|cri_loss: -0.0191802978515625|unsuper_loss: 0.0
+average reward score: 5.37109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.67s (76.38%) |Training time=0.41s (18.92%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5727|ppo_ep: 1|act_loss: 0.016815185546875|cri_loss: 0.008697509765625|unsuper_loss: 0.0
+average reward score: 4.88671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.92%) |Training time=0.47s (21.41%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5728|ppo_ep: 1|act_loss: 0.04681396484375|cri_loss: 0.0238494873046875|unsuper_loss: 0.0
+average reward score: 4.28515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.19%) |Training time=0.46s (21.19%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.51
+[2023-04-14 12:18:08,163] [INFO] [logging.py:96:log_dist] [Rank 0] step=5730, skipped=73, lr=[2.226787403070044e-06, 2.226787403070044e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:18:08,181] [INFO] [timer.py:199:stop] epoch=0/micro_step=5730/global_step=5730, RunningAvgSamplesPerSec=105.37804869000095, CurrSamplesPerSec=104.9343332311753, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:18:08,274] [INFO] [logging.py:96:log_dist] [Rank 0] step=5730, skipped=94, lr=[1.1708513318739096e-06, 1.1708513318739096e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5729|ppo_ep: 1|act_loss: 0.0394287109375|cri_loss: 0.020660400390625|unsuper_loss: 0.0
+average reward score: 5.0625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.88%) |Training time=0.47s (21.50%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5730|ppo_ep: 1|act_loss: 0.024932861328125|cri_loss: 0.0137481689453125|unsuper_loss: 0.0
+average reward score: 5.29296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.24%) |Training time=0.46s (21.13%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5731|ppo_ep: 1|act_loss: 0.0161590576171875|cri_loss: 0.009124755859375|unsuper_loss: 0.0
+average reward score: 5.16796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.85%) |Training time=0.45s (20.51%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5732|ppo_ep: 1|act_loss: 0.04718017578125|cri_loss: 0.02490234375|unsuper_loss: 0.0
+average reward score: 4.2265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.42%) |Training time=0.46s (20.95%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5733|ppo_ep: 1|act_loss: 0.00678253173828125|cri_loss: 0.0037174224853515625|unsuper_loss: 0.0
+average reward score: 4.76171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.53%) |Training time=0.48s (21.84%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5734|ppo_ep: 1|act_loss: -0.0018100738525390625|cri_loss: -0.0007863044738769531|unsuper_loss: 0.0
+average reward score: 4.953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.69%) |Training time=0.47s (21.61%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5735|ppo_ep: 1|act_loss: -0.0214691162109375|cri_loss: -0.0105133056640625|unsuper_loss: 0.0
+average reward score: 4.328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.48%) |Training time=0.47s (21.51%) |Others=0.11 (5.01%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5736|ppo_ep: 1|act_loss: -0.0106353759765625|cri_loss: -0.00485992431640625|unsuper_loss: 0.0
+average reward score: 5.16015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.73s (73.44%) |Training time=0.52s (21.94%) |Others=0.11 (4.61%)|CurSamplesPerSec=13.59 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5737|ppo_ep: 1|act_loss: -0.01264190673828125|cri_loss: -0.00566864013671875|unsuper_loss: 0.0
+average reward score: 5.34375
+-------------------------------------------------------------------------------------
+|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.64s (73.30%) |Training time=0.48s (21.40%) |Others=0.12 (5.29%)|CurSamplesPerSec=14.27 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5738|ppo_ep: 1|act_loss: -0.037322998046875|cri_loss: -0.017547607421875|unsuper_loss: 0.0
+average reward score: 5.81640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.42%) |Training time=0.48s (21.97%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.51
+[2023-04-14 12:18:30,185] [INFO] [logging.py:96:log_dist] [Rank 0] step=5740, skipped=73, lr=[2.2111537893210277e-06, 2.2111537893210277e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:18:30,203] [INFO] [timer.py:199:stop] epoch=0/micro_step=5740/global_step=5740, RunningAvgSamplesPerSec=105.37475219460445, CurrSamplesPerSec=103.98614734176941, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:18:30,295] [INFO] [logging.py:96:log_dist] [Rank 0] step=5740, skipped=94, lr=[1.1627092488909802e-06, 1.1627092488909802e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5739|ppo_ep: 1|act_loss: -0.016815185546875|cri_loss: -0.0080108642578125|unsuper_loss: 0.0
+average reward score: 5.65625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.65%) |Training time=0.47s (21.74%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5740|ppo_ep: 1|act_loss: 0.002567291259765625|cri_loss: 0.0016937255859375|unsuper_loss: 0.0
+average reward score: 5.0703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.89%) |Training time=0.47s (21.48%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5741|ppo_ep: 1|act_loss: -0.0176544189453125|cri_loss: -0.00852203369140625|unsuper_loss: 0.0
+average reward score: 4.921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.56%) |Training time=0.48s (21.83%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5742|ppo_ep: 1|act_loss: -0.019744873046875|cri_loss: -0.0095367431640625|unsuper_loss: 0.0
+average reward score: 5.0625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.05%) |Training time=0.46s (21.30%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5743|ppo_ep: 1|act_loss: -0.022613525390625|cri_loss: -0.01056671142578125|unsuper_loss: 0.0
+average reward score: 4.96484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.02%) |Training time=0.49s (22.35%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5744|ppo_ep: 1|act_loss: -0.00809478759765625|cri_loss: -0.003993988037109375|unsuper_loss: 0.0
+average reward score: 6.26953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.23%) |Training time=0.46s (21.17%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5745|ppo_ep: 1|act_loss: -0.0010538101196289062|cri_loss: 0.00023937225341796875|unsuper_loss: 0.0
+average reward score: 4.92578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.23%) |Training time=0.48s (22.17%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5746|ppo_ep: 1|act_loss: 0.052093505859375|cri_loss: 0.0269927978515625|unsuper_loss: 0.0
+average reward score: 5.56640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.11%) |Training time=0.48s (22.28%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5747|ppo_ep: 1|act_loss: 0.027069091796875|cri_loss: 0.01427459716796875|unsuper_loss: 0.0
+average reward score: 5.3046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.22%) |Training time=0.48s (22.12%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5748|ppo_ep: 1|act_loss: 0.0090789794921875|cri_loss: 0.0047760009765625|unsuper_loss: 0.0
+average reward score: 4.8984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.98%) |Training time=0.49s (22.41%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.51
+[2023-04-14 12:18:51,928] [INFO] [logging.py:96:log_dist] [Rank 0] step=5750, skipped=73, lr=[2.195558919078415e-06, 2.195558919078415e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:18:51,947] [INFO] [timer.py:199:stop] epoch=0/micro_step=5750/global_step=5750, RunningAvgSamplesPerSec=105.36822216331952, CurrSamplesPerSec=100.24642125720193, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:18:52,039] [INFO] [logging.py:96:log_dist] [Rank 0] step=5750, skipped=94, lr=[1.1545869877830212e-06, 1.1545869877830212e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5749|ppo_ep: 1|act_loss: -0.055419921875|cri_loss: -0.0255584716796875|unsuper_loss: 0.0
+average reward score: 5.57421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.21%) |Training time=0.48s (22.20%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5750|ppo_ep: 1|act_loss: -0.03875732421875|cri_loss: -0.01885986328125|unsuper_loss: 0.0
+average reward score: 4.62109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.93%) |Training time=0.49s (22.47%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5751|ppo_ep: 1|act_loss: -0.0103912353515625|cri_loss: -0.00496673583984375|unsuper_loss: 0.0
+average reward score: 5.18359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.31%) |Training time=0.48s (20.42%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.59 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5752|ppo_ep: 1|act_loss: 0.0010557174682617188|cri_loss: 0.0008831024169921875|unsuper_loss: 0.0
+average reward score: 5.09765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.32%) |Training time=0.48s (22.06%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5753|ppo_ep: 1|act_loss: -0.0428466796875|cri_loss: -0.0207977294921875|unsuper_loss: 0.0
+average reward score: 5.03515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.11%) |Training time=0.48s (22.29%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5754|ppo_ep: 1|act_loss: 0.028411865234375|cri_loss: 0.0145263671875|unsuper_loss: 0.0
+average reward score: 5.21484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.36%) |Training time=0.45s (21.00%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5755|ppo_ep: 1|act_loss: 0.01409149169921875|cri_loss: 0.007289886474609375|unsuper_loss: 0.0
+average reward score: 5.76171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.47%) |Training time=0.45s (20.89%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5756|ppo_ep: 1|act_loss: 0.01885986328125|cri_loss: 0.00994110107421875|unsuper_loss: 0.0
+average reward score: 4.9296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.62%) |Training time=0.45s (20.76%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5757|ppo_ep: 1|act_loss: 0.014007568359375|cri_loss: 0.0072174072265625|unsuper_loss: 0.0
+average reward score: 5.28515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.83%) |Training time=0.44s (20.51%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5758|ppo_ep: 1|act_loss: 0.00698089599609375|cri_loss: 0.003719329833984375|unsuper_loss: 0.0
+average reward score: 5.10546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.45%) |Training time=0.46s (20.94%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.51
+[2023-04-14 12:19:13,815] [INFO] [logging.py:96:log_dist] [Rank 0] step=5760, skipped=73, lr=[2.180003023495804e-06, 2.180003023495804e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:19:13,834] [INFO] [timer.py:199:stop] epoch=0/micro_step=5760/global_step=5760, RunningAvgSamplesPerSec=105.36992402704047, CurrSamplesPerSec=113.88001075862748, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:19:13,926] [INFO] [logging.py:96:log_dist] [Rank 0] step=5760, skipped=94, lr=[1.1464846689415345e-06, 1.1464846689415345e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5759|ppo_ep: 1|act_loss: 0.004638671875|cri_loss: 0.0025043487548828125|unsuper_loss: 0.0
+average reward score: 5.1015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.87%) |Training time=0.44s (20.50%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5760|ppo_ep: 1|act_loss: -0.00904083251953125|cri_loss: -0.004444122314453125|unsuper_loss: 0.0
+average reward score: 5.09375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.69%) |Training time=0.45s (20.68%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5761|ppo_ep: 1|act_loss: -0.0150146484375|cri_loss: -0.0073089599609375|unsuper_loss: 0.0
+average reward score: 5.109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.66%) |Training time=0.45s (20.71%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5762|ppo_ep: 1|act_loss: -0.001277923583984375|cri_loss: -2.956390380859375e-05|unsuper_loss: 0.0
+average reward score: 5.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.22%) |Training time=0.46s (21.17%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5763|ppo_ep: 1|act_loss: -0.00362396240234375|cri_loss: -0.001667022705078125|unsuper_loss: 0.0
+average reward score: 5.1875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.90%) |Training time=0.44s (20.44%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5764|ppo_ep: 1|act_loss: -0.0035037994384765625|cri_loss: -0.0008087158203125|unsuper_loss: 0.0
+average reward score: 5.171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.45%) |Training time=0.45s (20.91%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5765|ppo_ep: 1|act_loss: 0.0462646484375|cri_loss: 0.0257720947265625|unsuper_loss: 0.0
+average reward score: 4.9765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.23%) |Training time=0.46s (21.15%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5766|ppo_ep: 1|act_loss: 0.02264404296875|cri_loss: 0.011871337890625|unsuper_loss: 0.0
+average reward score: 5.42578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.77s (74.90%) |Training time=0.49s (20.81%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.55 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5767|ppo_ep: 1|act_loss: 0.0171661376953125|cri_loss: 0.00885772705078125|unsuper_loss: 0.0
+average reward score: 5.6171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.72s (75.51%) |Training time=0.45s (19.95%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.06 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5768|ppo_ep: 1|act_loss: 0.019622802734375|cri_loss: 0.01026153564453125|unsuper_loss: 0.0
+average reward score: 5.71484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.16%) |Training time=0.46s (21.23%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.51
+[2023-04-14 12:19:35,783] [INFO] [logging.py:96:log_dist] [Rank 0] step=5770, skipped=73, lr=[2.164486333149091e-06, 2.164486333149091e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:19:35,801] [INFO] [timer.py:199:stop] epoch=0/micro_step=5770/global_step=5770, RunningAvgSamplesPerSec=105.3766256111742, CurrSamplesPerSec=108.46236360154154, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:19:35,894] [INFO] [logging.py:96:log_dist] [Rank 0] step=5770, skipped=94, lr=[1.1384024124624324e-06, 1.1384024124624324e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5769|ppo_ep: 1|act_loss: -0.0010747909545898438|cri_loss: -0.0003705024719238281|unsuper_loss: 0.0
+average reward score: 5.41796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.33%) |Training time=0.46s (21.08%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5770|ppo_ep: 1|act_loss: 0.00479888916015625|cri_loss: 0.0035762786865234375|unsuper_loss: 0.0
+average reward score: 5.5390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.19%) |Training time=0.46s (21.21%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5771|ppo_ep: 1|act_loss: -0.00518798828125|cri_loss: -0.002086639404296875|unsuper_loss: 0.0
+average reward score: 5.69140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.56%) |Training time=0.45s (20.81%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5772|ppo_ep: 1|act_loss: 0.003818511962890625|cri_loss: 0.0019702911376953125|unsuper_loss: 0.0
+average reward score: 4.8828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.15%) |Training time=0.46s (21.24%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5773|ppo_ep: 1|act_loss: 0.005664825439453125|cri_loss: 0.00347137451171875|unsuper_loss: 0.0
+average reward score: 5.15234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.38%) |Training time=0.46s (21.01%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5774|ppo_ep: 1|act_loss: 0.019561767578125|cri_loss: 0.01019287109375|unsuper_loss: 0.0
+average reward score: 5.02734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.57%) |Training time=0.45s (20.81%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5775|ppo_ep: 1|act_loss: -0.0223236083984375|cri_loss: -0.01052093505859375|unsuper_loss: 0.0
+average reward score: 5.05859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.36%) |Training time=0.46s (21.02%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5776|ppo_ep: 1|act_loss: 0.0012111663818359375|cri_loss: 0.0007171630859375|unsuper_loss: 0.0
+average reward score: 5.18359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.46%) |Training time=0.45s (20.93%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5777|ppo_ep: 1|act_loss: -0.01100921630859375|cri_loss: -0.00514984130859375|unsuper_loss: 0.0
+average reward score: 5.515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.18%) |Training time=0.46s (21.23%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5778|ppo_ep: 1|act_loss: -0.02056884765625|cri_loss: -0.00988006591796875|unsuper_loss: 0.0
+average reward score: 4.4765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.46%) |Training time=0.45s (20.92%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.51
+[2023-04-14 12:19:57,471] [INFO] [logging.py:96:log_dist] [Rank 0] step=5780, skipped=73, lr=[2.1490090780330607e-06, 2.1490090780330607e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:19:57,489] [INFO] [timer.py:199:stop] epoch=0/micro_step=5780/global_step=5780, RunningAvgSamplesPerSec=105.38259097764607, CurrSamplesPerSec=107.7067809395413, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:19:57,582] [INFO] [logging.py:96:log_dist] [Rank 0] step=5780, skipped=94, lr=[1.1303403381442527e-06, 1.1303403381442527e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5779|ppo_ep: 1|act_loss: -0.00388336181640625|cri_loss: -0.0016489028930664062|unsuper_loss: 0.0
+average reward score: 5.30859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.27%) |Training time=0.46s (21.14%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5780|ppo_ep: 1|act_loss: -0.0297088623046875|cri_loss: -0.01397705078125|unsuper_loss: 0.0
+average reward score: 5.09375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.71%) |Training time=0.45s (20.70%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.51
+[2023-04-14 12:20:01,926] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 5781|ppo_ep: 1|act_loss: -0.035430908203125|cri_loss: -0.0171051025390625|unsuper_loss: 0.0
+average reward score: 4.765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.08%) |Training time=0.45s (20.76%) |Others=0.09 (4.16%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.51
+[2023-04-14 12:20:04,093] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 5782|ppo_ep: 1|act_loss: 0.004146575927734375|cri_loss: 0.0023784637451171875|unsuper_loss: 0.0
+average reward score: 4.2265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.77%) |Training time=0.46s (21.11%) |Others=0.09 (4.12%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.51
+[2023-04-14 12:20:06,309] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192
+epoch: 0|step: 5783|ppo_ep: 1|act_loss: -0.0152130126953125|cri_loss: -0.007354736328125|unsuper_loss: 0.0
+average reward score: 5.9140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.79s (77.39%) |Training time=0.42s (18.28%) |Others=0.10 (4.33%)|CurSamplesPerSec=13.85 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5784|ppo_ep: 1|act_loss: -0.04534912109375|cri_loss: -0.022186279296875|unsuper_loss: 0.0
+average reward score: 4.9296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.48%) |Training time=0.46s (20.93%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5785|ppo_ep: 1|act_loss: -0.004711151123046875|cri_loss: -0.0017976760864257812|unsuper_loss: 0.0
+average reward score: 4.58984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.73%) |Training time=0.45s (20.65%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5786|ppo_ep: 1|act_loss: -0.027191162109375|cri_loss: -0.0132293701171875|unsuper_loss: 0.0
+average reward score: 4.75
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.45%) |Training time=0.45s (20.94%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5787|ppo_ep: 1|act_loss: -0.0107574462890625|cri_loss: -0.005168914794921875|unsuper_loss: 0.0
+average reward score: 4.640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.18%) |Training time=0.46s (21.21%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5788|ppo_ep: 1|act_loss: 0.024444580078125|cri_loss: 0.0126953125|unsuper_loss: 0.0
+average reward score: 4.84765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.28%) |Training time=0.46s (21.14%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.51
+[2023-04-14 12:20:19,343] [INFO] [logging.py:96:log_dist] [Rank 0] step=5790, skipped=74, lr=[2.1351134551705775e-06, 2.1351134551705775e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:20:19,361] [INFO] [timer.py:199:stop] epoch=0/micro_step=5790/global_step=5790, RunningAvgSamplesPerSec=105.3911187251698, CurrSamplesPerSec=108.43922913285121, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:20:19,453] [INFO] [logging.py:96:log_dist] [Rank 0] step=5790, skipped=96, lr=[1.12390529015926e-06, 1.12390529015926e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5789|ppo_ep: 1|act_loss: 0.0033512115478515625|cri_loss: 0.001781463623046875|unsuper_loss: 0.0
+average reward score: 4.64453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.37%) |Training time=0.46s (21.05%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5790|ppo_ep: 1|act_loss: -0.012176513671875|cri_loss: -0.0050506591796875|unsuper_loss: 0.0
+average reward score: 4.91796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.43%) |Training time=0.46s (21.00%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5791|ppo_ep: 1|act_loss: -0.0051422119140625|cri_loss: -0.002140045166015625|unsuper_loss: 0.0
+average reward score: 5.5703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.45%) |Training time=0.46s (20.93%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5792|ppo_ep: 1|act_loss: 0.0010499954223632812|cri_loss: 0.0006031990051269531|unsuper_loss: 0.0
+average reward score: 5.1484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.38%) |Training time=0.46s (21.03%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5793|ppo_ep: 1|act_loss: -0.00574493408203125|cri_loss: -0.00251007080078125|unsuper_loss: 0.0
+average reward score: 5.171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.44%) |Training time=0.45s (20.96%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5794|ppo_ep: 1|act_loss: -0.0072784423828125|cri_loss: -0.0033245086669921875|unsuper_loss: 0.0
+average reward score: 5.765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.44%) |Training time=0.46s (20.96%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5795|ppo_ep: 1|act_loss: -0.0041961669921875|cri_loss: -0.001972198486328125|unsuper_loss: 0.0
+average reward score: 5.5234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.61%) |Training time=0.45s (20.77%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5796|ppo_ep: 1|act_loss: -0.0179443359375|cri_loss: -0.00862884521484375|unsuper_loss: 0.0
+average reward score: 5.3828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.65s (73.39%) |Training time=0.49s (21.64%) |Others=0.11 (4.97%)|CurSamplesPerSec=14.28 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5797|ppo_ep: 1|act_loss: 0.07708740234375|cri_loss: 0.03997802734375|unsuper_loss: 0.0
+average reward score: 4.8671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.30%) |Training time=0.44s (19.64%) |Others=0.16 (7.06%)|CurSamplesPerSec=14.45 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5798|ppo_ep: 1|act_loss: 0.0013828277587890625|cri_loss: 0.0012359619140625|unsuper_loss: 0.0
+average reward score: 5.9609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.18%) |Training time=0.45s (20.87%) |Others=0.11 (4.95%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.51
+[2023-04-14 12:20:41,175] [INFO] [logging.py:96:log_dist] [Rank 0] step=5800, skipped=74, lr=[2.1197117585322546e-06, 2.1197117585322546e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:20:41,193] [INFO] [timer.py:199:stop] epoch=0/micro_step=5800/global_step=5800, RunningAvgSamplesPerSec=105.39739484592792, CurrSamplesPerSec=109.96594783124505, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:20:41,286] [INFO] [logging.py:96:log_dist] [Rank 0] step=5800, skipped=96, lr=[1.1158798446670462e-06, 1.1158798446670462e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5799|ppo_ep: 1|act_loss: -0.0104522705078125|cri_loss: -0.004947662353515625|unsuper_loss: 0.0
+average reward score: 5.1328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.45s (21.12%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5800|ppo_ep: 1|act_loss: -0.02130126953125|cri_loss: -0.010406494140625|unsuper_loss: 0.0
+average reward score: 4.93359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.25%) |Training time=0.45s (21.09%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5801|ppo_ep: 1|act_loss: 0.005802154541015625|cri_loss: 0.0036106109619140625|unsuper_loss: 0.0
+average reward score: 4.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.24%) |Training time=0.45s (21.12%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5802|ppo_ep: 1|act_loss: -0.0009617805480957031|cri_loss: 2.384185791015625e-06|unsuper_loss: 0.0
+average reward score: 5.265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.74%) |Training time=0.46s (21.61%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5803|ppo_ep: 1|act_loss: -0.0011854171752929688|cri_loss: -0.0003643035888671875|unsuper_loss: 0.0
+average reward score: 5.640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.84%) |Training time=0.46s (21.51%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5804|ppo_ep: 1|act_loss: 0.0267486572265625|cri_loss: 0.01435089111328125|unsuper_loss: 0.0
+average reward score: 4.953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.72%) |Training time=0.47s (21.64%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5805|ppo_ep: 1|act_loss: 0.0136566162109375|cri_loss: 0.00754547119140625|unsuper_loss: 0.0
+average reward score: 5.7734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.74%) |Training time=0.46s (21.62%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5806|ppo_ep: 1|act_loss: 0.0253448486328125|cri_loss: 0.01340484619140625|unsuper_loss: 0.0
+average reward score: 5.3203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.65%) |Training time=0.47s (21.70%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5807|ppo_ep: 1|act_loss: -0.0128173828125|cri_loss: -0.006130218505859375|unsuper_loss: 0.0
+average reward score: 5.78515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.90%) |Training time=0.46s (21.45%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5808|ppo_ep: 1|act_loss: -0.0037994384765625|cri_loss: -0.0012502670288085938|unsuper_loss: 0.0
+average reward score: 6.140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.65%) |Training time=0.47s (21.74%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.51
+[2023-04-14 12:21:02,690] [INFO] [logging.py:96:log_dist] [Rank 0] step=5810, skipped=74, lr=[2.1043501607918214e-06, 2.1043501607918214e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:21:02,709] [INFO] [timer.py:199:stop] epoch=0/micro_step=5810/global_step=5810, RunningAvgSamplesPerSec=105.39882852218719, CurrSamplesPerSec=102.81196321932057, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:21:02,804] [INFO] [logging.py:96:log_dist] [Rank 0] step=5810, skipped=96, lr=[1.107874915174542e-06, 1.107874915174542e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5809|ppo_ep: 1|act_loss: -0.031494140625|cri_loss: -0.0153961181640625|unsuper_loss: 0.0
+average reward score: 5.140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.47s (21.95%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5810|ppo_ep: 1|act_loss: -0.03619384765625|cri_loss: -0.0177154541015625|unsuper_loss: 0.0
+average reward score: 5.80078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.66%) |Training time=0.47s (21.68%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5811|ppo_ep: 1|act_loss: -0.04034423828125|cri_loss: -0.01953125|unsuper_loss: 0.0
+average reward score: 5.77734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.85%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5812|ppo_ep: 1|act_loss: 0.01007080078125|cri_loss: 0.005229949951171875|unsuper_loss: 0.0
+average reward score: 4.953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.59s (70.07%) |Training time=0.47s (20.86%) |Others=0.21 (9.07%)|CurSamplesPerSec=14.09 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5813|ppo_ep: 1|act_loss: -0.0201416015625|cri_loss: -0.0098114013671875|unsuper_loss: 0.0
+average reward score: 5.5390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.36%) |Training time=0.47s (21.97%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5814|ppo_ep: 1|act_loss: 0.033935546875|cri_loss: 0.018096923828125|unsuper_loss: 0.0
+average reward score: 5.203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.49%) |Training time=0.47s (21.87%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5815|ppo_ep: 1|act_loss: 0.007904052734375|cri_loss: 0.004215240478515625|unsuper_loss: 0.0
+average reward score: 5.05078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.91%) |Training time=0.46s (21.40%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5816|ppo_ep: 1|act_loss: 0.012664794921875|cri_loss: 0.006511688232421875|unsuper_loss: 0.0
+average reward score: 4.5234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.70%) |Training time=0.46s (21.63%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5817|ppo_ep: 1|act_loss: -0.00014269351959228516|cri_loss: 1.1444091796875e-05|unsuper_loss: 0.0
+average reward score: 5.42578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.82%) |Training time=0.46s (21.51%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5818|ppo_ep: 1|act_loss: 0.040313720703125|cri_loss: 0.0205841064453125|unsuper_loss: 0.0
+average reward score: 5.46484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.64%) |Training time=0.46s (21.68%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.51
+[2023-04-14 12:21:24,289] [INFO] [logging.py:96:log_dist] [Rank 0] step=5820, skipped=74, lr=[2.0890288896452154e-06, 2.0890288896452154e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:21:24,307] [INFO] [timer.py:199:stop] epoch=0/micro_step=5820/global_step=5820, RunningAvgSamplesPerSec=105.39866420062243, CurrSamplesPerSec=108.02475065836494, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:21:24,400] [INFO] [logging.py:96:log_dist] [Rank 0] step=5820, skipped=96, lr=[1.0998906203341133e-06, 1.0998906203341133e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5819|ppo_ep: 1|act_loss: 0.0126953125|cri_loss: 0.006683349609375|unsuper_loss: 0.0
+average reward score: 4.859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.89%) |Training time=0.46s (21.45%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5820|ppo_ep: 1|act_loss: 0.002716064453125|cri_loss: 0.0014963150024414062|unsuper_loss: 0.0
+average reward score: 5.1171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.59%) |Training time=0.47s (21.75%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5821|ppo_ep: 1|act_loss: -0.0369873046875|cri_loss: -0.0176239013671875|unsuper_loss: 0.0
+average reward score: 5.16796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.72%) |Training time=0.46s (21.58%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5822|ppo_ep: 1|act_loss: 0.00484466552734375|cri_loss: 0.00281524658203125|unsuper_loss: 0.0
+average reward score: 5.9140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.88%) |Training time=0.46s (21.42%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5823|ppo_ep: 1|act_loss: -0.02099609375|cri_loss: -0.009765625|unsuper_loss: 0.0
+average reward score: 4.99609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.56%) |Training time=0.47s (21.78%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5824|ppo_ep: 1|act_loss: -0.0311737060546875|cri_loss: -0.0151214599609375|unsuper_loss: 0.0
+average reward score: 5.1484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.57%) |Training time=0.47s (21.74%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5825|ppo_ep: 1|act_loss: 0.03155517578125|cri_loss: 0.016632080078125|unsuper_loss: 0.0
+average reward score: 4.74609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.67%) |Training time=0.47s (21.67%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5826|ppo_ep: 1|act_loss: -0.00661468505859375|cri_loss: -0.0029697418212890625|unsuper_loss: 0.0
+average reward score: 5.01953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.37%) |Training time=0.48s (21.21%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.00 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5827|ppo_ep: 1|act_loss: 0.04132080078125|cri_loss: 0.021881103515625|unsuper_loss: 0.0
+average reward score: 5.0390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.70%) |Training time=0.47s (21.68%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5828|ppo_ep: 1|act_loss: 0.0458984375|cri_loss: 0.02459716796875|unsuper_loss: 0.0
+average reward score: 5.20703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.93%) |Training time=0.49s (20.81%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.62 |AvgSamplesPerSec=14.51
+[2023-04-14 12:21:46,126] [INFO] [logging.py:96:log_dist] [Rank 0] step=5830, skipped=74, lr=[2.0737481721906358e-06, 2.0737481721906358e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:21:46,145] [INFO] [timer.py:199:stop] epoch=0/micro_step=5830/global_step=5830, RunningAvgSamplesPerSec=105.39656981851218, CurrSamplesPerSec=106.60348186749079, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:21:46,237] [INFO] [logging.py:96:log_dist] [Rank 0] step=5830, skipped=96, lr=[1.0919270784922725e-06, 1.0919270784922725e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5829|ppo_ep: 1|act_loss: -0.011474609375|cri_loss: -0.005504608154296875|unsuper_loss: 0.0
+average reward score: 5.28125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.46s (21.43%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5830|ppo_ep: 1|act_loss: 0.00536346435546875|cri_loss: 0.0030269622802734375|unsuper_loss: 0.0
+average reward score: 5.890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.86%) |Training time=0.47s (21.53%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5831|ppo_ep: 1|act_loss: 0.018096923828125|cri_loss: 0.0093841552734375|unsuper_loss: 0.0
+average reward score: 5.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.46s (21.46%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5832|ppo_ep: 1|act_loss: 0.015869140625|cri_loss: 0.0083160400390625|unsuper_loss: 0.0
+average reward score: 5.8671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.92%) |Training time=0.46s (21.45%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5833|ppo_ep: 1|act_loss: 0.09326171875|cri_loss: 0.048065185546875|unsuper_loss: 0.0
+average reward score: 5.65234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.66%) |Training time=0.47s (21.73%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5834|ppo_ep: 1|act_loss: -0.003528594970703125|cri_loss: -0.0014944076538085938|unsuper_loss: 0.0
+average reward score: 4.24609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.88%) |Training time=0.46s (21.46%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5835|ppo_ep: 1|act_loss: -0.00787353515625|cri_loss: -0.00383758544921875|unsuper_loss: 0.0
+average reward score: 5.515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.94%) |Training time=0.46s (21.44%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5836|ppo_ep: 1|act_loss: -0.001445770263671875|cri_loss: -0.0004544258117675781|unsuper_loss: 0.0
+average reward score: 5.04296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.88%) |Training time=0.47s (21.55%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5837|ppo_ep: 1|act_loss: -0.0098419189453125|cri_loss: -0.0033473968505859375|unsuper_loss: 0.0
+average reward score: 5.109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.46s (21.48%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5838|ppo_ep: 1|act_loss: -0.0123443603515625|cri_loss: -0.006011962890625|unsuper_loss: 0.0
+average reward score: 6.16796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.85%) |Training time=0.46s (21.52%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.51
+[2023-04-14 12:22:07,741] [INFO] [logging.py:96:log_dist] [Rank 0] step=5840, skipped=74, lr=[2.05850823492518e-06, 2.05850823492518e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:22:07,759] [INFO] [timer.py:199:stop] epoch=0/micro_step=5840/global_step=5840, RunningAvgSamplesPerSec=105.39736005809708, CurrSamplesPerSec=107.77095193664996, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:22:07,851] [INFO] [logging.py:96:log_dist] [Rank 0] step=5840, skipped=96, lr=[1.0839844076879186e-06, 1.0839844076879186e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5839|ppo_ep: 1|act_loss: -0.055328369140625|cri_loss: -0.0242919921875|unsuper_loss: 0.0
+average reward score: 5.67578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.32%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5840|ppo_ep: 1|act_loss: -0.0052490234375|cri_loss: -0.0023136138916015625|unsuper_loss: 0.0
+average reward score: 5.1953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.82%) |Training time=0.47s (21.55%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5841|ppo_ep: 1|act_loss: 0.0347900390625|cri_loss: 0.018707275390625|unsuper_loss: 0.0
+average reward score: 5.20703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.13%) |Training time=0.46s (21.21%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5842|ppo_ep: 1|act_loss: -0.0010709762573242188|cri_loss: -0.0003044605255126953|unsuper_loss: 0.0
+average reward score: 5.765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.91%) |Training time=0.47s (21.47%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5843|ppo_ep: 1|act_loss: 0.002651214599609375|cri_loss: 0.0016536712646484375|unsuper_loss: 0.0
+average reward score: 4.9296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.11%) |Training time=0.49s (20.63%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.56 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5844|ppo_ep: 1|act_loss: 0.00412750244140625|cri_loss: 0.002384185791015625|unsuper_loss: 0.0
+average reward score: 5.02734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.01%) |Training time=0.44s (20.31%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5845|ppo_ep: 1|act_loss: 0.0032215118408203125|cri_loss: 0.001739501953125|unsuper_loss: 0.0
+average reward score: 5.1484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.67%) |Training time=0.45s (20.69%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5846|ppo_ep: 1|act_loss: -0.004993438720703125|cri_loss: -0.002056121826171875|unsuper_loss: 0.0
+average reward score: 5.01953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.83%) |Training time=0.44s (20.35%) |Others=0.10 (4.81%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5847|ppo_ep: 1|act_loss: 0.0643310546875|cri_loss: 0.03436279296875|unsuper_loss: 0.0
+average reward score: 6.1640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.05%) |Training time=0.43s (20.26%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5848|ppo_ep: 1|act_loss: -0.023712158203125|cri_loss: -0.0104217529296875|unsuper_loss: 0.0
+average reward score: 5.67578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.91%) |Training time=0.44s (20.43%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.51
+[2023-04-14 12:22:29,465] [INFO] [logging.py:96:log_dist] [Rank 0] step=5850, skipped=74, lr=[2.0433093037414786e-06, 2.0433093037414786e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:22:29,484] [INFO] [timer.py:199:stop] epoch=0/micro_step=5850/global_step=5850, RunningAvgSamplesPerSec=105.40682625158138, CurrSamplesPerSec=117.7084163556254, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:22:29,576] [INFO] [logging.py:96:log_dist] [Rank 0] step=5850, skipped=96, lr=[1.076062725650594e-06, 1.076062725650594e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5849|ppo_ep: 1|act_loss: -0.00316619873046875|cri_loss: -0.0006608963012695312|unsuper_loss: 0.0
+average reward score: 5.28125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.03%) |Training time=0.43s (20.31%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5850|ppo_ep: 1|act_loss: -0.00543975830078125|cri_loss: -0.00247955322265625|unsuper_loss: 0.0
+average reward score: 5.6640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.09%) |Training time=0.43s (20.23%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5851|ppo_ep: 1|act_loss: -0.03326416015625|cri_loss: -0.016357421875|unsuper_loss: 0.0
+average reward score: 5.7265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.98%) |Training time=0.44s (20.35%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5852|ppo_ep: 1|act_loss: -0.03924560546875|cri_loss: -0.0193328857421875|unsuper_loss: 0.0
+average reward score: 6.23828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.84%) |Training time=0.44s (20.48%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5853|ppo_ep: 1|act_loss: 0.0015001296997070312|cri_loss: 0.003116607666015625|unsuper_loss: 0.0
+average reward score: 6.23828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.76%) |Training time=0.44s (20.55%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5854|ppo_ep: 1|act_loss: -0.028717041015625|cri_loss: -0.01385498046875|unsuper_loss: 0.0
+average reward score: 5.43359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.34%) |Training time=0.45s (21.02%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5855|ppo_ep: 1|act_loss: -0.00753021240234375|cri_loss: -0.0033626556396484375|unsuper_loss: 0.0
+average reward score: 5.2421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.40%) |Training time=0.44s (20.00%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5856|ppo_ep: 1|act_loss: 0.032745361328125|cri_loss: 0.016998291015625|unsuper_loss: 0.0
+average reward score: 5.10546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.71s (76.06%) |Training time=0.44s (19.47%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.25 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5857|ppo_ep: 1|act_loss: -0.00849151611328125|cri_loss: -0.0033931732177734375|unsuper_loss: 0.0
+average reward score: 4.5859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.61s (72.74%) |Training time=0.46s (20.72%) |Others=0.14 (6.54%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5858|ppo_ep: 1|act_loss: 0.0016469955444335938|cri_loss: 0.0010538101196289062|unsuper_loss: 0.0
+average reward score: 5.21875
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.66s (74.10%) |Training time=0.48s (21.43%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.25 |AvgSamplesPerSec=14.51
+[2023-04-14 12:22:51,232] [INFO] [logging.py:96:log_dist] [Rank 0] step=5860, skipped=74, lr=[2.028151603924359e-06, 2.028151603924359e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:22:51,250] [INFO] [timer.py:199:stop] epoch=0/micro_step=5860/global_step=5860, RunningAvgSamplesPerSec=105.41776771499981, CurrSamplesPerSec=105.08171960509524, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:22:51,343] [INFO] [logging.py:96:log_dist] [Rank 0] step=5860, skipped=96, lr=[1.068162149798737e-06, 1.068162149798737e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5859|ppo_ep: 1|act_loss: 0.0809326171875|cri_loss: 0.04217529296875|unsuper_loss: 0.0
+average reward score: 4.53125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.74%) |Training time=0.47s (21.65%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5860|ppo_ep: 1|act_loss: -0.0221710205078125|cri_loss: -0.01047515869140625|unsuper_loss: 0.0
+average reward score: 5.0078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.80%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5861|ppo_ep: 1|act_loss: -0.00592041015625|cri_loss: -0.0028438568115234375|unsuper_loss: 0.0
+average reward score: 6.1953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.86%) |Training time=0.46s (21.52%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5862|ppo_ep: 1|act_loss: 0.00959014892578125|cri_loss: 0.00543975830078125|unsuper_loss: 0.0
+average reward score: 5.10546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.62%) |Training time=0.47s (21.76%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5863|ppo_ep: 1|act_loss: -0.008148193359375|cri_loss: -0.002391815185546875|unsuper_loss: 0.0
+average reward score: 4.66796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.48s (22.02%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5864|ppo_ep: 1|act_loss: -0.003368377685546875|cri_loss: -0.0016183853149414062|unsuper_loss: 0.0
+average reward score: 4.703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.53%) |Training time=0.47s (21.88%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5865|ppo_ep: 1|act_loss: -0.0250701904296875|cri_loss: -0.012115478515625|unsuper_loss: 0.0
+average reward score: 4.56640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.83%) |Training time=0.46s (21.55%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5866|ppo_ep: 1|act_loss: -0.015777587890625|cri_loss: -0.00771331787109375|unsuper_loss: 0.0
+average reward score: 4.5625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.81%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5867|ppo_ep: 1|act_loss: -0.0160980224609375|cri_loss: -0.007701873779296875|unsuper_loss: 0.0
+average reward score: 4.48046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.80%) |Training time=0.46s (21.55%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5868|ppo_ep: 1|act_loss: 0.00677490234375|cri_loss: 0.0037555694580078125|unsuper_loss: 0.0
+average reward score: 4.53515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.81%) |Training time=0.47s (21.57%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.51
+[2023-04-14 12:23:12,850] [INFO] [logging.py:96:log_dist] [Rank 0] step=5870, skipped=74, lr=[2.013035360147499e-06, 2.013035360147499e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:23:12,868] [INFO] [timer.py:199:stop] epoch=0/micro_step=5870/global_step=5870, RunningAvgSamplesPerSec=105.4160718237638, CurrSamplesPerSec=106.12563552319669, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:23:12,961] [INFO] [logging.py:96:log_dist] [Rank 0] step=5870, skipped=96, lr=[1.0602827972379417e-06, 1.0602827972379417e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5869|ppo_ep: 1|act_loss: 0.020050048828125|cri_loss: 0.0104827880859375|unsuper_loss: 0.0
+average reward score: 4.8828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.92%) |Training time=0.46s (21.46%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5870|ppo_ep: 1|act_loss: 0.0234375|cri_loss: 0.01206207275390625|unsuper_loss: 0.0
+average reward score: 5.0625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.85%) |Training time=0.46s (21.52%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5871|ppo_ep: 1|act_loss: -0.0171356201171875|cri_loss: -0.00823974609375|unsuper_loss: 0.0
+average reward score: 5.43359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.85%) |Training time=0.46s (21.50%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5872|ppo_ep: 1|act_loss: -0.01873779296875|cri_loss: -0.00913238525390625|unsuper_loss: 0.0
+average reward score: 5.94921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.09%) |Training time=0.46s (21.27%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5873|ppo_ep: 1|act_loss: 0.0076446533203125|cri_loss: 0.004428863525390625|unsuper_loss: 0.0
+average reward score: 5.64453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.64%) |Training time=0.47s (20.04%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5874|ppo_ep: 1|act_loss: 0.00121307373046875|cri_loss: 0.0009002685546875|unsuper_loss: 0.0
+average reward score: 5.98828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.28%) |Training time=0.48s (22.14%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5875|ppo_ep: 1|act_loss: -0.0013256072998046875|cri_loss: -0.0005035400390625|unsuper_loss: 0.0
+average reward score: 6.109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.58%) |Training time=0.47s (21.77%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5876|ppo_ep: 1|act_loss: -0.0012073516845703125|cri_loss: -0.00045490264892578125|unsuper_loss: 0.0
+average reward score: 6.01953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.39%) |Training time=0.48s (21.99%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5877|ppo_ep: 1|act_loss: 0.0103302001953125|cri_loss: 0.005706787109375|unsuper_loss: 0.0
+average reward score: 4.75390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.48s (21.96%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5878|ppo_ep: 1|act_loss: -0.007904052734375|cri_loss: -0.003818511962890625|unsuper_loss: 0.0
+average reward score: 4.80859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.47s (21.87%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.51
+[2023-04-14 12:23:34,691] [INFO] [logging.py:96:log_dist] [Rank 0] step=5880, skipped=74, lr=[1.9979607964700985e-06, 1.9979607964700985e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:23:34,710] [INFO] [timer.py:199:stop] epoch=0/micro_step=5880/global_step=5880, RunningAvgSamplesPerSec=105.41309467540493, CurrSamplesPerSec=104.21140785612961, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:23:34,803] [INFO] [logging.py:96:log_dist] [Rank 0] step=5880, skipped=96, lr=[1.0524247847592191e-06, 1.0524247847592191e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5879|ppo_ep: 1|act_loss: -0.00569915771484375|cri_loss: -0.0027313232421875|unsuper_loss: 0.0
+average reward score: 5.43359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.68%) |Training time=0.47s (21.68%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5880|ppo_ep: 1|act_loss: 0.03271484375|cri_loss: 0.016754150390625|unsuper_loss: 0.0
+average reward score: 5.734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.42%) |Training time=0.48s (21.98%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5881|ppo_ep: 1|act_loss: -0.01360321044921875|cri_loss: -0.006427764892578125|unsuper_loss: 0.0
+average reward score: 4.65625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.11%) |Training time=0.48s (22.21%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5882|ppo_ep: 1|act_loss: 0.0108489990234375|cri_loss: 0.005596160888671875|unsuper_loss: 0.0
+average reward score: 5.2421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.37%) |Training time=0.48s (21.99%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.51
+[2023-04-14 12:23:43,477] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 5883|ppo_ep: 1|act_loss: 0.0019989013671875|cri_loss: 0.0011072158813476562|unsuper_loss: 0.0
+average reward score: 6.0
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.73%) |Training time=0.48s (22.14%) |Others=0.09 (4.14%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.51
+[2023-04-14 12:23:45,644] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 5884|ppo_ep: 1|act_loss: 0.025115966796875|cri_loss: 0.01308441162109375|unsuper_loss: 0.0
+average reward score: 5.87109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.76%) |Training time=0.48s (22.02%) |Others=0.09 (4.22%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5885|ppo_ep: 1|act_loss: 0.004608154296875|cri_loss: 0.0024166107177734375|unsuper_loss: 0.0
+average reward score: 5.42578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.62s (72.49%) |Training time=0.51s (23.01%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.33 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5886|ppo_ep: 1|act_loss: -0.02362060546875|cri_loss: -0.0113067626953125|unsuper_loss: 0.0
+average reward score: 4.69921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.11%) |Training time=0.48s (22.31%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5887|ppo_ep: 1|act_loss: 0.013458251953125|cri_loss: 0.00859832763671875|unsuper_loss: 0.0
+average reward score: 4.7890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.50%) |Training time=0.47s (20.16%) |Others=0.10 (4.34%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5888|ppo_ep: 1|act_loss: 0.0234375|cri_loss: 0.01216888427734375|unsuper_loss: 0.0
+average reward score: 5.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.30%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.51
+[2023-04-14 12:23:56,602] [INFO] [logging.py:96:log_dist] [Rank 0] step=5890, skipped=74, lr=[1.982928136333553e-06, 1.982928136333553e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:23:56,621] [INFO] [timer.py:199:stop] epoch=0/micro_step=5890/global_step=5890, RunningAvgSamplesPerSec=105.40534437177286, CurrSamplesPerSec=102.15519602149695, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:23:56,713] [INFO] [logging.py:96:log_dist] [Rank 0] step=5890, skipped=98, lr=[1.0461538179170056e-06, 1.0461538179170056e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5889|ppo_ep: 1|act_loss: -0.00814056396484375|cri_loss: -0.00372314453125|unsuper_loss: 0.0
+average reward score: 5.33984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.48s (21.97%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5890|ppo_ep: 1|act_loss: 0.031341552734375|cri_loss: 0.0159759521484375|unsuper_loss: 0.0
+average reward score: 5.60546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.47s (21.95%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5891|ppo_ep: 1|act_loss: 0.031219482421875|cri_loss: 0.01629638671875|unsuper_loss: 0.0
+average reward score: 5.31640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.54%) |Training time=0.48s (21.88%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5892|ppo_ep: 1|act_loss: 0.0266265869140625|cri_loss: 0.0140533447265625|unsuper_loss: 0.0
+average reward score: 5.12890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.47s (21.94%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5893|ppo_ep: 1|act_loss: 0.0418701171875|cri_loss: 0.0213165283203125|unsuper_loss: 0.0
+average reward score: 5.39453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.18%) |Training time=0.48s (22.22%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5894|ppo_ep: 1|act_loss: 0.00766754150390625|cri_loss: 0.004138946533203125|unsuper_loss: 0.0
+average reward score: 4.8203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.28%) |Training time=0.48s (22.09%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5895|ppo_ep: 1|act_loss: -0.00994873046875|cri_loss: -0.00438690185546875|unsuper_loss: 0.0
+average reward score: 5.36328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.81%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5896|ppo_ep: 1|act_loss: -0.0102691650390625|cri_loss: -0.004901885986328125|unsuper_loss: 0.0
+average reward score: 4.81640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.33%) |Training time=0.48s (22.06%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5897|ppo_ep: 1|act_loss: 0.01910400390625|cri_loss: 0.0099334716796875|unsuper_loss: 0.0
+average reward score: 5.21484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.46%) |Training time=0.47s (21.91%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5898|ppo_ep: 1|act_loss: -0.032958984375|cri_loss: -0.0158843994140625|unsuper_loss: 0.0
+average reward score: 4.57421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.47s (21.88%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.51
+[2023-04-14 12:24:18,277] [INFO] [logging.py:96:log_dist] [Rank 0] step=5900, skipped=74, lr=[1.9679376025581524e-06, 1.9679376025581524e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:24:18,295] [INFO] [timer.py:199:stop] epoch=0/micro_step=5900/global_step=5900, RunningAvgSamplesPerSec=105.39940940116666, CurrSamplesPerSec=103.16481053464135, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:24:18,393] [INFO] [logging.py:96:log_dist] [Rank 0] step=5900, skipped=98, lr=[1.0383345108885993e-06, 1.0383345108885993e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5899|ppo_ep: 1|act_loss: -0.025238037109375|cri_loss: -0.01222991943359375|unsuper_loss: 0.0
+average reward score: 5.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.42%) |Training time=0.47s (21.78%) |Others=0.10 (4.80%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5900|ppo_ep: 1|act_loss: -0.004428863525390625|cri_loss: -0.0018815994262695312|unsuper_loss: 0.0
+average reward score: 4.9375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.27%) |Training time=0.48s (22.11%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5901|ppo_ep: 1|act_loss: -0.01324462890625|cri_loss: -0.00623321533203125|unsuper_loss: 0.0
+average reward score: 5.29296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.86%) |Training time=0.47s (21.51%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5902|ppo_ep: 1|act_loss: -0.00580596923828125|cri_loss: -0.0019092559814453125|unsuper_loss: 0.0
+average reward score: 5.48828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.52%) |Training time=0.47s (21.86%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5903|ppo_ep: 1|act_loss: 0.0159912109375|cri_loss: 0.008148193359375|unsuper_loss: 0.0
+average reward score: 5.6328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.77s (74.42%) |Training time=0.51s (21.33%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.48 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5904|ppo_ep: 1|act_loss: -0.0110626220703125|cri_loss: -0.005344390869140625|unsuper_loss: 0.0
+average reward score: 5.62109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.63%) |Training time=0.47s (21.74%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.51
+[2023-04-14 12:24:31,500] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 5905|ppo_ep: 1|act_loss: 0.025054931640625|cri_loss: 0.0130462646484375|unsuper_loss: 0.0
+average reward score: 5.08984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.39%) |Training time=0.45s (20.93%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5906|ppo_ep: 1|act_loss: 0.0015077590942382812|cri_loss: 0.0011386871337890625|unsuper_loss: 0.0
+average reward score: 5.28125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.94%) |Training time=0.49s (22.46%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5907|ppo_ep: 1|act_loss: 0.00643157958984375|cri_loss: 0.0035247802734375|unsuper_loss: 0.0
+average reward score: 5.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.94%) |Training time=0.49s (22.45%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5908|ppo_ep: 1|act_loss: 0.006465911865234375|cri_loss: 0.0035915374755859375|unsuper_loss: 0.0
+average reward score: 5.828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.39%) |Training time=0.48s (22.02%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.51
+[2023-04-14 12:24:40,173] [INFO] [logging.py:96:log_dist] [Rank 0] step=5910, skipped=75, lr=[1.9544823238569153e-06, 1.9544823238569153e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:24:40,191] [INFO] [timer.py:199:stop] epoch=0/micro_step=5910/global_step=5910, RunningAvgSamplesPerSec=105.39236435819984, CurrSamplesPerSec=102.31007903210069, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:24:40,284] [INFO] [logging.py:96:log_dist] [Rank 0] step=5910, skipped=98, lr=[1.0305368692688175e-06, 1.0305368692688175e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5909|ppo_ep: 1|act_loss: -0.00597381591796875|cri_loss: -0.0027923583984375|unsuper_loss: 0.0
+average reward score: 5.86328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.46%) |Training time=0.48s (21.93%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.51
+[2023-04-14 12:24:42,333] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192
+epoch: 0|step: 5910|ppo_ep: 1|act_loss: 0.0025787353515625|cri_loss: 0.0016918182373046875|unsuper_loss: 0.0
+average reward score: 5.3359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.17%) |Training time=0.45s (21.15%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5911|ppo_ep: 1|act_loss: -0.002712249755859375|cri_loss: -0.001117706298828125|unsuper_loss: 0.0
+average reward score: 5.37890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.89%) |Training time=0.49s (22.52%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5912|ppo_ep: 1|act_loss: 0.0108795166015625|cri_loss: 0.00567626953125|unsuper_loss: 0.0
+average reward score: 4.9921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.80%) |Training time=0.49s (22.62%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5913|ppo_ep: 1|act_loss: -0.01776123046875|cri_loss: -0.00823974609375|unsuper_loss: 0.0
+average reward score: 6.1640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.50%) |Training time=0.50s (22.93%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5914|ppo_ep: 1|act_loss: -0.003353118896484375|cri_loss: -0.0009851455688476562|unsuper_loss: 0.0
+average reward score: 4.6328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.53%) |Training time=0.48s (21.92%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5915|ppo_ep: 1|act_loss: -0.0023899078369140625|cri_loss: -0.0008401870727539062|unsuper_loss: 0.0
+average reward score: 4.99609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.43%) |Training time=0.48s (21.18%) |Others=0.10 (4.39%)|CurSamplesPerSec=14.06 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5916|ppo_ep: 1|act_loss: 0.01297760009765625|cri_loss: 0.0069427490234375|unsuper_loss: 0.0
+average reward score: 5.5
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.15%) |Training time=0.48s (22.20%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5917|ppo_ep: 1|act_loss: -0.0005397796630859375|cri_loss: 0.00010776519775390625|unsuper_loss: 0.0
+average reward score: 5.2265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.26%) |Training time=0.48s (22.12%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5918|ppo_ep: 1|act_loss: -0.0025806427001953125|cri_loss: -0.0009760856628417969|unsuper_loss: 0.0
+average reward score: 5.4375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.20%) |Training time=0.47s (21.63%) |Others=0.11 (5.17%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.51
+[2023-04-14 12:25:02,201] [INFO] [logging.py:96:log_dist] [Rank 0] step=5920, skipped=76, lr=[1.9410615090410984e-06, 1.9410615090410984e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:25:02,220] [INFO] [timer.py:199:stop] epoch=0/micro_step=5920/global_step=5920, RunningAvgSamplesPerSec=105.38332702712113, CurrSamplesPerSec=103.77662056098468, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:25:02,313] [INFO] [logging.py:96:log_dist] [Rank 0] step=5920, skipped=98, lr=[1.0227610086375208e-06, 1.0227610086375208e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5919|ppo_ep: 1|act_loss: -0.0345458984375|cri_loss: -0.0159912109375|unsuper_loss: 0.0
+average reward score: 5.0234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.74s (75.05%) |Training time=0.47s (20.33%) |Others=0.11 (4.62%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5920|ppo_ep: 1|act_loss: -0.0001728534698486328|cri_loss: 6.532669067382812e-05|unsuper_loss: 0.0
+average reward score: 5.6484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.25%) |Training time=0.48s (22.12%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5921|ppo_ep: 1|act_loss: -0.027099609375|cri_loss: -0.0131988525390625|unsuper_loss: 0.0
+average reward score: 5.16796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.42%) |Training time=0.47s (21.93%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5922|ppo_ep: 1|act_loss: 0.04107666015625|cri_loss: 0.02093505859375|unsuper_loss: 0.0
+average reward score: 4.640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.21%) |Training time=0.48s (22.19%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5923|ppo_ep: 1|act_loss: -0.01739501953125|cri_loss: -0.00836181640625|unsuper_loss: 0.0
+average reward score: 6.3359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.48s (21.94%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5924|ppo_ep: 1|act_loss: -0.0078277587890625|cri_loss: -0.003665924072265625|unsuper_loss: 0.0
+average reward score: 5.00390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.68%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5925|ppo_ep: 1|act_loss: -0.03497314453125|cri_loss: -0.0165863037109375|unsuper_loss: 0.0
+average reward score: 4.30078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.42%) |Training time=0.48s (21.94%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5926|ppo_ep: 1|act_loss: -0.026092529296875|cri_loss: -0.01259613037109375|unsuper_loss: 0.0
+average reward score: 5.875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.77%) |Training time=0.47s (21.59%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5927|ppo_ep: 1|act_loss: 0.0107879638671875|cri_loss: 0.005474090576171875|unsuper_loss: 0.0
+average reward score: 5.51953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.73%) |Training time=0.47s (21.64%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5928|ppo_ep: 1|act_loss: 0.05828857421875|cri_loss: 0.030517578125|unsuper_loss: 0.0
+average reward score: 5.65625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.98%) |Training time=0.47s (21.41%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.51
+[2023-04-14 12:25:23,865] [INFO] [logging.py:96:log_dist] [Rank 0] step=5930, skipped=76, lr=[1.9261901091532087e-06, 1.9261901091532087e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:25:23,884] [INFO] [timer.py:199:stop] epoch=0/micro_step=5930/global_step=5930, RunningAvgSamplesPerSec=105.37955887751461, CurrSamplesPerSec=105.48167242600165, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:25:23,976] [INFO] [logging.py:96:log_dist] [Rank 0] step=5930, skipped=98, lr=[1.0150070442517201e-06, 1.0150070442517201e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5929|ppo_ep: 1|act_loss: 0.220703125|cri_loss: 0.143310546875|unsuper_loss: 0.0
+average reward score: 5.23828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.79%) |Training time=0.47s (21.57%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5930|ppo_ep: 1|act_loss: -0.004055023193359375|cri_loss: -0.0017652511596679688|unsuper_loss: 0.0
+average reward score: 5.2265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.47s (21.58%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5931|ppo_ep: 1|act_loss: 0.0077362060546875|cri_loss: 0.004604339599609375|unsuper_loss: 0.0
+average reward score: 5.19140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.65%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5932|ppo_ep: 1|act_loss: -0.0015506744384765625|cri_loss: -0.000553131103515625|unsuper_loss: 0.0
+average reward score: 5.72265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.79%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5933|ppo_ep: 1|act_loss: 0.006988525390625|cri_loss: 0.003902435302734375|unsuper_loss: 0.0
+average reward score: 5.46484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.61s (72.52%) |Training time=0.46s (20.62%) |Others=0.15 (6.86%)|CurSamplesPerSec=14.38 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5934|ppo_ep: 1|act_loss: -0.014007568359375|cri_loss: -0.0065155029296875|unsuper_loss: 0.0
+average reward score: 5.140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.25%) |Training time=0.46s (20.30%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.20 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5935|ppo_ep: 1|act_loss: -0.0095367431640625|cri_loss: -0.00386810302734375|unsuper_loss: 0.0
+average reward score: 5.2578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.01%) |Training time=0.46s (21.36%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5936|ppo_ep: 1|act_loss: 0.007232666015625|cri_loss: 0.003726959228515625|unsuper_loss: 0.0
+average reward score: 6.13671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.20%) |Training time=0.45s (21.14%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5937|ppo_ep: 1|act_loss: -0.00814056396484375|cri_loss: -0.0037689208984375|unsuper_loss: 0.0
+average reward score: 4.77734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.14%) |Training time=0.46s (21.20%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5938|ppo_ep: 1|act_loss: -0.009490966796875|cri_loss: -0.0043487548828125|unsuper_loss: 0.0
+average reward score: 5.55859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.15%) |Training time=0.45s (21.17%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.51
+[2023-04-14 12:25:45,564] [INFO] [logging.py:96:log_dist] [Rank 0] step=5940, skipped=76, lr=[1.911361676620899e-06, 1.911361676620899e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:25:45,583] [INFO] [timer.py:199:stop] epoch=0/micro_step=5940/global_step=5940, RunningAvgSamplesPerSec=105.38352468920138, CurrSamplesPerSec=110.74535974698605, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:25:45,675] [INFO] [logging.py:96:log_dist] [Rank 0] step=5940, skipped=98, lr=[1.0072750910438734e-06, 1.0072750910438734e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5939|ppo_ep: 1|act_loss: 0.01202392578125|cri_loss: 0.006191253662109375|unsuper_loss: 0.0
+average reward score: 6.58203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.28%) |Training time=0.45s (21.07%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5940|ppo_ep: 1|act_loss: -0.003948211669921875|cri_loss: -0.0017728805541992188|unsuper_loss: 0.0
+average reward score: 5.39453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.02%) |Training time=0.46s (21.32%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5941|ppo_ep: 1|act_loss: 0.029876708984375|cri_loss: 0.01517486572265625|unsuper_loss: 0.0
+average reward score: 5.10546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.46s (21.46%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5942|ppo_ep: 1|act_loss: -0.0004661083221435547|cri_loss: -0.000164031982421875|unsuper_loss: 0.0
+average reward score: 4.7734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.96%) |Training time=0.46s (21.39%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5943|ppo_ep: 1|act_loss: 0.019561767578125|cri_loss: 0.01006317138671875|unsuper_loss: 0.0
+average reward score: 4.6796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.12%) |Training time=0.46s (21.23%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5944|ppo_ep: 1|act_loss: 0.0049285888671875|cri_loss: 0.0025653839111328125|unsuper_loss: 0.0
+average reward score: 5.7578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.63s (72.69%) |Training time=0.51s (22.85%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.26 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5945|ppo_ep: 1|act_loss: -0.007572174072265625|cri_loss: -0.003124237060546875|unsuper_loss: 0.0
+average reward score: 5.34375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.10%) |Training time=0.46s (21.18%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5946|ppo_ep: 1|act_loss: -0.01294708251953125|cri_loss: -0.006168365478515625|unsuper_loss: 0.0
+average reward score: 5.77734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.24%) |Training time=0.46s (21.15%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5947|ppo_ep: 1|act_loss: -0.0242156982421875|cri_loss: -0.0118408203125|unsuper_loss: 0.0
+average reward score: 4.359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.92%) |Training time=0.46s (21.46%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5948|ppo_ep: 1|act_loss: -0.00698089599609375|cri_loss: -0.0033702850341796875|unsuper_loss: 0.0
+average reward score: 5.2421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.18%) |Training time=0.46s (21.15%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.51
+[2023-04-14 12:26:07,375] [INFO] [logging.py:96:log_dist] [Rank 0] step=5950, skipped=76, lr=[1.8965764312373112e-06, 1.8965764312373112e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:26:07,393] [INFO] [timer.py:199:stop] epoch=0/micro_step=5950/global_step=5950, RunningAvgSamplesPerSec=105.38668188338777, CurrSamplesPerSec=107.2714592506216, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:26:07,485] [INFO] [logging.py:96:log_dist] [Rank 0] step=5950, skipped=98, lr=[9.995652636201819e-07, 9.995652636201819e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5949|ppo_ep: 1|act_loss: -0.00499725341796875|cri_loss: -0.002071380615234375|unsuper_loss: 0.0
+average reward score: 5.37109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.02%) |Training time=0.46s (19.71%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5950|ppo_ep: 1|act_loss: 0.01093292236328125|cri_loss: 0.00567626953125|unsuper_loss: 0.0
+average reward score: 6.38671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.89%) |Training time=0.46s (21.40%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5951|ppo_ep: 1|act_loss: 0.019287109375|cri_loss: 0.0101776123046875|unsuper_loss: 0.0
+average reward score: 5.6484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.01%) |Training time=0.46s (21.34%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5952|ppo_ep: 1|act_loss: -0.00455474853515625|cri_loss: -0.0020961761474609375|unsuper_loss: 0.0
+average reward score: 5.41796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.08%) |Training time=0.46s (21.26%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5953|ppo_ep: 1|act_loss: -0.0208282470703125|cri_loss: -0.010009765625|unsuper_loss: 0.0
+average reward score: 5.6484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.99%) |Training time=0.46s (21.37%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5954|ppo_ep: 1|act_loss: -0.028045654296875|cri_loss: -0.0135040283203125|unsuper_loss: 0.0
+average reward score: 4.62109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.46s (21.58%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5955|ppo_ep: 1|act_loss: 0.0128021240234375|cri_loss: 0.006572723388671875|unsuper_loss: 0.0
+average reward score: 5.17578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.16%) |Training time=0.45s (21.16%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5956|ppo_ep: 1|act_loss: 0.002628326416015625|cri_loss: 0.00140380859375|unsuper_loss: 0.0
+average reward score: 5.6875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.07%) |Training time=0.46s (21.25%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5957|ppo_ep: 1|act_loss: -0.003032684326171875|cri_loss: -0.0013885498046875|unsuper_loss: 0.0
+average reward score: 6.2265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.45s (21.17%) |Others=0.10 (4.80%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5958|ppo_ep: 1|act_loss: 0.0238037109375|cri_loss: 0.0124359130859375|unsuper_loss: 0.0
+average reward score: 5.20703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.96%) |Training time=0.46s (21.35%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.51
+[2023-04-14 12:26:28,860] [INFO] [logging.py:96:log_dist] [Rank 0] step=5960, skipped=76, lr=[1.8818345921554516e-06, 1.8818345921554516e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:26:28,878] [INFO] [timer.py:199:stop] epoch=0/micro_step=5960/global_step=5960, RunningAvgSamplesPerSec=105.39163032103932, CurrSamplesPerSec=110.05142543688827, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:26:28,971] [INFO] [logging.py:96:log_dist] [Rank 0] step=5960, skipped=98, lr=[9.91877676258884e-07, 9.91877676258884e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5959|ppo_ep: 1|act_loss: 0.0390625|cri_loss: 0.0201568603515625|unsuper_loss: 0.0
+average reward score: 5.359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.23%) |Training time=0.45s (21.11%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5960|ppo_ep: 1|act_loss: 0.01190948486328125|cri_loss: 0.006134033203125|unsuper_loss: 0.0
+average reward score: 6.1484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.34%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5961|ppo_ep: 1|act_loss: -0.0223541259765625|cri_loss: -0.010711669921875|unsuper_loss: 0.0
+average reward score: 4.9765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.46s (21.44%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5962|ppo_ep: 1|act_loss: 0.01100921630859375|cri_loss: 0.005786895751953125|unsuper_loss: 0.0
+average reward score: 5.0859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.45s (21.07%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5963|ppo_ep: 1|act_loss: -0.0018167495727539062|cri_loss: -0.0007228851318359375|unsuper_loss: 0.0
+average reward score: 5.3828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.16%) |Training time=0.45s (21.16%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5964|ppo_ep: 1|act_loss: -0.0391845703125|cri_loss: -0.0184478759765625|unsuper_loss: 0.0
+average reward score: 5.8515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.66%) |Training time=0.46s (21.59%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5965|ppo_ep: 1|act_loss: 0.0067138671875|cri_loss: 0.004070281982421875|unsuper_loss: 0.0
+average reward score: 5.40625
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.66s (71.69%) |Training time=0.55s (23.59%) |Others=0.11 (4.72%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5966|ppo_ep: 1|act_loss: -0.030609130859375|cri_loss: -0.014984130859375|unsuper_loss: 0.0
+average reward score: 5.03515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.67%) |Training time=0.46s (21.67%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5967|ppo_ep: 1|act_loss: -0.02716064453125|cri_loss: -0.0134735107421875|unsuper_loss: 0.0
+average reward score: 4.7421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.93%) |Training time=0.46s (21.40%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5968|ppo_ep: 1|act_loss: 0.034423828125|cri_loss: 0.018524169921875|unsuper_loss: 0.0
+average reward score: 5.640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.46s (21.25%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.51
+[2023-04-14 12:26:50,497] [INFO] [logging.py:96:log_dist] [Rank 0] step=5970, skipped=76, lr=[1.8671363778849347e-06, 1.8671363778849347e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:26:50,516] [INFO] [timer.py:199:stop] epoch=0/micro_step=5970/global_step=5970, RunningAvgSamplesPerSec=105.39215717659776, CurrSamplesPerSec=107.45963430119632, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:26:50,608] [INFO] [logging.py:96:log_dist] [Rank 0] step=5970, skipped=98, lr=[9.842124429085714e-07, 9.842124429085714e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5969|ppo_ep: 1|act_loss: 0.00429534912109375|cri_loss: 0.002338409423828125|unsuper_loss: 0.0
+average reward score: 4.8046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.87%) |Training time=0.46s (21.48%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5970|ppo_ep: 1|act_loss: -0.0014858245849609375|cri_loss: -0.0004582405090332031|unsuper_loss: 0.0
+average reward score: 5.1875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.75%) |Training time=0.44s (20.59%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5971|ppo_ep: 1|act_loss: -0.0072021484375|cri_loss: -0.00341796875|unsuper_loss: 0.0
+average reward score: 5.09375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.94%) |Training time=0.44s (20.39%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5972|ppo_ep: 1|act_loss: 0.02154541015625|cri_loss: 0.01145172119140625|unsuper_loss: 0.0
+average reward score: 4.19140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.35%) |Training time=0.45s (21.00%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5973|ppo_ep: 1|act_loss: 0.022430419921875|cri_loss: 0.01153564453125|unsuper_loss: 0.0
+average reward score: 4.9453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.83%) |Training time=0.44s (20.44%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5974|ppo_ep: 1|act_loss: -0.018157958984375|cri_loss: -0.0088348388671875|unsuper_loss: 0.0
+average reward score: 4.859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.72s (75.98%) |Training time=0.44s (19.59%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.11 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5975|ppo_ep: 1|act_loss: 0.016845703125|cri_loss: 0.0087127685546875|unsuper_loss: 0.0
+average reward score: 5.0625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.76%) |Training time=0.44s (20.57%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5976|ppo_ep: 1|act_loss: -0.0218505859375|cri_loss: -0.01079559326171875|unsuper_loss: 0.0
+average reward score: 5.20703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.82%) |Training time=0.44s (20.53%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5977|ppo_ep: 1|act_loss: -0.02777099609375|cri_loss: -0.01334381103515625|unsuper_loss: 0.0
+average reward score: 5.23046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.65%) |Training time=0.45s (20.70%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5978|ppo_ep: 1|act_loss: -0.013427734375|cri_loss: -0.006366729736328125|unsuper_loss: 0.0
+average reward score: 5.15234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.79%) |Training time=0.44s (20.53%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.51
+[2023-04-14 12:27:12,106] [INFO] [logging.py:96:log_dist] [Rank 0] step=5980, skipped=76, lr=[1.8524820062887546e-06, 1.8524820062887546e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:27:12,124] [INFO] [timer.py:199:stop] epoch=0/micro_step=5980/global_step=5980, RunningAvgSamplesPerSec=105.40602482628336, CurrSamplesPerSec=113.2016207116422, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:27:12,218] [INFO] [logging.py:96:log_dist] [Rank 0] step=5980, skipped=98, lr=[9.765696771864933e-07, 9.765696771864933e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5979|ppo_ep: 1|act_loss: 0.0231781005859375|cri_loss: 0.01198577880859375|unsuper_loss: 0.0
+average reward score: 4.875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.55%) |Training time=0.45s (20.73%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52
+epoch: 0|step: 5980|ppo_ep: 1|act_loss: 0.021240234375|cri_loss: 0.01136016845703125|unsuper_loss: 0.0
+average reward score: 5.29296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.74%) |Training time=0.46s (21.57%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.52
+epoch: 0|step: 5981|ppo_ep: 1|act_loss: 0.01788330078125|cri_loss: 0.009185791015625|unsuper_loss: 0.0
+average reward score: 5.640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.74s (75.18%) |Training time=0.47s (20.50%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.82 |AvgSamplesPerSec=14.51
+epoch: 0|step: 5982|ppo_ep: 1|act_loss: 0.034881591796875|cri_loss: 0.0184783935546875|unsuper_loss: 0.0
+average reward score: 5.31640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.13%) |Training time=0.48s (22.23%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52
+epoch: 0|step: 5983|ppo_ep: 1|act_loss: -0.0008478164672851562|cri_loss: -0.00020265579223632812|unsuper_loss: 0.0
+average reward score: 5.8046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.40%) |Training time=0.47s (21.95%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.52
+epoch: 0|step: 5984|ppo_ep: 1|act_loss: -0.024932861328125|cri_loss: -0.01213836669921875|unsuper_loss: 0.0
+average reward score: 6.08984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.51%) |Training time=0.47s (21.80%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
+[2023-04-14 12:27:25,255] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 5985|ppo_ep: 1|act_loss: -0.00849151611328125|cri_loss: -0.0028514862060546875|unsuper_loss: 0.0
+average reward score: 5.6953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.93%) |Training time=0.47s (21.88%) |Others=0.09 (4.19%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.52
+[2023-04-14 12:27:27,391] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 5986|ppo_ep: 1|act_loss: -0.0246734619140625|cri_loss: -0.01187896728515625|unsuper_loss: 0.0
+average reward score: 4.84765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.76%) |Training time=0.47s (22.04%) |Others=0.09 (4.20%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.52
+epoch: 0|step: 5987|ppo_ep: 1|act_loss: 0.0214080810546875|cri_loss: 0.0121612548828125|unsuper_loss: 0.0
+average reward score: 5.44921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.84%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52
+epoch: 0|step: 5988|ppo_ep: 1|act_loss: -0.01424407958984375|cri_loss: -0.005828857421875|unsuper_loss: 0.0
+average reward score: 5.1875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.42%) |Training time=0.47s (21.92%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
+[2023-04-14 12:27:33,716] [INFO] [logging.py:96:log_dist] [Rank 0] step=5990, skipped=76, lr=[1.8378716945800507e-06, 1.8378716945800507e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:27:33,734] [INFO] [timer.py:199:stop] epoch=0/micro_step=5990/global_step=5990, RunningAvgSamplesPerSec=105.40360050801567, CurrSamplesPerSec=103.72817841981488, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:27:33,827] [INFO] [logging.py:96:log_dist] [Rank 0] step=5990, skipped=100, lr=[9.704717174393912e-07, 9.704717174393912e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5989|ppo_ep: 1|act_loss: -0.00201416015625|cri_loss: -0.0007715225219726562|unsuper_loss: 0.0
+average reward score: 5.1875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.40%) |Training time=0.47s (21.93%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52
+epoch: 0|step: 5990|ppo_ep: 1|act_loss: -0.023345947265625|cri_loss: -0.01153564453125|unsuper_loss: 0.0
+average reward score: 5.45703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.58%) |Training time=0.47s (21.75%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52
+epoch: 0|step: 5991|ppo_ep: 1|act_loss: 0.0107879638671875|cri_loss: 0.005706787109375|unsuper_loss: 0.0
+average reward score: 5.2421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.41%) |Training time=0.47s (21.93%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52
+epoch: 0|step: 5992|ppo_ep: 1|act_loss: 0.0079498291015625|cri_loss: 0.0045623779296875|unsuper_loss: 0.0
+average reward score: 5.38671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.40%) |Training time=0.47s (21.92%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.52
+epoch: 0|step: 5993|ppo_ep: 1|act_loss: 0.0242919921875|cri_loss: 0.01251220703125|unsuper_loss: 0.0
+average reward score: 5.09765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.46s (21.28%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
+epoch: 0|step: 5994|ppo_ep: 1|act_loss: -0.0028228759765625|cri_loss: -0.0012311935424804688|unsuper_loss: 0.0
+average reward score: 5.796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.64%) |Training time=0.47s (21.69%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
+epoch: 0|step: 5995|ppo_ep: 1|act_loss: -0.00330352783203125|cri_loss: -0.0015287399291992188|unsuper_loss: 0.0
+average reward score: 4.609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.84%) |Training time=0.46s (21.50%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52
+epoch: 0|step: 5996|ppo_ep: 1|act_loss: 0.0069732666015625|cri_loss: 0.0037631988525390625|unsuper_loss: 0.0
+average reward score: 6.5546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.96%) |Training time=0.45s (20.29%) |Others=0.17 (7.74%)|CurSamplesPerSec=14.44 |AvgSamplesPerSec=14.52
+epoch: 0|step: 5997|ppo_ep: 1|act_loss: -0.005279541015625|cri_loss: -0.0024662017822265625|unsuper_loss: 0.0
+average reward score: 5.30078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.22%) |Training time=0.45s (21.12%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
+epoch: 0|step: 5998|ppo_ep: 1|act_loss: -0.0168304443359375|cri_loss: -0.00815582275390625|unsuper_loss: 0.0
+average reward score: 6.7109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.05%) |Training time=0.46s (21.30%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52
+[2023-04-14 12:27:55,249] [INFO] [logging.py:96:log_dist] [Rank 0] step=6000, skipped=76, lr=[1.8233056593188891e-06, 1.8233056593188891e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:27:55,267] [INFO] [timer.py:199:stop] epoch=0/micro_step=6000/global_step=6000, RunningAvgSamplesPerSec=105.40675121024026, CurrSamplesPerSec=109.96774978513238, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:27:55,360] [INFO] [logging.py:96:log_dist] [Rank 0] step=6000, skipped=100, lr=[9.62869678699519e-07, 9.62869678699519e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 5999|ppo_ep: 1|act_loss: -0.0345458984375|cri_loss: -0.0167236328125|unsuper_loss: 0.0
+average reward score: 5.58984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.19%) |Training time=0.45s (21.15%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6000|ppo_ep: 1|act_loss: -0.0182952880859375|cri_loss: -0.008880615234375|unsuper_loss: 0.0
+average reward score: 5.69140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.99%) |Training time=0.46s (21.37%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6001|ppo_ep: 1|act_loss: -0.002777099609375|cri_loss: -0.0012273788452148438|unsuper_loss: 0.0
+average reward score: 5.46875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.46s (21.32%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6002|ppo_ep: 1|act_loss: 0.0582275390625|cri_loss: 0.03076171875|unsuper_loss: 0.0
+average reward score: 6.01171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.87%) |Training time=0.47s (21.53%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6003|ppo_ep: 1|act_loss: -0.0167999267578125|cri_loss: -0.0081329345703125|unsuper_loss: 0.0
+average reward score: 4.9765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.45%) |Training time=0.46s (20.97%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6004|ppo_ep: 1|act_loss: -0.0114898681640625|cri_loss: -0.00524139404296875|unsuper_loss: 0.0
+average reward score: 4.78125
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.89%) |Training time=0.47s (20.71%) |Others=0.10 (4.41%)|CurSamplesPerSec=14.17 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6005|ppo_ep: 1|act_loss: -0.0033092498779296875|cri_loss: -0.0012836456298828125|unsuper_loss: 0.0
+average reward score: 4.9453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.10%) |Training time=0.46s (21.22%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6006|ppo_ep: 1|act_loss: 0.0024547576904296875|cri_loss: 0.001491546630859375|unsuper_loss: 0.0
+average reward score: 5.1640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.20%) |Training time=0.45s (21.14%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6007|ppo_ep: 1|act_loss: -0.001834869384765625|cri_loss: -0.0005602836608886719|unsuper_loss: 0.0
+average reward score: 5.73828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.16%) |Training time=0.46s (21.20%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6008|ppo_ep: 1|act_loss: -0.028045654296875|cri_loss: -0.0137939453125|unsuper_loss: 0.0
+average reward score: 4.78125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.08%) |Training time=0.46s (21.25%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52
+[2023-04-14 12:28:16,897] [INFO] [logging.py:96:log_dist] [Rank 0] step=6010, skipped=76, lr=[1.8087841164090489e-06, 1.8087841164090489e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:28:16,915] [INFO] [timer.py:199:stop] epoch=0/micro_step=6010/global_step=6010, RunningAvgSamplesPerSec=105.41120763475524, CurrSamplesPerSec=110.29632922803589, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:28:17,008] [INFO] [logging.py:96:log_dist] [Rank 0] step=6010, skipped=100, lr=[9.552904239391322e-07, 9.552904239391322e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6009|ppo_ep: 1|act_loss: 0.01910400390625|cri_loss: 0.00994110107421875|unsuper_loss: 0.0
+average reward score: 5.140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.25%) |Training time=0.45s (21.09%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6010|ppo_ep: 1|act_loss: -0.01312255859375|cri_loss: -0.006427764892578125|unsuper_loss: 0.0
+average reward score: 4.97265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.30%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6011|ppo_ep: 1|act_loss: -0.0002875328063964844|cri_loss: 5.936622619628906e-05|unsuper_loss: 0.0
+average reward score: 5.35546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.72s (74.56%) |Training time=0.48s (20.78%) |Others=0.11 (4.66%)|CurSamplesPerSec=13.84 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6012|ppo_ep: 1|act_loss: -0.028472900390625|cri_loss: -0.0135345458984375|unsuper_loss: 0.0
+average reward score: 5.046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.06%) |Training time=0.46s (21.29%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6013|ppo_ep: 1|act_loss: -0.027679443359375|cri_loss: -0.013671875|unsuper_loss: 0.0
+average reward score: 5.8828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.00%) |Training time=0.46s (21.35%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6014|ppo_ep: 1|act_loss: -0.01953125|cri_loss: -0.0093841552734375|unsuper_loss: 0.0
+average reward score: 5.3203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.13%) |Training time=0.45s (21.19%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6015|ppo_ep: 1|act_loss: -0.02752685546875|cri_loss: -0.01348114013671875|unsuper_loss: 0.0
+average reward score: 5.9609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.36%) |Training time=0.45s (20.97%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6016|ppo_ep: 1|act_loss: 0.017852783203125|cri_loss: 0.0092010498046875|unsuper_loss: 0.0
+average reward score: 4.97265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.18%) |Training time=0.45s (21.05%) |Others=0.10 (4.78%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6017|ppo_ep: 1|act_loss: -0.01593017578125|cri_loss: -0.007625579833984375|unsuper_loss: 0.0
+average reward score: 5.453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.05%) |Training time=0.45s (21.13%) |Others=0.10 (4.82%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6018|ppo_ep: 1|act_loss: 0.02801513671875|cri_loss: 0.014251708984375|unsuper_loss: 0.0
+average reward score: 5.14453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.34%) |Training time=0.47s (21.98%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52
+[2023-04-14 12:28:38,536] [INFO] [logging.py:96:log_dist] [Rank 0] step=6020, skipped=76, lr=[1.7943072810948272e-06, 1.7943072810948272e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:28:38,555] [INFO] [timer.py:199:stop] epoch=0/micro_step=6020/global_step=6020, RunningAvgSamplesPerSec=105.41476522670784, CurrSamplesPerSec=105.24371287159768, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:28:38,648] [INFO] [logging.py:96:log_dist] [Rank 0] step=6020, skipped=100, lr=[9.477340655010717e-07, 9.477340655010717e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6019|ppo_ep: 1|act_loss: -0.04486083984375|cri_loss: -0.021087646484375|unsuper_loss: 0.0
+average reward score: 5.32421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.57%) |Training time=0.47s (21.75%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6020|ppo_ep: 1|act_loss: -0.005889892578125|cri_loss: -0.0028514862060546875|unsuper_loss: 0.0
+average reward score: 5.58203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.41%) |Training time=0.47s (21.90%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6021|ppo_ep: 1|act_loss: 0.00341033935546875|cri_loss: 0.002422332763671875|unsuper_loss: 0.0
+average reward score: 5.5234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.56%) |Training time=0.46s (21.62%) |Others=0.10 (4.82%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6022|ppo_ep: 1|act_loss: 0.0144805908203125|cri_loss: 0.007442474365234375|unsuper_loss: 0.0
+average reward score: 4.9453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.58%) |Training time=0.47s (21.75%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6023|ppo_ep: 1|act_loss: 0.013458251953125|cri_loss: 0.006992340087890625|unsuper_loss: 0.0
+average reward score: 4.98046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.13%) |Training time=0.48s (22.22%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6024|ppo_ep: 1|act_loss: -0.00662994384765625|cri_loss: -0.0031528472900390625|unsuper_loss: 0.0
+average reward score: 5.96484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.48%) |Training time=0.47s (21.83%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6025|ppo_ep: 1|act_loss: 0.01381683349609375|cri_loss: 0.007251739501953125|unsuper_loss: 0.0
+average reward score: 4.8203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.25%) |Training time=0.45s (21.08%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6026|ppo_ep: 1|act_loss: 0.056182861328125|cri_loss: 0.0298004150390625|unsuper_loss: 0.0
+average reward score: 5.30078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.23%) |Training time=0.45s (19.42%) |Others=0.10 (4.35%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6027|ppo_ep: 1|act_loss: 0.0283355712890625|cri_loss: 0.0158233642578125|unsuper_loss: 0.0
+average reward score: 5.421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.15%) |Training time=0.45s (21.18%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6028|ppo_ep: 1|act_loss: 0.0100860595703125|cri_loss: 0.00598907470703125|unsuper_loss: 0.0
+average reward score: 5.4375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.31%) |Training time=0.45s (21.01%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
+[2023-04-14 12:29:00,177] [INFO] [logging.py:96:log_dist] [Rank 0] step=6030, skipped=76, lr=[1.7798753679578482e-06, 1.7798753679578482e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:29:00,195] [INFO] [timer.py:199:stop] epoch=0/micro_step=6030/global_step=6030, RunningAvgSamplesPerSec=105.41748877018956, CurrSamplesPerSec=108.73361352979491, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:29:00,288] [INFO] [logging.py:96:log_dist] [Rank 0] step=6030, skipped=100, lr=[9.402007153887982e-07, 9.402007153887982e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6029|ppo_ep: 1|act_loss: -0.027130126953125|cri_loss: -0.0133209228515625|unsuper_loss: 0.0
+average reward score: 5.92578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.09%) |Training time=0.46s (21.28%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6030|ppo_ep: 1|act_loss: -0.023773193359375|cri_loss: -0.0114898681640625|unsuper_loss: 0.0
+average reward score: 5.6640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.26%) |Training time=0.46s (21.12%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6031|ppo_ep: 1|act_loss: 0.0023746490478515625|cri_loss: 0.0015802383422851562|unsuper_loss: 0.0
+average reward score: 4.90625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.11%) |Training time=0.46s (21.23%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6032|ppo_ep: 1|act_loss: -0.03857421875|cri_loss: -0.0188446044921875|unsuper_loss: 0.0
+average reward score: 5.2421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.22%) |Training time=0.46s (21.14%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6033|ppo_ep: 1|act_loss: -0.0029544830322265625|cri_loss: -0.0013036727905273438|unsuper_loss: 0.0
+average reward score: 5.45703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.36%) |Training time=0.47s (21.04%) |Others=0.12 (5.60%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6034|ppo_ep: 1|act_loss: -0.012664794921875|cri_loss: -0.006099700927734375|unsuper_loss: 0.0
+average reward score: 5.4609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.84%) |Training time=0.44s (20.21%) |Others=0.11 (4.94%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6035|ppo_ep: 1|act_loss: 0.019073486328125|cri_loss: 0.00981903076171875|unsuper_loss: 0.0
+average reward score: 5.4609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.75%) |Training time=0.44s (20.55%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6036|ppo_ep: 1|act_loss: 0.00531005859375|cri_loss: 0.00304412841796875|unsuper_loss: 0.0
+average reward score: 6.26953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.33%) |Training time=0.45s (21.00%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6037|ppo_ep: 1|act_loss: -0.00218963623046875|cri_loss: -0.0005550384521484375|unsuper_loss: 0.0
+average reward score: 5.1015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.52%) |Training time=0.44s (20.79%) |Others=0.10 (4.68%)|CurSamplesPerSec=15.03 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6038|ppo_ep: 1|act_loss: -0.0223236083984375|cri_loss: -0.01055908203125|unsuper_loss: 0.0
+average reward score: 5.62890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.35%) |Training time=0.45s (20.95%) |Others=0.10 (4.70%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.52
+[2023-04-14 12:29:21,698] [INFO] [logging.py:96:log_dist] [Rank 0] step=6040, skipped=76, lr=[1.765488590913881e-06, 1.765488590913881e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:29:21,716] [INFO] [timer.py:199:stop] epoch=0/micro_step=6040/global_step=6040, RunningAvgSamplesPerSec=105.42680483904256, CurrSamplesPerSec=113.44426751053996, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:29:21,809] [INFO] [logging.py:96:log_dist] [Rank 0] step=6040, skipped=100, lr=[9.326904852647345e-07, 9.326904852647345e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6039|ppo_ep: 1|act_loss: -0.0015201568603515625|cri_loss: -0.0006833076477050781|unsuper_loss: 0.0
+average reward score: 5.03515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.47%) |Training time=0.44s (20.87%) |Others=0.10 (4.67%)|CurSamplesPerSec=15.02 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6040|ppo_ep: 1|act_loss: -0.014862060546875|cri_loss: -0.007099151611328125|unsuper_loss: 0.0
+average reward score: 4.80078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.08%) |Training time=0.43s (20.07%) |Others=0.10 (4.85%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6041|ppo_ep: 1|act_loss: 0.0177154541015625|cri_loss: 0.009063720703125|unsuper_loss: 0.0
+average reward score: 5.4375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.54%) |Training time=0.45s (20.79%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6042|ppo_ep: 1|act_loss: -0.003208160400390625|cri_loss: -0.0014791488647460938|unsuper_loss: 0.0
+average reward score: 5.3828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.76s (76.04%) |Training time=0.46s (19.65%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.79 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6043|ppo_ep: 1|act_loss: -0.0123443603515625|cri_loss: -0.006061553955078125|unsuper_loss: 0.0
+average reward score: 5.890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.47%) |Training time=0.45s (20.86%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6044|ppo_ep: 1|act_loss: 0.029998779296875|cri_loss: 0.0153350830078125|unsuper_loss: 0.0
+average reward score: 5.40625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.40%) |Training time=0.45s (20.92%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6045|ppo_ep: 1|act_loss: -0.00530242919921875|cri_loss: -0.002552032470703125|unsuper_loss: 0.0
+average reward score: 5.62890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.28%) |Training time=0.45s (21.05%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6046|ppo_ep: 1|act_loss: 0.0306396484375|cri_loss: 0.016571044921875|unsuper_loss: 0.0
+average reward score: 5.4609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.33%) |Training time=0.45s (21.01%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6047|ppo_ep: 1|act_loss: 0.002735137939453125|cri_loss: 0.001861572265625|unsuper_loss: 0.0
+average reward score: 4.98828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.51%) |Training time=0.45s (20.81%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6048|ppo_ep: 1|act_loss: 0.006122589111328125|cri_loss: 0.0031719207763671875|unsuper_loss: 0.0
+average reward score: 4.75390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.31%) |Training time=0.45s (21.03%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.52
+[2023-04-14 12:29:43,343] [INFO] [logging.py:96:log_dist] [Rank 0] step=6050, skipped=76, lr=[1.751147163209664e-06, 1.751147163209664e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:29:43,361] [INFO] [timer.py:199:stop] epoch=0/micro_step=6050/global_step=6050, RunningAvgSamplesPerSec=105.43758064958905, CurrSamplesPerSec=114.32953392591709, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:29:43,454] [INFO] [logging.py:96:log_dist] [Rank 0] step=6050, skipped=100, lr=[9.252034864486062e-07, 9.252034864486062e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6049|ppo_ep: 1|act_loss: -0.04718017578125|cri_loss: -0.023223876953125|unsuper_loss: 0.0
+average reward score: 5.21875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.72%) |Training time=0.44s (20.62%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6050|ppo_ep: 1|act_loss: -0.021270751953125|cri_loss: -0.01024627685546875|unsuper_loss: 0.0
+average reward score: 4.71484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.55%) |Training time=0.45s (20.78%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6051|ppo_ep: 1|act_loss: 0.01218414306640625|cri_loss: 0.00667572021484375|unsuper_loss: 0.0
+average reward score: 4.984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.46%) |Training time=0.45s (20.85%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6052|ppo_ep: 1|act_loss: -0.0228271484375|cri_loss: -0.0111236572265625|unsuper_loss: 0.0
+average reward score: 4.578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.20%) |Training time=0.45s (21.14%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6053|ppo_ep: 1|act_loss: -0.0243682861328125|cri_loss: -0.011871337890625|unsuper_loss: 0.0
+average reward score: 5.25390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.46%) |Training time=0.45s (20.86%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6054|ppo_ep: 1|act_loss: 0.0071868896484375|cri_loss: 0.003875732421875|unsuper_loss: 0.0
+average reward score: 5.2578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.42%) |Training time=0.45s (20.91%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6055|ppo_ep: 1|act_loss: 0.035552978515625|cri_loss: 0.0187225341796875|unsuper_loss: 0.0
+average reward score: 5.453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.50%) |Training time=0.44s (20.61%) |Others=0.11 (4.88%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6056|ppo_ep: 1|act_loss: 0.043182373046875|cri_loss: 0.0221099853515625|unsuper_loss: 0.0
+average reward score: 5.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.96%) |Training time=0.44s (20.37%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6057|ppo_ep: 1|act_loss: 0.0064544677734375|cri_loss: 0.003429412841796875|unsuper_loss: 0.0
+average reward score: 4.2578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.60s (71.96%) |Training time=0.47s (21.10%) |Others=0.15 (6.94%)|CurSamplesPerSec=14.42 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6058|ppo_ep: 1|act_loss: 0.0033817291259765625|cri_loss: 0.002197265625|unsuper_loss: 0.0
+average reward score: 4.84375
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.35%) |Training time=0.46s (20.20%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.19 |AvgSamplesPerSec=14.52
+[2023-04-14 12:30:05,009] [INFO] [logging.py:96:log_dist] [Rank 0] step=6060, skipped=76, lr=[1.7368512974197543e-06, 1.7368512974197543e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:30:05,028] [INFO] [timer.py:199:stop] epoch=0/micro_step=6060/global_step=6060, RunningAvgSamplesPerSec=105.4466785255788, CurrSamplesPerSec=110.93914526975647, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:30:05,120] [INFO] [logging.py:96:log_dist] [Rank 0] step=6060, skipped=100, lr=[9.177398299157989e-07, 9.177398299157989e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6059|ppo_ep: 1|act_loss: 0.00881195068359375|cri_loss: 0.004749298095703125|unsuper_loss: 0.0
+average reward score: 4.671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.34%) |Training time=0.45s (21.02%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6060|ppo_ep: 1|act_loss: 0.02923583984375|cri_loss: 0.01485443115234375|unsuper_loss: 0.0
+average reward score: 5.3359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.95%) |Training time=0.46s (21.39%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6061|ppo_ep: 1|act_loss: -0.0119171142578125|cri_loss: -0.005054473876953125|unsuper_loss: 0.0
+average reward score: 4.85546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.10%) |Training time=0.46s (21.25%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6062|ppo_ep: 1|act_loss: -0.020172119140625|cri_loss: -0.00948333740234375|unsuper_loss: 0.0
+average reward score: 4.79296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.22%) |Training time=0.47s (21.92%) |Others=0.11 (4.87%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6063|ppo_ep: 1|act_loss: -0.0472412109375|cri_loss: -0.023162841796875|unsuper_loss: 0.0
+average reward score: 5.0703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.65s (72.69%) |Training time=0.52s (22.88%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.09 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6064|ppo_ep: 1|act_loss: 0.00022161006927490234|cri_loss: 0.0004191398620605469|unsuper_loss: 0.0
+average reward score: 5.671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.06%) |Training time=0.48s (22.30%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6065|ppo_ep: 1|act_loss: -0.0066070556640625|cri_loss: -0.002780914306640625|unsuper_loss: 0.0
+average reward score: 5.5
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.17%) |Training time=0.48s (22.18%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6066|ppo_ep: 1|act_loss: -0.01184844970703125|cri_loss: -0.005641937255859375|unsuper_loss: 0.0
+average reward score: 6.296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.26%) |Training time=0.48s (22.08%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6067|ppo_ep: 1|act_loss: 0.00704193115234375|cri_loss: 0.0038299560546875|unsuper_loss: 0.0
+average reward score: 6.09375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.62%) |Training time=0.47s (21.76%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6068|ppo_ep: 1|act_loss: 0.01067352294921875|cri_loss: 0.005550384521484375|unsuper_loss: 0.0
+average reward score: 5.70703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.10%) |Training time=0.48s (22.25%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.52
+[2023-04-14 12:30:26,681] [INFO] [logging.py:96:log_dist] [Rank 0] step=6070, skipped=76, lr=[1.7226012054433704e-06, 1.7226012054433704e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:30:26,699] [INFO] [timer.py:199:stop] epoch=0/micro_step=6070/global_step=6070, RunningAvgSamplesPerSec=105.44069020433436, CurrSamplesPerSec=101.47307651888987, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:30:26,792] [INFO] [logging.py:96:log_dist] [Rank 0] step=6070, skipped=100, lr=[9.102996262957067e-07, 9.102996262957067e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6069|ppo_ep: 1|act_loss: 0.0227508544921875|cri_loss: 0.0118408203125|unsuper_loss: 0.0
+average reward score: 5.07421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.48s (22.18%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6070|ppo_ep: 1|act_loss: 0.015289306640625|cri_loss: 0.00815582275390625|unsuper_loss: 0.0
+average reward score: 4.36328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.43%) |Training time=0.47s (21.86%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6071|ppo_ep: 1|act_loss: 0.0294189453125|cri_loss: 0.01490020751953125|unsuper_loss: 0.0
+average reward score: 4.921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.87%) |Training time=0.44s (20.45%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6072|ppo_ep: 1|act_loss: -0.010040283203125|cri_loss: -0.004913330078125|unsuper_loss: 0.0
+average reward score: 5.921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.25%) |Training time=0.48s (22.08%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6073|ppo_ep: 1|act_loss: 0.0377197265625|cri_loss: 0.01934814453125|unsuper_loss: 0.0
+average reward score: 5.796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.13%) |Training time=0.45s (21.18%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6074|ppo_ep: 1|act_loss: 0.00595855712890625|cri_loss: 0.003124237060546875|unsuper_loss: 0.0
+average reward score: 5.44140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.44%) |Training time=0.47s (20.24%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.80 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6075|ppo_ep: 1|act_loss: -0.0601806640625|cri_loss: -0.0290985107421875|unsuper_loss: 0.0
+average reward score: 4.6640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.94%) |Training time=0.44s (20.38%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6076|ppo_ep: 1|act_loss: 0.04913330078125|cri_loss: 0.02716064453125|unsuper_loss: 0.0
+average reward score: 4.61328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.77%) |Training time=0.44s (20.57%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6077|ppo_ep: 1|act_loss: 0.01459503173828125|cri_loss: 0.00791168212890625|unsuper_loss: 0.0
+average reward score: 5.140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.64%) |Training time=0.44s (20.70%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6078|ppo_ep: 1|act_loss: -0.014251708984375|cri_loss: -0.006336212158203125|unsuper_loss: 0.0
+average reward score: 5.8359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.94%) |Training time=0.44s (20.39%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
+[2023-04-14 12:30:48,347] [INFO] [logging.py:96:log_dist] [Rank 0] step=6080, skipped=76, lr=[1.7083970985012522e-06, 1.7083970985012522e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:30:48,365] [INFO] [timer.py:199:stop] epoch=0/micro_step=6080/global_step=6080, RunningAvgSamplesPerSec=105.44891692173641, CurrSamplesPerSec=114.66817942601718, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:30:48,458] [INFO] [logging.py:96:log_dist] [Rank 0] step=6080, skipped=100, lr=[9.028829858700974e-07, 9.028829858700974e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6079|ppo_ep: 1|act_loss: -0.020721435546875|cri_loss: -0.01010894775390625|unsuper_loss: 0.0
+average reward score: 5.296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.84%) |Training time=0.44s (20.52%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6080|ppo_ep: 1|act_loss: -0.01166534423828125|cri_loss: -0.005489349365234375|unsuper_loss: 0.0
+average reward score: 5.03515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.84%) |Training time=0.44s (20.50%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6081|ppo_ep: 1|act_loss: 0.0248565673828125|cri_loss: 0.012939453125|unsuper_loss: 0.0
+average reward score: 6.3203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.44%) |Training time=0.45s (20.91%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6082|ppo_ep: 1|act_loss: 0.0131683349609375|cri_loss: 0.0073394775390625|unsuper_loss: 0.0
+average reward score: 4.7109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.86%) |Training time=0.44s (20.47%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6083|ppo_ep: 1|act_loss: 0.021148681640625|cri_loss: 0.010986328125|unsuper_loss: 0.0
+average reward score: 6.01953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.68%) |Training time=0.44s (20.66%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6084|ppo_ep: 1|act_loss: -0.002887725830078125|cri_loss: -0.0010995864868164062|unsuper_loss: 0.0
+average reward score: 5.5859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.76%) |Training time=0.44s (20.56%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6085|ppo_ep: 1|act_loss: 0.02294921875|cri_loss: 0.012542724609375|unsuper_loss: 0.0
+average reward score: 5.28125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.83%) |Training time=0.44s (20.47%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6086|ppo_ep: 1|act_loss: -0.003551483154296875|cri_loss: -0.0016345977783203125|unsuper_loss: 0.0
+average reward score: 6.015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.91%) |Training time=0.44s (20.40%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.52
+[2023-04-14 12:31:05,617] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 6087|ppo_ep: 1|act_loss: 0.0240478515625|cri_loss: 0.0125885009765625|unsuper_loss: 0.0
+average reward score: 5.265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.28%) |Training time=0.44s (20.50%) |Others=0.09 (4.22%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.52
+[2023-04-14 12:31:07,750] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 6088|ppo_ep: 1|act_loss: 0.035614013671875|cri_loss: 0.01824951171875|unsuper_loss: 0.0
+average reward score: 5.05859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.12%) |Training time=0.44s (20.68%) |Others=0.09 (4.20%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.52
+[2023-04-14 12:31:09,949] [INFO] [logging.py:96:log_dist] [Rank 0] step=6090, skipped=76, lr=[1.6942391871325284e-06, 1.6942391871325284e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:31:09,968] [INFO] [timer.py:199:stop] epoch=0/micro_step=6090/global_step=6090, RunningAvgSamplesPerSec=105.46141074976802, CurrSamplesPerSec=103.6843468130931, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:31:10,060] [INFO] [logging.py:96:log_dist] [Rank 0] step=6090, skipped=102, lr=[8.969667129160547e-07, 8.969667129160547e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6089|ppo_ep: 1|act_loss: -0.011199951171875|cri_loss: -0.005390167236328125|unsuper_loss: 0.0
+average reward score: 5.703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.73s (74.93%) |Training time=0.47s (20.43%) |Others=0.11 (4.64%)|CurSamplesPerSec=13.84 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6090|ppo_ep: 1|act_loss: -0.021484375|cri_loss: -0.01032257080078125|unsuper_loss: 0.0
+average reward score: 5.59375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.71%) |Training time=0.44s (20.63%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6091|ppo_ep: 1|act_loss: -0.01059722900390625|cri_loss: -0.0050811767578125|unsuper_loss: 0.0
+average reward score: 4.9921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.90%) |Training time=0.44s (20.43%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6092|ppo_ep: 1|act_loss: -0.02288818359375|cri_loss: -0.01100921630859375|unsuper_loss: 0.0
+average reward score: 4.60546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.33%) |Training time=0.43s (20.02%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6093|ppo_ep: 1|act_loss: 0.01059722900390625|cri_loss: 0.00543975830078125|unsuper_loss: 0.0
+average reward score: 5.87890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.77%) |Training time=0.45s (19.79%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.23 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6094|ppo_ep: 1|act_loss: 0.0088653564453125|cri_loss: 0.0054931640625|unsuper_loss: 0.0
+average reward score: 5.05859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.66%) |Training time=0.44s (20.68%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6095|ppo_ep: 1|act_loss: 0.002613067626953125|cri_loss: 0.0014209747314453125|unsuper_loss: 0.0
+average reward score: 5.1796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.98%) |Training time=0.44s (20.33%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6096|ppo_ep: 1|act_loss: -0.00624847412109375|cri_loss: -0.0026302337646484375|unsuper_loss: 0.0
+average reward score: 4.890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.66%) |Training time=0.45s (20.70%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6097|ppo_ep: 1|act_loss: 0.03790283203125|cri_loss: 0.0205535888671875|unsuper_loss: 0.0
+average reward score: 5.671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.72%) |Training time=0.44s (20.63%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6098|ppo_ep: 1|act_loss: -0.00748443603515625|cri_loss: -0.00344085693359375|unsuper_loss: 0.0
+average reward score: 5.6015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.73%) |Training time=0.44s (20.59%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52
+[2023-04-14 12:31:31,534] [INFO] [logging.py:96:log_dist] [Rank 0] step=6100, skipped=76, lr=[1.6801276811916005e-06, 1.6801276811916005e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:31:31,552] [INFO] [timer.py:199:stop] epoch=0/micro_step=6100/global_step=6100, RunningAvgSamplesPerSec=105.47637239879195, CurrSamplesPerSec=115.30227455669898, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:31:31,645] [INFO] [logging.py:96:log_dist] [Rank 0] step=6100, skipped=102, lr=[8.895927630346729e-07, 8.895927630346729e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6099|ppo_ep: 1|act_loss: -0.0009832382202148438|cri_loss: -2.765655517578125e-05|unsuper_loss: 0.0
+average reward score: 5.52734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.79%) |Training time=0.44s (20.57%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6100|ppo_ep: 1|act_loss: 0.0250396728515625|cri_loss: 0.012725830078125|unsuper_loss: 0.0
+average reward score: 4.625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.43%) |Training time=0.45s (20.94%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6101|ppo_ep: 1|act_loss: 0.025787353515625|cri_loss: 0.01318359375|unsuper_loss: 0.0
+average reward score: 5.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.52%) |Training time=0.43s (19.81%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6102|ppo_ep: 1|act_loss: 0.00839996337890625|cri_loss: 0.0043182373046875|unsuper_loss: 0.0
+average reward score: 5.15234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.46%) |Training time=0.43s (19.83%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6103|ppo_ep: 1|act_loss: -0.0005388259887695312|cri_loss: 0.00017642974853515625|unsuper_loss: 0.0
+average reward score: 5.0546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.88%) |Training time=0.44s (20.47%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6104|ppo_ep: 1|act_loss: 0.0017223358154296875|cri_loss: 0.001003265380859375|unsuper_loss: 0.0
+average reward score: 4.6953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.70s (73.49%) |Training time=0.51s (22.14%) |Others=0.10 (4.37%)|CurSamplesPerSec=13.86 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6105|ppo_ep: 1|act_loss: 0.0120697021484375|cri_loss: 0.00655364990234375|unsuper_loss: 0.0
+average reward score: 5.53515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.03%) |Training time=0.44s (20.32%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6106|ppo_ep: 1|act_loss: -0.000453948974609375|cri_loss: 0.0004172325134277344|unsuper_loss: 0.0
+average reward score: 5.02734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.64s (76.35%) |Training time=0.41s (18.98%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6107|ppo_ep: 1|act_loss: 0.0012788772583007812|cri_loss: 0.0009832382202148438|unsuper_loss: 0.0
+average reward score: 5.0625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.86%) |Training time=0.42s (19.38%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6108|ppo_ep: 1|act_loss: -0.0181121826171875|cri_loss: -0.0089263916015625|unsuper_loss: 0.0
+average reward score: 5.05859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.97%) |Training time=0.44s (20.36%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52
+[2023-04-14 12:31:53,178] [INFO] [logging.py:96:log_dist] [Rank 0] step=6110, skipped=76, lr=[1.666062789845028e-06, 1.666062789845028e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:31:53,197] [INFO] [timer.py:199:stop] epoch=0/micro_step=6110/global_step=6110, RunningAvgSamplesPerSec=105.49268024048469, CurrSamplesPerSec=120.01878555601061, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:31:53,289] [INFO] [logging.py:96:log_dist] [Rank 0] step=6110, skipped=102, lr=[8.822426832734388e-07, 8.822426832734388e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6109|ppo_ep: 1|act_loss: 0.03955078125|cri_loss: 0.0203399658203125|unsuper_loss: 0.0
+average reward score: 5.234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.27%) |Training time=0.43s (20.06%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6110|ppo_ep: 1|act_loss: -0.0210723876953125|cri_loss: -0.01020050048828125|unsuper_loss: 0.0
+average reward score: 6.0546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.69%) |Training time=0.44s (20.64%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6111|ppo_ep: 1|act_loss: -0.01251220703125|cri_loss: -0.005100250244140625|unsuper_loss: 0.0
+average reward score: 5.03125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.78%) |Training time=0.44s (20.57%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6112|ppo_ep: 1|act_loss: -0.00618743896484375|cri_loss: -0.002872467041015625|unsuper_loss: 0.0
+average reward score: 5.04296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.10%) |Training time=0.43s (20.21%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6113|ppo_ep: 1|act_loss: 0.021453857421875|cri_loss: 0.0109710693359375|unsuper_loss: 0.0
+average reward score: 4.69921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.97%) |Training time=0.44s (20.36%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6114|ppo_ep: 1|act_loss: -0.00036072731018066406|cri_loss: 3.933906555175781e-05|unsuper_loss: 0.0
+average reward score: 5.62890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.72%) |Training time=0.44s (20.60%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6115|ppo_ep: 1|act_loss: -0.0131378173828125|cri_loss: -0.006378173828125|unsuper_loss: 0.0
+average reward score: 5.27734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.92%) |Training time=0.44s (20.41%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6116|ppo_ep: 1|act_loss: 0.027069091796875|cri_loss: 0.013885498046875|unsuper_loss: 0.0
+average reward score: 4.9609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.90%) |Training time=0.44s (20.42%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6117|ppo_ep: 1|act_loss: 0.03656005859375|cri_loss: 0.019195556640625|unsuper_loss: 0.0
+average reward score: 5.3828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.26%) |Training time=0.46s (21.11%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6118|ppo_ep: 1|act_loss: -0.01161956787109375|cri_loss: -0.00482177734375|unsuper_loss: 0.0
+average reward score: 5.4453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.63%) |Training time=0.45s (20.71%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.52
+[2023-04-14 12:32:14,701] [INFO] [logging.py:96:log_dist] [Rank 0] step=6120, skipped=76, lr=[1.6520447215684334e-06, 1.6520447215684334e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:32:14,705] [INFO] [timer.py:199:stop] epoch=0/micro_step=6120/global_step=6120, RunningAvgSamplesPerSec=105.50570028757365, CurrSamplesPerSec=111.2033127913316, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:32:14,868] [INFO] [logging.py:96:log_dist] [Rank 0] step=6120, skipped=102, lr=[8.749165825782657e-07, 8.749165825782657e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6119|ppo_ep: 1|act_loss: -0.0025386810302734375|cri_loss: -0.0011510848999023438|unsuper_loss: 0.0
+average reward score: 5.0234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.61s (72.14%) |Training time=0.48s (21.48%) |Others=0.14 (6.38%)|CurSamplesPerSec=14.36 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6120|ppo_ep: 1|act_loss: -0.005218505859375|cri_loss: -0.0017795562744140625|unsuper_loss: 0.0
+average reward score: 5.49609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.63%) |Training time=0.45s (20.72%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6121|ppo_ep: 1|act_loss: 0.0121917724609375|cri_loss: 0.00702667236328125|unsuper_loss: 0.0
+average reward score: 5.09375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.87%) |Training time=0.44s (20.49%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6122|ppo_ep: 1|act_loss: -0.012176513671875|cri_loss: -0.00586700439453125|unsuper_loss: 0.0
+average reward score: 5.0390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.87%) |Training time=0.45s (20.53%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6123|ppo_ep: 1|act_loss: -0.003131866455078125|cri_loss: -0.0015077590942382812|unsuper_loss: 0.0
+average reward score: 5.08203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.78%) |Training time=0.45s (19.78%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.22 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6124|ppo_ep: 1|act_loss: 0.003070831298828125|cri_loss: 0.002010345458984375|unsuper_loss: 0.0
+average reward score: 5.22265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.59%) |Training time=0.45s (20.78%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6125|ppo_ep: 1|act_loss: -0.0011262893676757812|cri_loss: -7.152557373046875e-05|unsuper_loss: 0.0
+average reward score: 5.19921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.95%) |Training time=0.44s (20.38%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6126|ppo_ep: 1|act_loss: 0.0059051513671875|cri_loss: 0.003509521484375|unsuper_loss: 0.0
+average reward score: 4.73828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.75%) |Training time=0.44s (20.61%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6127|ppo_ep: 1|act_loss: -0.0007395744323730469|cri_loss: -0.0002562999725341797|unsuper_loss: 0.0
+average reward score: 4.9921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.55%) |Training time=0.45s (20.82%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6128|ppo_ep: 1|act_loss: 0.0227203369140625|cri_loss: 0.0115966796875|unsuper_loss: 0.0
+average reward score: 6.3515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.71%) |Training time=0.44s (20.63%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.52
+[2023-04-14 12:32:36,432] [INFO] [logging.py:96:log_dist] [Rank 0] step=6130, skipped=76, lr=[1.6380736841434013e-06, 1.6380736841434013e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:32:36,450] [INFO] [timer.py:199:stop] epoch=0/micro_step=6130/global_step=6130, RunningAvgSamplesPerSec=105.5171589615916, CurrSamplesPerSec=112.04278099865182, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:32:36,542] [INFO] [logging.py:96:log_dist] [Rank 0] step=6130, skipped=102, lr=[8.676145695396399e-07, 8.676145695396399e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6129|ppo_ep: 1|act_loss: -0.01308441162109375|cri_loss: -0.006229400634765625|unsuper_loss: 0.0
+average reward score: 5.44921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.58%) |Training time=0.45s (20.81%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6130|ppo_ep: 1|act_loss: 0.003566741943359375|cri_loss: 0.00501251220703125|unsuper_loss: 0.0
+average reward score: 4.9453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.52%) |Training time=0.45s (20.82%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6131|ppo_ep: 1|act_loss: -0.010467529296875|cri_loss: -0.005153656005859375|unsuper_loss: 0.0
+average reward score: 6.0703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.16%) |Training time=0.43s (20.14%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6132|ppo_ep: 1|act_loss: 0.0159759521484375|cri_loss: 0.00836181640625|unsuper_loss: 0.0
+average reward score: 5.53125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.47s (21.90%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6133|ppo_ep: 1|act_loss: 0.01029205322265625|cri_loss: 0.00539398193359375|unsuper_loss: 0.0
+average reward score: 4.8046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.15%) |Training time=0.45s (21.18%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6134|ppo_ep: 1|act_loss: 0.028656005859375|cri_loss: 0.01476287841796875|unsuper_loss: 0.0
+average reward score: 5.22265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.57s (69.15%) |Training time=0.60s (26.43%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.06 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6135|ppo_ep: 1|act_loss: 0.00274658203125|cri_loss: 0.0015954971313476562|unsuper_loss: 0.0
+average reward score: 5.375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.51%) |Training time=0.47s (21.83%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6136|ppo_ep: 1|act_loss: -0.020721435546875|cri_loss: -0.00540924072265625|unsuper_loss: 0.0
+average reward score: 5.40234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.09%) |Training time=0.48s (22.28%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6137|ppo_ep: 1|act_loss: -0.032318115234375|cri_loss: -0.015716552734375|unsuper_loss: 0.0
+average reward score: 4.9296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.13%) |Training time=0.48s (22.21%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6138|ppo_ep: 1|act_loss: 0.0096435546875|cri_loss: 0.005260467529296875|unsuper_loss: 0.0
+average reward score: 5.15625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.47s (21.95%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52
+[2023-04-14 12:32:58,076] [INFO] [logging.py:96:log_dist] [Rank 0] step=6140, skipped=76, lr=[1.6241498846544112e-06, 1.6241498846544112e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:32:58,094] [INFO] [timer.py:199:stop] epoch=0/micro_step=6140/global_step=6140, RunningAvgSamplesPerSec=105.51132018702846, CurrSamplesPerSec=102.04126119112495, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:32:58,188] [INFO] [logging.py:96:log_dist] [Rank 0] step=6140, skipped=102, lr=[8.60336752391008e-07, 8.60336752391008e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6139|ppo_ep: 1|act_loss: 0.007755279541015625|cri_loss: 0.004730224609375|unsuper_loss: 0.0
+average reward score: 5.875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.17%) |Training time=0.48s (22.20%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6140|ppo_ep: 1|act_loss: 0.006702423095703125|cri_loss: 0.0040435791015625|unsuper_loss: 0.0
+average reward score: 5.20703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.24%) |Training time=0.48s (22.09%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6141|ppo_ep: 1|act_loss: -0.02569580078125|cri_loss: -0.012451171875|unsuper_loss: 0.0
+average reward score: 5.828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.66%) |Training time=0.47s (21.73%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6142|ppo_ep: 1|act_loss: -0.0069122314453125|cri_loss: -0.00293731689453125|unsuper_loss: 0.0
+average reward score: 5.015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.48s (22.14%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6143|ppo_ep: 1|act_loss: 0.0230712890625|cri_loss: 0.0119781494140625|unsuper_loss: 0.0
+average reward score: 5.76171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.46%) |Training time=0.47s (21.88%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6144|ppo_ep: 1|act_loss: 0.0186614990234375|cri_loss: 0.0100250244140625|unsuper_loss: 0.0
+average reward score: 6.359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.11%) |Training time=0.48s (22.25%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6145|ppo_ep: 1|act_loss: -0.0276031494140625|cri_loss: -0.01342010498046875|unsuper_loss: 0.0
+average reward score: 5.58984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.87%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6146|ppo_ep: 1|act_loss: -0.0023174285888671875|cri_loss: -0.0010881423950195312|unsuper_loss: 0.0
+average reward score: 5.1015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.28%) |Training time=0.48s (22.05%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6147|ppo_ep: 1|act_loss: -0.05145263671875|cri_loss: -0.0251922607421875|unsuper_loss: 0.0
+average reward score: 5.57421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.59%) |Training time=0.47s (21.74%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6148|ppo_ep: 1|act_loss: 0.0002200603485107422|cri_loss: 0.0001798868179321289|unsuper_loss: 0.0
+average reward score: 5.83203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.25%) |Training time=0.48s (22.12%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.52
+[2023-04-14 12:33:19,694] [INFO] [logging.py:96:log_dist] [Rank 0] step=6150, skipped=76, lr=[1.6102735294857612e-06, 1.6102735294857612e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:33:19,711] [INFO] [timer.py:199:stop] epoch=0/micro_step=6150/global_step=6150, RunningAvgSamplesPerSec=105.50358234499453, CurrSamplesPerSec=88.18394502043337, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:33:19,859] [INFO] [logging.py:96:log_dist] [Rank 0] step=6150, skipped=102, lr=[8.530832390071778e-07, 8.530832390071778e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6149|ppo_ep: 1|act_loss: 0.0155792236328125|cri_loss: 0.008056640625|unsuper_loss: 0.0
+average reward score: 5.6640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.58s (69.94%) |Training time=0.53s (23.28%) |Others=0.15 (6.79%)|CurSamplesPerSec=14.17 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6150|ppo_ep: 1|act_loss: -0.014556884765625|cri_loss: -0.00690460205078125|unsuper_loss: 0.0
+average reward score: 5.390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.53%) |Training time=0.48s (21.80%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6151|ppo_ep: 1|act_loss: -0.001575469970703125|cri_loss: -0.0005412101745605469|unsuper_loss: 0.0
+average reward score: 5.90625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.22%) |Training time=0.42s (19.07%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6152|ppo_ep: 1|act_loss: 0.0008678436279296875|cri_loss: 0.0005321502685546875|unsuper_loss: 0.0
+average reward score: 4.9375
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.69s (75.80%) |Training time=0.43s (19.49%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.38 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6153|ppo_ep: 1|act_loss: 0.0113983154296875|cri_loss: 0.0059967041015625|unsuper_loss: 0.0
+average reward score: 5.484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.21%) |Training time=0.43s (20.03%) |Others=0.10 (4.76%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6154|ppo_ep: 1|act_loss: 0.027252197265625|cri_loss: 0.01384735107421875|unsuper_loss: 0.0
+average reward score: 5.7890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.07%) |Training time=0.46s (21.25%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6155|ppo_ep: 1|act_loss: 0.01102447509765625|cri_loss: 0.00586700439453125|unsuper_loss: 0.0
+average reward score: 5.109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.58%) |Training time=0.45s (20.70%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6156|ppo_ep: 1|act_loss: -0.01934814453125|cri_loss: -0.0091705322265625|unsuper_loss: 0.0
+average reward score: 5.5859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.83%) |Training time=0.44s (20.44%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6157|ppo_ep: 1|act_loss: -0.01885986328125|cri_loss: -0.00922393798828125|unsuper_loss: 0.0
+average reward score: 5.7734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.70%) |Training time=0.43s (19.58%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6158|ppo_ep: 1|act_loss: -0.0258026123046875|cri_loss: -0.01213836669921875|unsuper_loss: 0.0
+average reward score: 5.41796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.72%) |Training time=0.47s (21.64%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.52
+[2023-04-14 12:33:41,552] [INFO] [logging.py:96:log_dist] [Rank 0] step=6160, skipped=76, lr=[1.5964448243185107e-06, 1.5964448243185107e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:33:41,570] [INFO] [timer.py:199:stop] epoch=0/micro_step=6160/global_step=6160, RunningAvgSamplesPerSec=105.51397045659475, CurrSamplesPerSec=105.2155795695511, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:33:41,663] [INFO] [logging.py:96:log_dist] [Rank 0] step=6160, skipped=102, lr=[8.458541369027159e-07, 8.458541369027159e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6159|ppo_ep: 1|act_loss: 0.0109405517578125|cri_loss: 0.005832672119140625|unsuper_loss: 0.0
+average reward score: 4.83984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.83%) |Training time=0.47s (21.55%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6160|ppo_ep: 1|act_loss: 0.0102691650390625|cri_loss: 0.005405426025390625|unsuper_loss: 0.0
+average reward score: 5.4453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.32%) |Training time=0.48s (22.04%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6161|ppo_ep: 1|act_loss: -0.024322509765625|cri_loss: -0.01200103759765625|unsuper_loss: 0.0
+average reward score: 5.23046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.73%) |Training time=0.47s (21.62%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6162|ppo_ep: 1|act_loss: -0.0107574462890625|cri_loss: -0.00484466552734375|unsuper_loss: 0.0
+average reward score: 6.30078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.00%) |Training time=0.44s (20.34%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6163|ppo_ep: 1|act_loss: 0.029083251953125|cri_loss: 0.015167236328125|unsuper_loss: 0.0
+average reward score: 5.578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.81%) |Training time=0.42s (19.54%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6164|ppo_ep: 1|act_loss: 0.0117950439453125|cri_loss: 0.00669097900390625|unsuper_loss: 0.0
+average reward score: 5.7734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.53%) |Training time=0.45s (20.84%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6165|ppo_ep: 1|act_loss: 0.02691650390625|cri_loss: 0.01380157470703125|unsuper_loss: 0.0
+average reward score: 5.77734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.74s (74.71%) |Training time=0.49s (20.97%) |Others=0.10 (4.33%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6166|ppo_ep: 1|act_loss: 0.00437164306640625|cri_loss: 0.00235748291015625|unsuper_loss: 0.0
+average reward score: 5.08203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.49%) |Training time=0.45s (20.88%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6167|ppo_ep: 1|act_loss: -0.00687408447265625|cri_loss: -0.0029582977294921875|unsuper_loss: 0.0
+average reward score: 5.45703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.76%) |Training time=0.45s (20.59%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6168|ppo_ep: 1|act_loss: 0.017120361328125|cri_loss: 0.0088958740234375|unsuper_loss: 0.0
+average reward score: 5.484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.53%) |Training time=0.45s (20.81%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.52
+[2023-04-14 12:34:03,411] [INFO] [logging.py:96:log_dist] [Rank 0] step=6170, skipped=76, lr=[1.5826639741274258e-06, 1.5826639741274258e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:34:03,430] [INFO] [timer.py:199:stop] epoch=0/micro_step=6170/global_step=6170, RunningAvgSamplesPerSec=105.5204895216476, CurrSamplesPerSec=110.71758783026316, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:34:03,522] [INFO] [logging.py:96:log_dist] [Rank 0] step=6170, skipped=102, lr=[8.386495532303557e-07, 8.386495532303557e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6169|ppo_ep: 1|act_loss: -0.0137786865234375|cri_loss: -0.006626129150390625|unsuper_loss: 0.0
+average reward score: 5.09375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.70%) |Training time=0.45s (20.70%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6170|ppo_ep: 1|act_loss: -0.0162811279296875|cri_loss: -0.00791168212890625|unsuper_loss: 0.0
+average reward score: 5.17578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.03%) |Training time=0.44s (20.29%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6171|ppo_ep: 1|act_loss: -0.020263671875|cri_loss: -0.0099029541015625|unsuper_loss: 0.0
+average reward score: 5.26953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.14%) |Training time=0.44s (20.18%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6172|ppo_ep: 1|act_loss: -0.026458740234375|cri_loss: -0.00772857666015625|unsuper_loss: 0.0
+average reward score: 5.36328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.13%) |Training time=0.44s (20.22%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6173|ppo_ep: 1|act_loss: -0.0079498291015625|cri_loss: -0.003582000732421875|unsuper_loss: 0.0
+average reward score: 5.44921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.18%) |Training time=0.44s (20.17%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6174|ppo_ep: 1|act_loss: -0.00720977783203125|cri_loss: -0.003360748291015625|unsuper_loss: 0.0
+average reward score: 4.85546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.95%) |Training time=0.44s (20.40%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6175|ppo_ep: 1|act_loss: 0.0032806396484375|cri_loss: 0.0018138885498046875|unsuper_loss: 0.0
+average reward score: 6.015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.95%) |Training time=0.44s (20.39%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6176|ppo_ep: 1|act_loss: 0.010406494140625|cri_loss: 0.005847930908203125|unsuper_loss: 0.0
+average reward score: 5.75
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.63%) |Training time=0.45s (20.71%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6177|ppo_ep: 1|act_loss: 0.001491546630859375|cri_loss: 0.0009002685546875|unsuper_loss: 0.0
+average reward score: 5.765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.40%) |Training time=0.45s (20.66%) |Others=0.11 (4.94%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6178|ppo_ep: 1|act_loss: 0.00827789306640625|cri_loss: 0.004497528076171875|unsuper_loss: 0.0
+average reward score: 5.4296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.27%) |Training time=0.46s (21.11%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.52
+[2023-04-14 12:34:25,037] [INFO] [logging.py:96:log_dist] [Rank 0] step=6180, skipped=76, lr=[1.5689311831779562e-06, 1.5689311831779562e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:34:25,056] [INFO] [timer.py:199:stop] epoch=0/micro_step=6180/global_step=6180, RunningAvgSamplesPerSec=105.53333925269833, CurrSamplesPerSec=109.81793903372979, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:34:25,148] [INFO] [logging.py:96:log_dist] [Rank 0] step=6180, skipped=102, lr=[8.314695947794054e-07, 8.314695947794054e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6179|ppo_ep: 1|act_loss: 0.0247955322265625|cri_loss: 0.012725830078125|unsuper_loss: 0.0
+average reward score: 4.7109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.36%) |Training time=0.46s (21.04%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6180|ppo_ep: 1|act_loss: -0.02691650390625|cri_loss: -0.0132904052734375|unsuper_loss: 0.0
+average reward score: 5.65234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.63s (71.82%) |Training time=0.53s (23.42%) |Others=0.11 (4.76%)|CurSamplesPerSec=14.07 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6181|ppo_ep: 1|act_loss: -0.01049041748046875|cri_loss: -0.004871368408203125|unsuper_loss: 0.0
+average reward score: 5.95703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.51%) |Training time=0.46s (20.91%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6182|ppo_ep: 1|act_loss: -8.463859558105469e-05|cri_loss: 0.00012814998626708984|unsuper_loss: 0.0
+average reward score: 5.5625
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.72%) |Training time=0.45s (19.84%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.14 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6183|ppo_ep: 1|act_loss: -0.0137481689453125|cri_loss: -0.00630950927734375|unsuper_loss: 0.0
+average reward score: 4.7421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.68%) |Training time=0.45s (20.69%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6184|ppo_ep: 1|act_loss: -0.0121002197265625|cri_loss: -0.00557708740234375|unsuper_loss: 0.0
+average reward score: 4.9453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.58%) |Training time=0.45s (20.78%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6185|ppo_ep: 1|act_loss: 0.01291656494140625|cri_loss: 0.00736236572265625|unsuper_loss: 0.0
+average reward score: 5.94921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.87%) |Training time=0.44s (20.50%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6186|ppo_ep: 1|act_loss: 0.007572174072265625|cri_loss: 0.00428009033203125|unsuper_loss: 0.0
+average reward score: 5.45703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.82%) |Training time=0.44s (20.51%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6187|ppo_ep: 1|act_loss: 0.01485443115234375|cri_loss: 0.0076904296875|unsuper_loss: 0.0
+average reward score: 5.8828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.86%) |Training time=0.44s (20.49%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6188|ppo_ep: 1|act_loss: 0.02923583984375|cri_loss: 0.01490020751953125|unsuper_loss: 0.0
+average reward score: 5.69921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.55%) |Training time=0.45s (20.82%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.52
+[2023-04-14 12:34:46,845] [INFO] [logging.py:96:log_dist] [Rank 0] step=6190, skipped=76, lr=[1.5552466550231917e-06, 1.5552466550231917e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:34:46,863] [INFO] [timer.py:199:stop] epoch=0/micro_step=6190/global_step=6190, RunningAvgSamplesPerSec=105.54164914219182, CurrSamplesPerSec=115.26355799601869, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:34:46,947] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+[2023-04-14 12:34:46,947] [INFO] [logging.py:96:log_dist] [Rank 0] step=6190, skipped=103, lr=[8.25028774705112e-07, 8.25028774705112e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6189|ppo_ep: 1|act_loss: 0.0374755859375|cri_loss: 0.0200653076171875|unsuper_loss: 0.0
+average reward score: 4.6484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.31%) |Training time=0.44s (20.52%) |Others=0.09 (4.16%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
+[2023-04-14 12:34:49,092] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 6190|ppo_ep: 1|act_loss: 0.0126953125|cri_loss: 0.00658416748046875|unsuper_loss: 0.0
+average reward score: 5.40625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.20%) |Training time=0.44s (20.63%) |Others=0.09 (4.17%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6191|ppo_ep: 1|act_loss: 0.0193328857421875|cri_loss: 0.01024627685546875|unsuper_loss: 0.0
+average reward score: 5.68359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.85%) |Training time=0.44s (20.51%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6192|ppo_ep: 1|act_loss: -0.034759521484375|cri_loss: -0.0170745849609375|unsuper_loss: 0.0
+average reward score: 5.2109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.49%) |Training time=0.45s (20.85%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6193|ppo_ep: 1|act_loss: 0.015655517578125|cri_loss: 0.00811004638671875|unsuper_loss: 0.0
+average reward score: 4.49609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.89%) |Training time=0.44s (20.47%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6194|ppo_ep: 1|act_loss: -0.0287933349609375|cri_loss: -0.0138702392578125|unsuper_loss: 0.0
+average reward score: 4.96484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.39%) |Training time=0.43s (19.91%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6195|ppo_ep: 1|act_loss: 0.029052734375|cri_loss: 0.018585205078125|unsuper_loss: 0.0
+average reward score: 5.20703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.08%) |Training time=0.45s (20.21%) |Others=0.15 (6.72%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6196|ppo_ep: 1|act_loss: -0.021270751953125|cri_loss: -0.0104522705078125|unsuper_loss: 0.0
+average reward score: 5.13671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.92%) |Training time=0.44s (20.37%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6197|ppo_ep: 1|act_loss: 0.0007162094116210938|cri_loss: 0.0006031990051269531|unsuper_loss: 0.0
+average reward score: 5.33984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.91%) |Training time=0.44s (20.40%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6198|ppo_ep: 1|act_loss: -0.003345489501953125|cri_loss: -0.0012874603271484375|unsuper_loss: 0.0
+average reward score: 5.40234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.81%) |Training time=0.44s (20.51%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52
+[2023-04-14 12:35:08,385] [INFO] [logging.py:96:log_dist] [Rank 0] step=6200, skipped=76, lr=[1.5416105925008481e-06, 1.5416105925008481e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:35:08,403] [INFO] [timer.py:199:stop] epoch=0/micro_step=6200/global_step=6200, RunningAvgSamplesPerSec=105.5565439468875, CurrSamplesPerSec=118.04580662921714, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:35:08,496] [INFO] [logging.py:96:log_dist] [Rank 0] step=6200, skipped=104, lr=[8.186080645980449e-07, 8.186080645980449e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6199|ppo_ep: 1|act_loss: 0.0003528594970703125|cri_loss: 0.0007967948913574219|unsuper_loss: 0.0
+average reward score: 5.265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.09%) |Training time=0.43s (20.24%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6200|ppo_ep: 1|act_loss: -0.003871917724609375|cri_loss: -0.00183868408203125|unsuper_loss: 0.0
+average reward score: 5.421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.73%) |Training time=0.44s (20.62%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6201|ppo_ep: 1|act_loss: 0.004852294921875|cri_loss: 0.0026836395263671875|unsuper_loss: 0.0
+average reward score: 5.0859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.91%) |Training time=0.44s (20.42%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6202|ppo_ep: 1|act_loss: -0.002704620361328125|cri_loss: -0.00090789794921875|unsuper_loss: 0.0
+average reward score: 5.484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.78%) |Training time=0.44s (20.55%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6203|ppo_ep: 1|act_loss: -9.34600830078125e-05|cri_loss: 5.0902366638183594e-05|unsuper_loss: 0.0
+average reward score: 5.6015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.49%) |Training time=0.45s (20.88%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6204|ppo_ep: 1|act_loss: -0.00811004638671875|cri_loss: -0.003986358642578125|unsuper_loss: 0.0
+average reward score: 6.2578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.71%) |Training time=0.44s (20.62%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6205|ppo_ep: 1|act_loss: -0.014251708984375|cri_loss: -0.006908416748046875|unsuper_loss: 0.0
+average reward score: 5.078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.14%) |Training time=0.45s (20.65%) |Others=0.11 (5.21%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6206|ppo_ep: 1|act_loss: -0.0167236328125|cri_loss: -0.00799560546875|unsuper_loss: 0.0
+average reward score: 5.50390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.64%) |Training time=0.44s (20.64%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6207|ppo_ep: 1|act_loss: 0.020660400390625|cri_loss: 0.01129913330078125|unsuper_loss: 0.0
+average reward score: 5.0234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.60%) |Training time=0.47s (21.28%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6208|ppo_ep: 1|act_loss: -0.0110626220703125|cri_loss: -0.005298614501953125|unsuper_loss: 0.0
+average reward score: 4.8359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.67s (76.20%) |Training time=0.42s (19.07%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.52
+[2023-04-14 12:35:30,005] [INFO] [logging.py:96:log_dist] [Rank 0] step=6210, skipped=76, lr=[1.5280231977302697e-06, 1.5280231977302697e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:35:30,023] [INFO] [timer.py:199:stop] epoch=0/micro_step=6210/global_step=6210, RunningAvgSamplesPerSec=105.569428626769, CurrSamplesPerSec=115.34894721463071, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:35:30,115] [INFO] [logging.py:96:log_dist] [Rank 0] step=6210, skipped=104, lr=[8.114976217732126e-07, 8.114976217732126e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6209|ppo_ep: 1|act_loss: -0.007843017578125|cri_loss: -0.0037384033203125|unsuper_loss: 0.0
+average reward score: 4.86328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.97%) |Training time=0.44s (20.37%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6210|ppo_ep: 1|act_loss: 0.001140594482421875|cri_loss: 0.0009160041809082031|unsuper_loss: 0.0
+average reward score: 5.4375
+-------------------------------------------------------------------------------------
+|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.79s (75.64%) |Training time=0.48s (20.07%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.50 |AvgSamplesPerSec=14.52
+[2023-04-14 12:35:34,546] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 6211|ppo_ep: 1|act_loss: 0.00507354736328125|cri_loss: 0.0026645660400390625|unsuper_loss: 0.0
+average reward score: 5.3515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.66s (77.26%) |Training time=0.39s (18.05%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.52
+[2023-04-14 12:35:36,778] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 6212|ppo_ep: 1|act_loss: 0.0276031494140625|cri_loss: 0.0142974853515625|unsuper_loss: 0.0
+average reward score: 5.4375
+-------------------------------------------------------------------------------------
+|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.75s (78.19%) |Training time=0.39s (17.28%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.34 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6213|ppo_ep: 1|act_loss: -0.02130126953125|cri_loss: -0.01049041748046875|unsuper_loss: 0.0
+average reward score: 5.25390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.42%) |Training time=0.45s (20.84%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6214|ppo_ep: 1|act_loss: -0.0183563232421875|cri_loss: -0.00872039794921875|unsuper_loss: 0.0
+average reward score: 5.140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.28%) |Training time=0.43s (19.99%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6215|ppo_ep: 1|act_loss: 0.0140380859375|cri_loss: 0.007381439208984375|unsuper_loss: 0.0
+average reward score: 5.203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.41%) |Training time=0.45s (20.87%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6216|ppo_ep: 1|act_loss: 0.002777099609375|cri_loss: 0.0017156600952148438|unsuper_loss: 0.0
+average reward score: 5.0703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.76%) |Training time=0.44s (20.52%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6217|ppo_ep: 1|act_loss: 0.0430908203125|cri_loss: 0.0218505859375|unsuper_loss: 0.0
+average reward score: 5.90625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.66%) |Training time=0.42s (19.60%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.52
+epoch: 0|step: 6218|ppo_ep: 1|act_loss: -0.029815673828125|cri_loss: -0.01462554931640625|unsuper_loss: 0.0
+average reward score: 5.37109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.43%) |Training time=0.42s (19.85%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.52
+[2023-04-14 12:35:51,756] [INFO] [logging.py:96:log_dist] [Rank 0] step=6220, skipped=78, lr=[1.517188458058827e-06, 1.517188458058827e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:35:51,774] [INFO] [timer.py:199:stop] epoch=0/micro_step=6220/global_step=6220, RunningAvgSamplesPerSec=105.5899408595616, CurrSamplesPerSec=112.21889660419636, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:35:51,867] [INFO] [logging.py:96:log_dist] [Rank 0] step=6220, skipped=104, lr=[8.04412206626915e-07, 8.04412206626915e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6219|ppo_ep: 1|act_loss: 0.0148468017578125|cri_loss: 0.007663726806640625|unsuper_loss: 0.0
+average reward score: 5.0625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.39%) |Training time=0.45s (20.92%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6220|ppo_ep: 1|act_loss: -0.00608062744140625|cri_loss: -0.0027713775634765625|unsuper_loss: 0.0
+average reward score: 5.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.70%) |Training time=0.44s (20.59%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6221|ppo_ep: 1|act_loss: -0.0115509033203125|cri_loss: -0.0054931640625|unsuper_loss: 0.0
+average reward score: 5.71875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.80%) |Training time=0.44s (20.50%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6222|ppo_ep: 1|act_loss: -0.00766754150390625|cri_loss: -0.003414154052734375|unsuper_loss: 0.0
+average reward score: 5.7421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.42%) |Training time=0.45s (20.86%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6223|ppo_ep: 1|act_loss: -0.0185546875|cri_loss: -0.00916290283203125|unsuper_loss: 0.0
+average reward score: 5.73046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.17%) |Training time=0.46s (21.15%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6224|ppo_ep: 1|act_loss: 0.0041961669921875|cri_loss: 0.0024700164794921875|unsuper_loss: 0.0
+average reward score: 5.71875
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.25%) |Training time=0.51s (23.20%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6225|ppo_ep: 1|act_loss: -0.0008344650268554688|cri_loss: 7.200241088867188e-05|unsuper_loss: 0.0
+average reward score: 5.640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.59s (69.27%) |Training time=0.60s (26.34%) |Others=0.10 (4.39%)|CurSamplesPerSec=13.96 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6226|ppo_ep: 1|act_loss: 0.0102691650390625|cri_loss: 0.00655364990234375|unsuper_loss: 0.0
+average reward score: 5.6328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.72%) |Training time=0.47s (21.62%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6227|ppo_ep: 1|act_loss: 0.0199127197265625|cri_loss: 0.0105438232421875|unsuper_loss: 0.0
+average reward score: 5.640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.47s (21.90%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6228|ppo_ep: 1|act_loss: -0.008087158203125|cri_loss: -0.003635406494140625|unsuper_loss: 0.0
+average reward score: 5.31640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.69%) |Training time=0.47s (21.51%) |Others=0.10 (4.80%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
+[2023-04-14 12:36:13,489] [INFO] [logging.py:96:log_dist] [Rank 0] step=6230, skipped=78, lr=[1.5036891722776364e-06, 1.5036891722776364e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:36:13,507] [INFO] [timer.py:199:stop] epoch=0/micro_step=6230/global_step=6230, RunningAvgSamplesPerSec=105.58477727357665, CurrSamplesPerSec=102.88509972863999, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:36:13,600] [INFO] [logging.py:96:log_dist] [Rank 0] step=6230, skipped=104, lr=[7.973519241820982e-07, 7.973519241820982e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6229|ppo_ep: 1|act_loss: -0.00170135498046875|cri_loss: -0.0007963180541992188|unsuper_loss: 0.0
+average reward score: 5.5
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.88%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6230|ppo_ep: 1|act_loss: 0.00777435302734375|cri_loss: 0.004039764404296875|unsuper_loss: 0.0
+average reward score: 5.296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.57%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6231|ppo_ep: 1|act_loss: -0.0211181640625|cri_loss: -0.0089569091796875|unsuper_loss: 0.0
+average reward score: 5.6328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.86%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6232|ppo_ep: 1|act_loss: -0.026275634765625|cri_loss: -0.01280975341796875|unsuper_loss: 0.0
+average reward score: 5.24609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.24%) |Training time=0.48s (22.12%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6233|ppo_ep: 1|act_loss: -0.01097869873046875|cri_loss: -0.005405426025390625|unsuper_loss: 0.0
+average reward score: 5.98828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.24%) |Training time=0.48s (22.13%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6234|ppo_ep: 1|act_loss: 0.0022563934326171875|cri_loss: 0.0018453598022460938|unsuper_loss: 0.0
+average reward score: 5.6875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.23%) |Training time=0.48s (22.13%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6235|ppo_ep: 1|act_loss: -0.00632476806640625|cri_loss: -0.002765655517578125|unsuper_loss: 0.0
+average reward score: 4.84765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.40%) |Training time=0.47s (21.96%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6236|ppo_ep: 1|act_loss: -0.025146484375|cri_loss: -0.0122833251953125|unsuper_loss: 0.0
+average reward score: 5.30078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.48s (21.98%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6237|ppo_ep: 1|act_loss: -0.003505706787109375|cri_loss: -0.0015869140625|unsuper_loss: 0.0
+average reward score: 4.921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.25%) |Training time=0.48s (22.11%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6238|ppo_ep: 1|act_loss: -0.01464080810546875|cri_loss: -0.00714874267578125|unsuper_loss: 0.0
+average reward score: 5.1171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.57%) |Training time=0.47s (21.78%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
+[2023-04-14 12:36:35,111] [INFO] [logging.py:96:log_dist] [Rank 0] step=6240, skipped=78, lr=[1.4902391163351402e-06, 1.4902391163351402e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:36:35,130] [INFO] [timer.py:199:stop] epoch=0/micro_step=6240/global_step=6240, RunningAvgSamplesPerSec=105.58013996430111, CurrSamplesPerSec=104.96740574710576, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:36:35,222] [INFO] [logging.py:96:log_dist] [Rank 0] step=6240, skipped=104, lr=[7.903168790891797e-07, 7.903168790891797e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6239|ppo_ep: 1|act_loss: 0.0259552001953125|cri_loss: 0.0141448974609375|unsuper_loss: 0.0
+average reward score: 4.9921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.81%) |Training time=0.47s (21.58%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6240|ppo_ep: 1|act_loss: 0.01824951171875|cri_loss: 0.0101776123046875|unsuper_loss: 0.0
+average reward score: 5.96875
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.60s (70.23%) |Training time=0.58s (25.37%) |Others=0.10 (4.40%)|CurSamplesPerSec=14.05 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6241|ppo_ep: 1|act_loss: 0.02484130859375|cri_loss: 0.012664794921875|unsuper_loss: 0.0
+average reward score: 5.375
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.25%) |Training time=0.48s (21.74%) |Others=0.11 (5.01%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6242|ppo_ep: 1|act_loss: 0.01416015625|cri_loss: 0.007274627685546875|unsuper_loss: 0.0
+average reward score: 5.67578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.17%) |Training time=0.46s (21.18%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6243|ppo_ep: 1|act_loss: 0.028594970703125|cri_loss: 0.01468658447265625|unsuper_loss: 0.0
+average reward score: 5.66015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.65%) |Training time=0.44s (20.67%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6244|ppo_ep: 1|act_loss: 0.004192352294921875|cri_loss: 0.002834320068359375|unsuper_loss: 0.0
+average reward score: 5.1171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.38%) |Training time=0.45s (20.84%) |Others=0.10 (4.77%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6245|ppo_ep: 1|act_loss: -0.0104217529296875|cri_loss: -0.004413604736328125|unsuper_loss: 0.0
+average reward score: 5.36328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.79%) |Training time=0.44s (20.56%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6246|ppo_ep: 1|act_loss: -0.004180908203125|cri_loss: -0.0017414093017578125|unsuper_loss: 0.0
+average reward score: 5.09375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.40%) |Training time=0.45s (20.97%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6247|ppo_ep: 1|act_loss: -0.00423431396484375|cri_loss: -0.001888275146484375|unsuper_loss: 0.0
+average reward score: 5.04296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.69%) |Training time=0.45s (20.67%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6248|ppo_ep: 1|act_loss: -0.008544921875|cri_loss: -0.004192352294921875|unsuper_loss: 0.0
+average reward score: 5.0703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.72%) |Training time=0.45s (20.64%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53
+[2023-04-14 12:36:56,876] [INFO] [logging.py:96:log_dist] [Rank 0] step=6250, skipped=78, lr=[1.4768384895936108e-06, 1.4768384895936108e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:36:56,894] [INFO] [timer.py:199:stop] epoch=0/micro_step=6250/global_step=6250, RunningAvgSamplesPerSec=105.58190779052778, CurrSamplesPerSec=111.68467475926893, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:36:56,986] [INFO] [logging.py:96:log_dist] [Rank 0] step=6250, skipped=104, lr=[7.833071756244998e-07, 7.833071756244998e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6249|ppo_ep: 1|act_loss: 0.006694793701171875|cri_loss: 0.0036468505859375|unsuper_loss: 0.0
+average reward score: 5.09375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.61%) |Training time=0.45s (20.78%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6250|ppo_ep: 1|act_loss: -0.013153076171875|cri_loss: -0.006481170654296875|unsuper_loss: 0.0
+average reward score: 5.21484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.69%) |Training time=0.45s (20.66%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6251|ppo_ep: 1|act_loss: -0.00647735595703125|cri_loss: -0.0031337738037109375|unsuper_loss: 0.0
+average reward score: 5.203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.30%) |Training time=0.46s (21.08%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6252|ppo_ep: 1|act_loss: 0.02947998046875|cri_loss: 0.01508331298828125|unsuper_loss: 0.0
+average reward score: 5.20703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.86%) |Training time=0.45s (20.54%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6253|ppo_ep: 1|act_loss: 0.004730224609375|cri_loss: 0.002590179443359375|unsuper_loss: 0.0
+average reward score: 5.40625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.08%) |Training time=0.44s (20.25%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6254|ppo_ep: 1|act_loss: 0.01178741455078125|cri_loss: 0.006591796875|unsuper_loss: 0.0
+average reward score: 5.28515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.62%) |Training time=0.45s (20.75%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6255|ppo_ep: 1|act_loss: 0.02813720703125|cri_loss: 0.01495361328125|unsuper_loss: 0.0
+average reward score: 5.734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.92%) |Training time=0.44s (20.41%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6256|ppo_ep: 1|act_loss: 0.0218048095703125|cri_loss: 0.01131439208984375|unsuper_loss: 0.0
+average reward score: 6.625
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.11%) |Training time=0.46s (19.58%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.75 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6257|ppo_ep: 1|act_loss: 0.02520751953125|cri_loss: 0.01290130615234375|unsuper_loss: 0.0
+average reward score: 5.40234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.64%) |Training time=0.45s (20.71%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6258|ppo_ep: 1|act_loss: 0.001255035400390625|cri_loss: 0.000720977783203125|unsuper_loss: 0.0
+average reward score: 5.44140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.91%) |Training time=0.44s (20.41%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.53
+[2023-04-14 12:37:18,637] [INFO] [logging.py:96:log_dist] [Rank 0] step=6260, skipped=78, lr=[1.4634874906826658e-06, 1.4634874906826658e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:37:18,654] [INFO] [timer.py:199:stop] epoch=0/micro_step=6260/global_step=6260, RunningAvgSamplesPerSec=105.59232439178967, CurrSamplesPerSec=109.13119463360097, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:37:18,747] [INFO] [logging.py:96:log_dist] [Rank 0] step=6260, skipped=104, lr=[7.763229176887724e-07, 7.763229176887724e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6259|ppo_ep: 1|act_loss: 0.00228118896484375|cri_loss: 0.0017452239990234375|unsuper_loss: 0.0
+average reward score: 5.4453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.32%) |Training time=0.46s (21.07%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6260|ppo_ep: 1|act_loss: 0.00464630126953125|cri_loss: 0.0027523040771484375|unsuper_loss: 0.0
+average reward score: 5.72265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.58%) |Training time=0.45s (20.78%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6261|ppo_ep: 1|act_loss: -0.013946533203125|cri_loss: -0.0068206787109375|unsuper_loss: 0.0
+average reward score: 5.02734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.50%) |Training time=0.45s (20.86%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6262|ppo_ep: 1|act_loss: 0.0018863677978515625|cri_loss: 0.001041412353515625|unsuper_loss: 0.0
+average reward score: 5.125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.55%) |Training time=0.45s (20.79%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6263|ppo_ep: 1|act_loss: -0.0026092529296875|cri_loss: -0.000988006591796875|unsuper_loss: 0.0
+average reward score: 5.7265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.44%) |Training time=0.45s (20.92%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6264|ppo_ep: 1|act_loss: 0.00653076171875|cri_loss: 0.003620147705078125|unsuper_loss: 0.0
+average reward score: 5.6796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.71%) |Training time=0.44s (20.62%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6265|ppo_ep: 1|act_loss: -0.00452423095703125|cri_loss: -0.0022068023681640625|unsuper_loss: 0.0
+average reward score: 6.4296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.61%) |Training time=0.45s (20.75%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6266|ppo_ep: 1|act_loss: -0.0116119384765625|cri_loss: -0.005718231201171875|unsuper_loss: 0.0
+average reward score: 5.76953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.49%) |Training time=0.45s (20.87%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6267|ppo_ep: 1|act_loss: -0.0115966796875|cri_loss: -0.005588531494140625|unsuper_loss: 0.0
+average reward score: 5.59375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.69%) |Training time=0.44s (20.67%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6268|ppo_ep: 1|act_loss: -0.00019741058349609375|cri_loss: 0.00042819976806640625|unsuper_loss: 0.0
+average reward score: 6.16015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.41%) |Training time=0.45s (20.83%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
+[2023-04-14 12:37:40,204] [INFO] [logging.py:96:log_dist] [Rank 0] step=6270, skipped=78, lr=[1.4501863174963161e-06, 1.4501863174963161e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:37:40,222] [INFO] [timer.py:199:stop] epoch=0/micro_step=6270/global_step=6270, RunningAvgSamplesPerSec=105.6018430806941, CurrSamplesPerSec=111.38955300678872, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:37:40,326] [INFO] [logging.py:96:log_dist] [Rank 0] step=6270, skipped=104, lr=[7.693642088055492e-07, 7.693642088055492e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6269|ppo_ep: 1|act_loss: 0.0145263671875|cri_loss: 0.007442474365234375|unsuper_loss: 0.0
+average reward score: 5.6796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.27%) |Training time=0.45s (20.72%) |Others=0.11 (5.01%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6270|ppo_ep: 1|act_loss: -0.026397705078125|cri_loss: -0.01293182373046875|unsuper_loss: 0.0
+average reward score: 5.27734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.56%) |Training time=0.48s (21.89%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6271|ppo_ep: 1|act_loss: 0.002685546875|cri_loss: 0.0016155242919921875|unsuper_loss: 0.0
+average reward score: 5.8125
+-------------------------------------------------------------------------------------
+|E2E latency=2.45s |Gather latency=0.00s (0.00%) |Generate time=1.87s (76.54%) |Training time=0.47s (19.35%) |Others=0.10 (4.11%)|CurSamplesPerSec=13.08 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6272|ppo_ep: 1|act_loss: -0.01654052734375|cri_loss: -0.0077056884765625|unsuper_loss: 0.0
+average reward score: 5.28125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.10%) |Training time=0.46s (21.28%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6273|ppo_ep: 1|act_loss: -0.0284576416015625|cri_loss: -0.01349639892578125|unsuper_loss: 0.0
+average reward score: 5.84375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.22%) |Training time=0.46s (21.17%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6274|ppo_ep: 1|act_loss: -0.025360107421875|cri_loss: -0.01253509521484375|unsuper_loss: 0.0
+average reward score: 6.06640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.41%) |Training time=0.45s (20.97%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6275|ppo_ep: 1|act_loss: 0.0177001953125|cri_loss: 0.009307861328125|unsuper_loss: 0.0
+average reward score: 5.69921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.19%) |Training time=0.46s (21.19%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6276|ppo_ep: 1|act_loss: 0.0011444091796875|cri_loss: 0.0008845329284667969|unsuper_loss: 0.0
+average reward score: 5.859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.13%) |Training time=0.46s (21.26%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6277|ppo_ep: 1|act_loss: 0.042236328125|cri_loss: 0.021392822265625|unsuper_loss: 0.0
+average reward score: 5.07421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.88%) |Training time=0.47s (21.51%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6278|ppo_ep: 1|act_loss: -0.0009446144104003906|cri_loss: -0.00019884109497070312|unsuper_loss: 0.0
+average reward score: 5.7421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.17%) |Training time=0.46s (21.22%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
+[2023-04-14 12:38:02,230] [INFO] [logging.py:96:log_dist] [Rank 0] step=6280, skipped=78, lr=[1.4369351671900373e-06, 1.4369351671900373e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:38:02,249] [INFO] [timer.py:199:stop] epoch=0/micro_step=6280/global_step=6280, RunningAvgSamplesPerSec=105.6027195287297, CurrSamplesPerSec=109.6824271427334, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:38:02,342] [INFO] [logging.py:96:log_dist] [Rank 0] step=6280, skipped=104, lr=[7.624311521196831e-07, 7.624311521196831e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6279|ppo_ep: 1|act_loss: 0.0102691650390625|cri_loss: 0.00617218017578125|unsuper_loss: 0.0
+average reward score: 5.51171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.30%) |Training time=0.45s (21.06%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6280|ppo_ep: 1|act_loss: 0.00545501708984375|cri_loss: 0.003635406494140625|unsuper_loss: 0.0
+average reward score: 6.10546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.88%) |Training time=0.45s (20.48%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6281|ppo_ep: 1|act_loss: -0.0016050338745117188|cri_loss: -0.00028705596923828125|unsuper_loss: 0.0
+average reward score: 6.0390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.49%) |Training time=0.45s (20.88%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6282|ppo_ep: 1|act_loss: 0.011962890625|cri_loss: 0.006378173828125|unsuper_loss: 0.0
+average reward score: 5.6328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.45%) |Training time=0.45s (20.91%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6283|ppo_ep: 1|act_loss: -0.0033092498779296875|cri_loss: -0.0013580322265625|unsuper_loss: 0.0
+average reward score: 5.05078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.20%) |Training time=0.46s (21.16%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6284|ppo_ep: 1|act_loss: 0.0046234130859375|cri_loss: 0.002899169921875|unsuper_loss: 0.0
+average reward score: 5.61328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.01%) |Training time=0.46s (21.26%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6285|ppo_ep: 1|act_loss: -0.03997802734375|cri_loss: -0.0195159912109375|unsuper_loss: 0.0
+average reward score: 4.953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.50%) |Training time=0.45s (20.85%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6286|ppo_ep: 1|act_loss: 0.00927734375|cri_loss: 0.004932403564453125|unsuper_loss: 0.0
+average reward score: 5.30859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.74s (74.91%) |Training time=0.47s (20.44%) |Others=0.11 (4.64%)|CurSamplesPerSec=13.78 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6287|ppo_ep: 1|act_loss: -0.0032196044921875|cri_loss: -0.0010728836059570312|unsuper_loss: 0.0
+average reward score: 5.1796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.68%) |Training time=0.45s (20.68%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6288|ppo_ep: 1|act_loss: -0.00197601318359375|cri_loss: -0.0006690025329589844|unsuper_loss: 0.0
+average reward score: 5.46875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.30%) |Training time=0.46s (21.07%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53
+[2023-04-14 12:38:24,029] [INFO] [logging.py:96:log_dist] [Rank 0] step=6290, skipped=78, lr=[1.4237342361778406e-06, 1.4237342361778406e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:38:24,048] [INFO] [timer.py:199:stop] epoch=0/micro_step=6290/global_step=6290, RunningAvgSamplesPerSec=105.6091977124146, CurrSamplesPerSec=111.14943981290935, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:38:24,140] [INFO] [logging.py:96:log_dist] [Rank 0] step=6290, skipped=104, lr=[7.555238503958001e-07, 7.555238503958001e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6289|ppo_ep: 1|act_loss: 0.033782958984375|cri_loss: 0.01788330078125|unsuper_loss: 0.0
+average reward score: 6.03125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.53%) |Training time=0.45s (20.86%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6290|ppo_ep: 1|act_loss: -0.0182647705078125|cri_loss: -0.00897979736328125|unsuper_loss: 0.0
+average reward score: 6.25
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.26%) |Training time=0.46s (21.12%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
+[2023-04-14 12:38:28,458] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 6291|ppo_ep: 1|act_loss: 0.013763427734375|cri_loss: 0.007251739501953125|unsuper_loss: 0.0
+average reward score: 6.2109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.90%) |Training time=0.45s (20.95%) |Others=0.09 (4.14%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.53
+[2023-04-14 12:38:30,610] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 6292|ppo_ep: 1|act_loss: -0.0005426406860351562|cri_loss: -1.621246337890625e-05|unsuper_loss: 0.0
+average reward score: 5.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.67%) |Training time=0.46s (21.17%) |Others=0.09 (4.16%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6293|ppo_ep: 1|act_loss: 0.0010776519775390625|cri_loss: 0.0010042190551757812|unsuper_loss: 0.0
+average reward score: 5.796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.28%) |Training time=0.46s (21.10%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6294|ppo_ep: 1|act_loss: 0.04022216796875|cri_loss: 0.02130126953125|unsuper_loss: 0.0
+average reward score: 4.84375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.41%) |Training time=0.45s (20.97%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6295|ppo_ep: 1|act_loss: -0.014068603515625|cri_loss: -0.006320953369140625|unsuper_loss: 0.0
+average reward score: 5.421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.44%) |Training time=0.45s (20.93%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6296|ppo_ep: 1|act_loss: 0.002857208251953125|cri_loss: 0.001953125|unsuper_loss: 0.0
+average reward score: 5.296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.60%) |Training time=0.45s (20.76%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6297|ppo_ep: 1|act_loss: -0.0020904541015625|cri_loss: -0.0003910064697265625|unsuper_loss: 0.0
+average reward score: 5.08984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.35%) |Training time=0.46s (21.03%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6298|ppo_ep: 1|act_loss: -0.003864288330078125|cri_loss: 0.001556396484375|unsuper_loss: 0.0
+average reward score: 5.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.35%) |Training time=0.45s (20.94%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
+[2023-04-14 12:38:45,675] [INFO] [logging.py:96:log_dist] [Rank 0] step=6300, skipped=78, lr=[1.4105837201293704e-06, 1.4105837201293704e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:38:45,694] [INFO] [timer.py:199:stop] epoch=0/micro_step=6300/global_step=6300, RunningAvgSamplesPerSec=105.61516707150545, CurrSamplesPerSec=107.56875275999228, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:38:45,786] [INFO] [logging.py:96:log_dist] [Rank 0] step=6300, skipped=106, lr=[7.500166214034776e-07, 7.500166214034776e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6299|ppo_ep: 1|act_loss: 0.009979248046875|cri_loss: 0.00518798828125|unsuper_loss: 0.0
+average reward score: 5.9375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.32%) |Training time=0.46s (21.09%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6300|ppo_ep: 1|act_loss: 0.019439697265625|cri_loss: 0.010284423828125|unsuper_loss: 0.0
+average reward score: 4.953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.65s (73.93%) |Training time=0.45s (20.32%) |Others=0.13 (5.75%)|CurSamplesPerSec=14.35 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6301|ppo_ep: 1|act_loss: -0.00232696533203125|cri_loss: -0.00095367431640625|unsuper_loss: 0.0
+average reward score: 5.69921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.66s (73.72%) |Training time=0.49s (21.79%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.22 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6302|ppo_ep: 1|act_loss: 0.0011081695556640625|cri_loss: 0.0012903213500976562|unsuper_loss: 0.0
+average reward score: 5.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.54%) |Training time=0.45s (20.82%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6303|ppo_ep: 1|act_loss: -0.022674560546875|cri_loss: -0.0111541748046875|unsuper_loss: 0.0
+average reward score: 5.45703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.61%) |Training time=0.45s (20.74%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6304|ppo_ep: 1|act_loss: -0.00984954833984375|cri_loss: -0.004364013671875|unsuper_loss: 0.0
+average reward score: 6.0546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.52%) |Training time=0.45s (20.85%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6305|ppo_ep: 1|act_loss: -0.0477294921875|cri_loss: -0.02325439453125|unsuper_loss: 0.0
+average reward score: 5.34765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.43%) |Training time=0.45s (20.94%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6306|ppo_ep: 1|act_loss: -0.0142974853515625|cri_loss: -0.007049560546875|unsuper_loss: 0.0
+average reward score: 5.67578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.67%) |Training time=0.45s (20.69%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6307|ppo_ep: 1|act_loss: 0.00475311279296875|cri_loss: 0.002620697021484375|unsuper_loss: 0.0
+average reward score: 5.78515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.35%) |Training time=0.46s (21.01%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6308|ppo_ep: 1|act_loss: 0.0255126953125|cri_loss: 0.01300811767578125|unsuper_loss: 0.0
+average reward score: 5.3046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.33%) |Training time=0.45s (20.99%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
+[2023-04-14 12:39:07,456] [INFO] [logging.py:96:log_dist] [Rank 0] step=6310, skipped=78, lr=[1.3974838139670003e-06, 1.3974838139670003e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:39:07,475] [INFO] [timer.py:199:stop] epoch=0/micro_step=6310/global_step=6310, RunningAvgSamplesPerSec=105.62331246396326, CurrSamplesPerSec=111.24064007134373, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:39:07,567] [INFO] [logging.py:96:log_dist] [Rank 0] step=6310, skipped=106, lr=[7.431559363585e-07, 7.431559363585e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6309|ppo_ep: 1|act_loss: 0.0016880035400390625|cri_loss: 0.0015850067138671875|unsuper_loss: 0.0
+average reward score: 5.26953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.52%) |Training time=0.45s (20.83%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6310|ppo_ep: 1|act_loss: 0.017578125|cri_loss: 0.0090484619140625|unsuper_loss: 0.0
+average reward score: 5.4765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.51%) |Training time=0.45s (20.83%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6311|ppo_ep: 1|act_loss: 0.087646484375|cri_loss: 0.044891357421875|unsuper_loss: 0.0
+average reward score: 5.66796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.30%) |Training time=0.45s (21.00%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6312|ppo_ep: 1|act_loss: 0.0124053955078125|cri_loss: 0.006786346435546875|unsuper_loss: 0.0
+average reward score: 5.42578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.37%) |Training time=0.45s (20.98%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
+[2023-04-14 12:39:16,100] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 6313|ppo_ep: 1|act_loss: 0.0188751220703125|cri_loss: 0.00971221923828125|unsuper_loss: 0.0
+average reward score: 5.96875
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.45%) |Training time=0.42s (19.87%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.53
+[2023-04-14 12:39:18,241] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 6314|ppo_ep: 1|act_loss: 0.03717041015625|cri_loss: 0.019134521484375|unsuper_loss: 0.0
+average reward score: 6.0390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.14%) |Training time=0.43s (19.97%) |Others=0.10 (4.89%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6315|ppo_ep: 1|act_loss: 0.008331298828125|cri_loss: 0.004657745361328125|unsuper_loss: 0.0
+average reward score: 6.171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.86%) |Training time=0.44s (20.43%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.53
+[2023-04-14 12:39:22,730] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
+epoch: 0|step: 6316|ppo_ep: 1|act_loss: 0.00103759765625|cri_loss: 0.0007176399230957031|unsuper_loss: 0.0
+average reward score: 5.828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.84s (79.41%) |Training time=0.37s (16.09%) |Others=0.10 (4.50%)|CurSamplesPerSec=13.83 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6317|ppo_ep: 1|act_loss: 0.035888671875|cri_loss: 0.01885986328125|unsuper_loss: 0.0
+average reward score: 5.78515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.67s (76.86%) |Training time=0.40s (18.42%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6318|ppo_ep: 1|act_loss: 0.0038299560546875|cri_loss: 0.0024509429931640625|unsuper_loss: 0.0
+average reward score: 4.94140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.94%) |Training time=0.46s (21.41%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
+[2023-04-14 12:39:29,229] [INFO] [logging.py:96:log_dist] [Rank 0] step=6320, skipped=81, lr=[1.3883440965450009e-06, 1.3883440965450009e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:39:29,247] [INFO] [timer.py:199:stop] epoch=0/micro_step=6320/global_step=6320, RunningAvgSamplesPerSec=105.64005873369146, CurrSamplesPerSec=108.1665674061908, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:39:29,341] [INFO] [logging.py:96:log_dist] [Rank 0] step=6320, skipped=106, lr=[7.363212919807011e-07, 7.363212919807011e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6319|ppo_ep: 1|act_loss: -0.0096893310546875|cri_loss: -0.004680633544921875|unsuper_loss: 0.0
+average reward score: 5.80078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.25%) |Training time=0.46s (21.10%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6320|ppo_ep: 1|act_loss: -0.06494140625|cri_loss: -0.031707763671875|unsuper_loss: 0.0
+average reward score: 5.1640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.05%) |Training time=0.46s (21.31%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6321|ppo_ep: 1|act_loss: -0.02008056640625|cri_loss: -0.0095672607421875|unsuper_loss: 0.0
+average reward score: 4.859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.56%) |Training time=0.45s (20.77%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6322|ppo_ep: 1|act_loss: -0.02349853515625|cri_loss: -0.0114288330078125|unsuper_loss: 0.0
+average reward score: 5.22265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.68%) |Training time=0.44s (20.51%) |Others=0.10 (4.80%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6323|ppo_ep: 1|act_loss: -0.0170745849609375|cri_loss: -0.00827789306640625|unsuper_loss: 0.0
+average reward score: 5.53125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.03%) |Training time=0.46s (21.33%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6324|ppo_ep: 1|act_loss: -0.0194549560546875|cri_loss: -0.00948333740234375|unsuper_loss: 0.0
+average reward score: 5.41796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.99%) |Training time=0.47s (21.37%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6325|ppo_ep: 1|act_loss: -0.0080718994140625|cri_loss: -0.00390625|unsuper_loss: 0.0
+average reward score: 5.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.29%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6326|ppo_ep: 1|act_loss: 0.024169921875|cri_loss: 0.0124053955078125|unsuper_loss: 0.0
+average reward score: 5.6640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.81%) |Training time=0.47s (21.53%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6327|ppo_ep: 1|act_loss: 0.0140838623046875|cri_loss: 0.007419586181640625|unsuper_loss: 0.0
+average reward score: 5.3828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.46s (21.36%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6328|ppo_ep: 1|act_loss: 0.037841796875|cri_loss: 0.0194091796875|unsuper_loss: 0.0
+average reward score: 5.9453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.82%) |Training time=0.47s (21.52%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
+[2023-04-14 12:39:50,932] [INFO] [logging.py:96:log_dist] [Rank 0] step=6330, skipped=81, lr=[1.3753306724110857e-06, 1.3753306724110857e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:39:50,951] [INFO] [timer.py:199:stop] epoch=0/micro_step=6330/global_step=6330, RunningAvgSamplesPerSec=105.64268490460414, CurrSamplesPerSec=102.43962470224307, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:39:51,059] [INFO] [logging.py:96:log_dist] [Rank 0] step=6330, skipped=106, lr=[7.29512789575999e-07, 7.29512789575999e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6329|ppo_ep: 1|act_loss: 0.0162200927734375|cri_loss: 0.00836944580078125|unsuper_loss: 0.0
+average reward score: 5.578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.32%) |Training time=0.48s (21.44%) |Others=0.12 (5.24%)|CurSamplesPerSec=14.43 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6330|ppo_ep: 1|act_loss: 0.0054931640625|cri_loss: 0.0031185150146484375|unsuper_loss: 0.0
+average reward score: 5.328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.42%) |Training time=0.45s (20.12%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.15 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6331|ppo_ep: 1|act_loss: -0.003711700439453125|cri_loss: -0.0015411376953125|unsuper_loss: 0.0
+average reward score: 5.39453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.31%) |Training time=0.45s (19.34%) |Others=0.10 (4.35%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6332|ppo_ep: 1|act_loss: 0.01558685302734375|cri_loss: 0.00820159912109375|unsuper_loss: 0.0
+average reward score: 5.1015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.20%) |Training time=0.46s (21.13%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6333|ppo_ep: 1|act_loss: 0.0157623291015625|cri_loss: 0.00823974609375|unsuper_loss: 0.0
+average reward score: 5.14453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.05%) |Training time=0.46s (21.31%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6334|ppo_ep: 1|act_loss: -0.005840301513671875|cri_loss: -0.002635955810546875|unsuper_loss: 0.0
+average reward score: 5.44921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.77%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6335|ppo_ep: 1|act_loss: -0.0248260498046875|cri_loss: -0.01216888427734375|unsuper_loss: 0.0
+average reward score: 5.03125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.16%) |Training time=0.46s (21.21%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6336|ppo_ep: 1|act_loss: -0.03643798828125|cri_loss: -0.0167236328125|unsuper_loss: 0.0
+average reward score: 5.16015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.65%) |Training time=0.47s (21.70%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6337|ppo_ep: 1|act_loss: -0.00493621826171875|cri_loss: -0.0005397796630859375|unsuper_loss: 0.0
+average reward score: 5.70703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.68%) |Training time=0.47s (21.67%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
+[2023-04-14 12:40:10,666] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192
+epoch: 0|step: 6338|ppo_ep: 1|act_loss: 0.005001068115234375|cri_loss: 0.0028247833251953125|unsuper_loss: 0.0
+average reward score: 5.953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.45%) |Training time=0.45s (20.89%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.53
+[2023-04-14 12:40:12,806] [INFO] [logging.py:96:log_dist] [Rank 0] step=6340, skipped=82, lr=[1.3636623034290526e-06, 1.3636623034290526e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:40:12,824] [INFO] [timer.py:199:stop] epoch=0/micro_step=6340/global_step=6340, RunningAvgSamplesPerSec=105.64557854690106, CurrSamplesPerSec=105.26971448381123, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:40:12,917] [INFO] [logging.py:96:log_dist] [Rank 0] step=6340, skipped=106, lr=[7.227305300628223e-07, 7.227305300628223e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6339|ppo_ep: 1|act_loss: -0.0091400146484375|cri_loss: -0.004283905029296875|unsuper_loss: 0.0
+average reward score: 5.25390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.63%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6340|ppo_ep: 1|act_loss: -0.0142822265625|cri_loss: -0.007076263427734375|unsuper_loss: 0.0
+average reward score: 5.7734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.47s (21.87%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6341|ppo_ep: 1|act_loss: 0.003917694091796875|cri_loss: 0.00240325927734375|unsuper_loss: 0.0
+average reward score: 4.93359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.12%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6342|ppo_ep: 1|act_loss: -0.0396728515625|cri_loss: -0.0185089111328125|unsuper_loss: 0.0
+average reward score: 5.23828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.14%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6343|ppo_ep: 1|act_loss: -0.007843017578125|cri_loss: -0.003810882568359375|unsuper_loss: 0.0
+average reward score: 5.45703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.18%) |Training time=0.48s (22.19%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6344|ppo_ep: 1|act_loss: 0.00955963134765625|cri_loss: 0.005218505859375|unsuper_loss: 0.0
+average reward score: 5.94921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.80%) |Training time=0.47s (21.58%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6345|ppo_ep: 1|act_loss: 0.01165771484375|cri_loss: 0.006011962890625|unsuper_loss: 0.0
+average reward score: 5.3046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.23%) |Training time=0.48s (21.42%) |Others=0.16 (7.35%)|CurSamplesPerSec=14.34 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6346|ppo_ep: 1|act_loss: 0.035186767578125|cri_loss: 0.01885986328125|unsuper_loss: 0.0
+average reward score: 5.18359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.67s (74.30%) |Training time=0.48s (21.24%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.23 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6347|ppo_ep: 1|act_loss: -0.0005655288696289062|cri_loss: -0.00018525123596191406|unsuper_loss: 0.0
+average reward score: 4.78125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.61%) |Training time=0.47s (21.74%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6348|ppo_ep: 1|act_loss: 0.0498046875|cri_loss: 0.0255126953125|unsuper_loss: 0.0
+average reward score: 5.49609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.48s (21.98%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.53
+[2023-04-14 12:40:34,627] [INFO] [logging.py:96:log_dist] [Rank 0] step=6350, skipped=82, lr=[1.3507461951905125e-06, 1.3507461951905125e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:40:34,645] [INFO] [timer.py:199:stop] epoch=0/micro_step=6350/global_step=6350, RunningAvgSamplesPerSec=105.63979259857506, CurrSamplesPerSec=102.75269498806098, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:40:34,738] [INFO] [logging.py:96:log_dist] [Rank 0] step=6350, skipped=106, lr=[7.159746139706194e-07, 7.159746139706194e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6349|ppo_ep: 1|act_loss: -0.00492095947265625|cri_loss: -0.0022411346435546875|unsuper_loss: 0.0
+average reward score: 5.87890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.47s (21.91%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6350|ppo_ep: 1|act_loss: -0.01776123046875|cri_loss: -0.0073699951171875|unsuper_loss: 0.0
+average reward score: 4.90234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.53%) |Training time=0.47s (21.83%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6351|ppo_ep: 1|act_loss: 0.020477294921875|cri_loss: 0.01078033447265625|unsuper_loss: 0.0
+average reward score: 4.96484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.82%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6352|ppo_ep: 1|act_loss: 0.053955078125|cri_loss: 0.0275115966796875|unsuper_loss: 0.0
+average reward score: 4.65625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.76%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6353|ppo_ep: 1|act_loss: 0.04095458984375|cri_loss: 0.021240234375|unsuper_loss: 0.0
+average reward score: 5.26953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.49%) |Training time=0.47s (21.86%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6354|ppo_ep: 1|act_loss: -0.0173492431640625|cri_loss: -0.00838470458984375|unsuper_loss: 0.0
+average reward score: 5.28125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.52%) |Training time=0.47s (21.82%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6355|ppo_ep: 1|act_loss: -0.022491455078125|cri_loss: -0.01107025146484375|unsuper_loss: 0.0
+average reward score: 5.1953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.82%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6356|ppo_ep: 1|act_loss: 0.01422882080078125|cri_loss: 0.00811767578125|unsuper_loss: 0.0
+average reward score: 5.2421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.31%) |Training time=0.48s (22.08%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6357|ppo_ep: 1|act_loss: 0.045135498046875|cri_loss: 0.0232696533203125|unsuper_loss: 0.0
+average reward score: 5.5703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.49%) |Training time=0.47s (21.89%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6358|ppo_ep: 1|act_loss: -0.01306915283203125|cri_loss: -0.00640106201171875|unsuper_loss: 0.0
+average reward score: 5.328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.42%) |Training time=0.48s (21.99%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.53
+[2023-04-14 12:40:56,326] [INFO] [logging.py:96:log_dist] [Rank 0] step=6360, skipped=82, lr=[1.3378815837745404e-06, 1.3378815837745404e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:40:56,344] [INFO] [timer.py:199:stop] epoch=0/micro_step=6360/global_step=6360, RunningAvgSamplesPerSec=105.63503569608532, CurrSamplesPerSec=101.74754325784137, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:40:56,437] [INFO] [logging.py:96:log_dist] [Rank 0] step=6360, skipped=106, lr=[7.092451414383644e-07, 7.092451414383644e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6359|ppo_ep: 1|act_loss: 0.031951904296875|cri_loss: 0.0164794921875|unsuper_loss: 0.0
+average reward score: 5.8359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.80%) |Training time=0.48s (21.65%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6360|ppo_ep: 1|act_loss: 0.0017070770263671875|cri_loss: 0.00101470947265625|unsuper_loss: 0.0
+average reward score: 5.47265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.71s (73.05%) |Training time=0.53s (22.65%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6361|ppo_ep: 1|act_loss: 0.020721435546875|cri_loss: 0.01094818115234375|unsuper_loss: 0.0
+average reward score: 5.921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.39%) |Training time=0.47s (21.96%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6362|ppo_ep: 1|act_loss: 0.03717041015625|cri_loss: 0.0189056396484375|unsuper_loss: 0.0
+average reward score: 6.07421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.27%) |Training time=0.48s (22.10%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6363|ppo_ep: 1|act_loss: 0.008636474609375|cri_loss: 0.00461578369140625|unsuper_loss: 0.0
+average reward score: 5.41015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.33%) |Training time=0.48s (22.10%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6364|ppo_ep: 1|act_loss: 0.0145111083984375|cri_loss: 0.00763702392578125|unsuper_loss: 0.0
+average reward score: 4.7109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.29%) |Training time=0.48s (22.09%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6365|ppo_ep: 1|act_loss: -0.01392364501953125|cri_loss: -0.006816864013671875|unsuper_loss: 0.0
+average reward score: 5.21484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.42%) |Training time=0.48s (21.97%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6366|ppo_ep: 1|act_loss: -0.023895263671875|cri_loss: -0.0116729736328125|unsuper_loss: 0.0
+average reward score: 5.3515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.37%) |Training time=0.48s (22.01%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6367|ppo_ep: 1|act_loss: 0.002269744873046875|cri_loss: 0.001384735107421875|unsuper_loss: 0.0
+average reward score: 5.75
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.48s (22.01%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6368|ppo_ep: 1|act_loss: 0.04046630859375|cri_loss: 0.022247314453125|unsuper_loss: 0.0
+average reward score: 5.83203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.22%) |Training time=0.48s (22.17%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.53
+[2023-04-14 12:41:18,188] [INFO] [logging.py:96:log_dist] [Rank 0] step=6370, skipped=82, lr=[1.3250686598657134e-06, 1.3250686598657134e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:41:18,206] [INFO] [timer.py:199:stop] epoch=0/micro_step=6370/global_step=6370, RunningAvgSamplesPerSec=105.62690440321303, CurrSamplesPerSec=102.08720838251938, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:41:18,299] [INFO] [logging.py:96:log_dist] [Rank 0] step=6370, skipped=106, lr=[7.025422122130748e-07, 7.025422122130748e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6369|ppo_ep: 1|act_loss: -0.00763702392578125|cri_loss: -0.003749847412109375|unsuper_loss: 0.0
+average reward score: 5.56640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.48s (21.99%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6370|ppo_ep: 1|act_loss: 0.00490570068359375|cri_loss: 0.003025054931640625|unsuper_loss: 0.0
+average reward score: 5.6796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.28%) |Training time=0.48s (22.10%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6371|ppo_ep: 1|act_loss: 0.0430908203125|cri_loss: 0.023895263671875|unsuper_loss: 0.0
+average reward score: 5.3984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.79%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6372|ppo_ep: 1|act_loss: 0.001590728759765625|cri_loss: 0.001018524169921875|unsuper_loss: 0.0
+average reward score: 5.15234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.34%) |Training time=0.48s (22.05%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6373|ppo_ep: 1|act_loss: -0.00396728515625|cri_loss: -0.0017547607421875|unsuper_loss: 0.0
+average reward score: 5.66015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.84%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6374|ppo_ep: 1|act_loss: 0.0047454833984375|cri_loss: 0.002521514892578125|unsuper_loss: 0.0
+average reward score: 5.64453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.99%) |Training time=0.46s (21.35%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6375|ppo_ep: 1|act_loss: -0.0148773193359375|cri_loss: -0.007122039794921875|unsuper_loss: 0.0
+average reward score: 4.546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.57%) |Training time=0.47s (20.13%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.63 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6376|ppo_ep: 1|act_loss: -0.0074920654296875|cri_loss: -0.0035533905029296875|unsuper_loss: 0.0
+average reward score: 5.8671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.47s (21.95%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6377|ppo_ep: 1|act_loss: -0.0041961669921875|cri_loss: -0.0018596649169921875|unsuper_loss: 0.0
+average reward score: 5.05078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.29%) |Training time=0.48s (22.09%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6378|ppo_ep: 1|act_loss: -0.0028533935546875|cri_loss: -0.001171112060546875|unsuper_loss: 0.0
+average reward score: 5.5703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.50%) |Training time=0.47s (21.85%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.53
+[2023-04-14 12:41:40,024] [INFO] [logging.py:96:log_dist] [Rank 0] step=6380, skipped=82, lr=[1.3123076133824706e-06, 1.3123076133824706e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:41:40,042] [INFO] [timer.py:199:stop] epoch=0/micro_step=6380/global_step=6380, RunningAvgSamplesPerSec=105.62234845103798, CurrSamplesPerSec=103.69900664528576, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:41:40,135] [INFO] [logging.py:96:log_dist] [Rank 0] step=6380, skipped=106, lr=[6.958659256483305e-07, 6.958659256483305e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6379|ppo_ep: 1|act_loss: -0.0093841552734375|cri_loss: -0.004528045654296875|unsuper_loss: 0.0
+average reward score: 5.49609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.82%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6380|ppo_ep: 1|act_loss: -0.0149078369140625|cri_loss: -0.00719451904296875|unsuper_loss: 0.0
+average reward score: 4.71484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.80%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6381|ppo_ep: 1|act_loss: -0.0292510986328125|cri_loss: -0.01401519775390625|unsuper_loss: 0.0
+average reward score: 5.12109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.52%) |Training time=0.47s (21.80%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6382|ppo_ep: 1|act_loss: -0.0028247833251953125|cri_loss: -0.0012359619140625|unsuper_loss: 0.0
+average reward score: 5.76953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.87%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6383|ppo_ep: 1|act_loss: -0.003734588623046875|cri_loss: -0.0015010833740234375|unsuper_loss: 0.0
+average reward score: 5.01953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.78%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6384|ppo_ep: 1|act_loss: -0.0234222412109375|cri_loss: -0.01132965087890625|unsuper_loss: 0.0
+average reward score: 5.2578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.13%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6385|ppo_ep: 1|act_loss: -0.020721435546875|cri_loss: -0.010101318359375|unsuper_loss: 0.0
+average reward score: 4.78125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.53%) |Training time=0.47s (21.84%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6386|ppo_ep: 1|act_loss: 0.003948211669921875|cri_loss: 0.0021457672119140625|unsuper_loss: 0.0
+average reward score: 6.00390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.36%) |Training time=0.47s (21.99%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6387|ppo_ep: 1|act_loss: 0.0122222900390625|cri_loss: 0.00632476806640625|unsuper_loss: 0.0
+average reward score: 5.828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.39%) |Training time=0.48s (21.97%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6388|ppo_ep: 1|act_loss: 0.021087646484375|cri_loss: 0.01094818115234375|unsuper_loss: 0.0
+average reward score: 5.23828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.52%) |Training time=0.47s (21.84%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
+[2023-04-14 12:42:01,739] [INFO] [logging.py:96:log_dist] [Rank 0] step=6390, skipped=82, lr=[1.299598633474306e-06, 1.299598633474306e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:42:01,758] [INFO] [timer.py:199:stop] epoch=0/micro_step=6390/global_step=6390, RunningAvgSamplesPerSec=105.61863925776754, CurrSamplesPerSec=104.40157717345564, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:42:01,851] [INFO] [logging.py:96:log_dist] [Rank 0] step=6390, skipped=106, lr=[6.892163807028043e-07, 6.892163807028043e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6389|ppo_ep: 1|act_loss: -0.0061798095703125|cri_loss: -0.00295257568359375|unsuper_loss: 0.0
+average reward score: 5.78125
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.94%) |Training time=0.47s (20.66%) |Others=0.10 (4.40%)|CurSamplesPerSec=14.07 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6390|ppo_ep: 1|act_loss: 0.0626220703125|cri_loss: 0.03173828125|unsuper_loss: 0.0
+average reward score: 5.11328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.78s (75.54%) |Training time=0.48s (20.19%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.57 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6391|ppo_ep: 1|act_loss: 0.016357421875|cri_loss: 0.0083770751953125|unsuper_loss: 0.0
+average reward score: 5.328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.39%) |Training time=0.47s (21.95%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6392|ppo_ep: 1|act_loss: 0.0127716064453125|cri_loss: 0.0069580078125|unsuper_loss: 0.0
+average reward score: 5.30078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.11%) |Training time=0.48s (22.27%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
+[2023-04-14 12:42:10,680] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 6393|ppo_ep: 1|act_loss: 0.0075225830078125|cri_loss: 0.004016876220703125|unsuper_loss: 0.0
+average reward score: 5.94921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.60%) |Training time=0.48s (22.23%) |Others=0.09 (4.17%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.53
+[2023-04-14 12:42:12,838] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 6394|ppo_ep: 1|act_loss: -0.0056915283203125|cri_loss: -0.0023746490478515625|unsuper_loss: 0.0
+average reward score: 5.25390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.49s (22.54%) |Others=0.09 (4.14%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6395|ppo_ep: 1|act_loss: -0.0128631591796875|cri_loss: -0.005878448486328125|unsuper_loss: 0.0
+average reward score: 5.80859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.04%) |Training time=0.48s (22.35%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6396|ppo_ep: 1|act_loss: 0.03961181640625|cri_loss: 0.0205078125|unsuper_loss: 0.0
+average reward score: 5.4921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.26%) |Training time=0.48s (22.10%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6397|ppo_ep: 1|act_loss: -0.0031223297119140625|cri_loss: -0.0006008148193359375|unsuper_loss: 0.0
+average reward score: 5.31640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.86%) |Training time=0.49s (22.51%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6398|ppo_ep: 1|act_loss: -0.008331298828125|cri_loss: -0.0038623809814453125|unsuper_loss: 0.0
+average reward score: 5.1015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.29%) |Training time=0.48s (22.08%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.53
+[2023-04-14 12:42:23,529] [INFO] [logging.py:96:log_dist] [Rank 0] step=6400, skipped=82, lr=[1.286941908518962e-06, 1.286941908518962e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:42:23,547] [INFO] [timer.py:199:stop] epoch=0/micro_step=6400/global_step=6400, RunningAvgSamplesPerSec=105.61073946158585, CurrSamplesPerSec=101.53287430195913, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:42:23,640] [INFO] [logging.py:96:log_dist] [Rank 0] step=6400, skipped=108, lr=[6.839160649594401e-07, 6.839160649594401e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6399|ppo_ep: 1|act_loss: -0.0230865478515625|cri_loss: -0.01132965087890625|unsuper_loss: 0.0
+average reward score: 4.8046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.26%) |Training time=0.48s (22.13%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6400|ppo_ep: 1|act_loss: -0.00251007080078125|cri_loss: -0.0011920928955078125|unsuper_loss: 0.0
+average reward score: 5.83984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.25%) |Training time=0.48s (22.10%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6401|ppo_ep: 1|act_loss: 0.01190185546875|cri_loss: 0.006153106689453125|unsuper_loss: 0.0
+average reward score: 4.70703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.88%) |Training time=0.49s (22.51%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6402|ppo_ep: 1|act_loss: -0.0122222900390625|cri_loss: -0.00583648681640625|unsuper_loss: 0.0
+average reward score: 5.12109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.25%) |Training time=0.48s (22.10%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6403|ppo_ep: 1|act_loss: -0.01561737060546875|cri_loss: -0.00749969482421875|unsuper_loss: 0.0
+average reward score: 5.87890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.47s (21.96%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6404|ppo_ep: 1|act_loss: 0.018951416015625|cri_loss: 0.009796142578125|unsuper_loss: 0.0
+average reward score: 5.26171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.94%) |Training time=0.49s (22.42%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6405|ppo_ep: 1|act_loss: -0.0061798095703125|cri_loss: -0.0029392242431640625|unsuper_loss: 0.0
+average reward score: 5.84765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.40%) |Training time=0.53s (23.90%) |Others=0.11 (4.70%)|CurSamplesPerSec=14.33 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6406|ppo_ep: 1|act_loss: 0.00415802001953125|cri_loss: 0.0023365020751953125|unsuper_loss: 0.0
+average reward score: 4.7890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.13%) |Training time=0.48s (22.25%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6407|ppo_ep: 1|act_loss: -0.00318145751953125|cri_loss: -0.0015420913696289062|unsuper_loss: 0.0
+average reward score: 6.234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.35%) |Training time=0.48s (22.04%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6408|ppo_ep: 1|act_loss: 0.0098114013671875|cri_loss: 0.005237579345703125|unsuper_loss: 0.0
+average reward score: 5.45703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.14%) |Training time=0.48s (22.22%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
+[2023-04-14 12:42:45,229] [INFO] [logging.py:96:log_dist] [Rank 0] step=6410, skipped=82, lr=[1.2743376261196346e-06, 1.2743376261196346e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:42:45,248] [INFO] [timer.py:199:stop] epoch=0/micro_step=6410/global_step=6410, RunningAvgSamplesPerSec=105.60036418493905, CurrSamplesPerSec=102.12643591575606, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:42:45,340] [INFO] [logging.py:96:log_dist] [Rank 0] step=6410, skipped=108, lr=[6.77314903038199e-07, 6.77314903038199e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6409|ppo_ep: 1|act_loss: -0.0075836181640625|cri_loss: -0.0036602020263671875|unsuper_loss: 0.0
+average reward score: 5.0625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.48s (22.02%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6410|ppo_ep: 1|act_loss: 0.05438232421875|cri_loss: 0.031829833984375|unsuper_loss: 0.0
+average reward score: 5.4453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.09%) |Training time=0.48s (22.30%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6411|ppo_ep: 1|act_loss: -0.015777587890625|cri_loss: -0.007659912109375|unsuper_loss: 0.0
+average reward score: 5.47265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.98%) |Training time=0.49s (22.39%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6412|ppo_ep: 1|act_loss: 0.059295654296875|cri_loss: 0.0310211181640625|unsuper_loss: 0.0
+average reward score: 5.64453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.47s (21.93%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6413|ppo_ep: 1|act_loss: -0.0099945068359375|cri_loss: -0.004230499267578125|unsuper_loss: 0.0
+average reward score: 5.359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.80%) |Training time=0.49s (22.61%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6414|ppo_ep: 1|act_loss: -0.0134124755859375|cri_loss: -0.006267547607421875|unsuper_loss: 0.0
+average reward score: 5.87109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.16%) |Training time=0.48s (22.20%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6415|ppo_ep: 1|act_loss: 0.029022216796875|cri_loss: 0.01480865478515625|unsuper_loss: 0.0
+average reward score: 5.43359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.17%) |Training time=0.48s (22.19%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6416|ppo_ep: 1|act_loss: -0.0063018798828125|cri_loss: -0.003021240234375|unsuper_loss: 0.0
+average reward score: 5.53515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.24%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6417|ppo_ep: 1|act_loss: 0.0084686279296875|cri_loss: 0.004512786865234375|unsuper_loss: 0.0
+average reward score: 5.0703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.23%) |Training time=0.48s (22.15%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6418|ppo_ep: 1|act_loss: 0.00885009765625|cri_loss: 0.005138397216796875|unsuper_loss: 0.0
+average reward score: 4.6484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.64s (73.63%) |Training time=0.48s (21.50%) |Others=0.11 (4.88%)|CurSamplesPerSec=14.38 |AvgSamplesPerSec=14.53
+[2023-04-14 12:43:07,049] [INFO] [logging.py:96:log_dist] [Rank 0] step=6420, skipped=82, lr=[1.2617859731021937e-06, 1.2617859731021937e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:43:07,067] [INFO] [timer.py:199:stop] epoch=0/micro_step=6420/global_step=6420, RunningAvgSamplesPerSec=105.59150460729504, CurrSamplesPerSec=99.78976137635176, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:43:07,160] [INFO] [logging.py:96:log_dist] [Rank 0] step=6420, skipped=108, lr=[6.707407577070841e-07, 6.707407577070841e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6419|ppo_ep: 1|act_loss: 0.057830810546875|cri_loss: 0.030792236328125|unsuper_loss: 0.0
+average reward score: 5.1328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.31%) |Training time=0.48s (21.31%) |Others=0.10 (4.38%)|CurSamplesPerSec=14.11 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6420|ppo_ep: 1|act_loss: 0.03582763671875|cri_loss: 0.019805908203125|unsuper_loss: 0.0
+average reward score: 4.6484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.81%) |Training time=0.49s (22.59%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6421|ppo_ep: 1|act_loss: -0.001499176025390625|cri_loss: -0.0002970695495605469|unsuper_loss: 0.0
+average reward score: 4.9140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.47%) |Training time=0.47s (20.25%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6422|ppo_ep: 1|act_loss: 0.024444580078125|cri_loss: 0.013214111328125|unsuper_loss: 0.0
+average reward score: 5.203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.47s (21.93%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6423|ppo_ep: 1|act_loss: 0.00295257568359375|cri_loss: 0.0017480850219726562|unsuper_loss: 0.0
+average reward score: 5.3671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.61%) |Training time=0.47s (21.79%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6424|ppo_ep: 1|act_loss: -0.0242156982421875|cri_loss: -0.0118255615234375|unsuper_loss: 0.0
+average reward score: 5.88671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.10%) |Training time=0.48s (22.27%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6425|ppo_ep: 1|act_loss: 0.0140228271484375|cri_loss: 0.007411956787109375|unsuper_loss: 0.0
+average reward score: 4.94140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.03%) |Training time=0.48s (22.22%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6426|ppo_ep: 1|act_loss: -0.0147705078125|cri_loss: -0.007007598876953125|unsuper_loss: 0.0
+average reward score: 4.24609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.47s (21.92%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6427|ppo_ep: 1|act_loss: -0.00894927978515625|cri_loss: -0.0044097900390625|unsuper_loss: 0.0
+average reward score: 5.5390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.22%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6428|ppo_ep: 1|act_loss: -0.016326904296875|cri_loss: -0.0078277587890625|unsuper_loss: 0.0
+average reward score: 5.37890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.04%) |Training time=0.49s (22.35%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
+[2023-04-14 12:43:28,885] [INFO] [logging.py:96:log_dist] [Rank 0] step=6430, skipped=82, lr=[1.2492871355124154e-06, 1.2492871355124154e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:43:28,903] [INFO] [timer.py:199:stop] epoch=0/micro_step=6430/global_step=6430, RunningAvgSamplesPerSec=105.58394898947627, CurrSamplesPerSec=101.49256638680498, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:43:28,996] [INFO] [logging.py:96:log_dist] [Rank 0] step=6430, skipped=108, lr=[6.641937264107868e-07, 6.641937264107868e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6429|ppo_ep: 1|act_loss: -0.0140533447265625|cri_loss: -0.00669097900390625|unsuper_loss: 0.0
+average reward score: 4.6875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.31%) |Training time=0.48s (22.09%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6430|ppo_ep: 1|act_loss: -0.015594482421875|cri_loss: -0.007366180419921875|unsuper_loss: 0.0
+average reward score: 5.703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.26%) |Training time=0.48s (22.10%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6431|ppo_ep: 1|act_loss: 0.004291534423828125|cri_loss: 0.0024585723876953125|unsuper_loss: 0.0
+average reward score: 5.0
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.49%) |Training time=0.47s (21.88%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6432|ppo_ep: 1|act_loss: 0.0127105712890625|cri_loss: 0.007175445556640625|unsuper_loss: 0.0
+average reward score: 5.15625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.30%) |Training time=0.48s (22.09%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6433|ppo_ep: 1|act_loss: -0.01412200927734375|cri_loss: -0.00665283203125|unsuper_loss: 0.0
+average reward score: 5.9140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.46%) |Training time=0.47s (21.93%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6434|ppo_ep: 1|act_loss: 0.0120086669921875|cri_loss: 0.00629425048828125|unsuper_loss: 0.0
+average reward score: 4.984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.82%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6435|ppo_ep: 1|act_loss: -0.0128173828125|cri_loss: -0.005954742431640625|unsuper_loss: 0.0
+average reward score: 4.84375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.83%) |Training time=0.47s (21.55%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6436|ppo_ep: 1|act_loss: -0.0032863616943359375|cri_loss: -0.0015459060668945312|unsuper_loss: 0.0
+average reward score: 5.62890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.60s (70.26%) |Training time=0.58s (25.32%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.06 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6437|ppo_ep: 1|act_loss: 0.00627899169921875|cri_loss: 0.0032482147216796875|unsuper_loss: 0.0
+average reward score: 4.609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.46s (21.39%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6438|ppo_ep: 1|act_loss: 0.007965087890625|cri_loss: 0.005157470703125|unsuper_loss: 0.0
+average reward score: 5.69921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.35%) |Training time=0.48s (22.04%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53
+[2023-04-14 12:43:50,625] [INFO] [logging.py:96:log_dist] [Rank 0] step=6440, skipped=82, lr=[1.2368412986132292e-06, 1.2368412986132292e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:43:50,643] [INFO] [timer.py:199:stop] epoch=0/micro_step=6440/global_step=6440, RunningAvgSamplesPerSec=105.5777836336627, CurrSamplesPerSec=102.76473200476545, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:43:50,736] [INFO] [logging.py:96:log_dist] [Rank 0] step=6440, skipped=108, lr=[6.576739061921056e-07, 6.576739061921056e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6439|ppo_ep: 1|act_loss: 0.02630615234375|cri_loss: 0.01354217529296875|unsuper_loss: 0.0
+average reward score: 5.30859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.44%) |Training time=0.47s (21.96%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6440|ppo_ep: 1|act_loss: -0.0191192626953125|cri_loss: -0.0086517333984375|unsuper_loss: 0.0
+average reward score: 5.04296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.59%) |Training time=0.47s (21.75%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6441|ppo_ep: 1|act_loss: 0.00342559814453125|cri_loss: 0.0019407272338867188|unsuper_loss: 0.0
+average reward score: 5.44140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.55%) |Training time=0.47s (21.84%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6442|ppo_ep: 1|act_loss: -0.000881195068359375|cri_loss: -0.0002651214599609375|unsuper_loss: 0.0
+average reward score: 5.34375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.30%) |Training time=0.48s (22.06%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6443|ppo_ep: 1|act_loss: -0.0157928466796875|cri_loss: -0.007617950439453125|unsuper_loss: 0.0
+average reward score: 4.7421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.81%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6444|ppo_ep: 1|act_loss: -0.0029392242431640625|cri_loss: -0.0014276504516601562|unsuper_loss: 0.0
+average reward score: 5.765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.90%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6445|ppo_ep: 1|act_loss: -0.0023975372314453125|cri_loss: -0.0009541511535644531|unsuper_loss: 0.0
+average reward score: 4.9609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.37%) |Training time=0.48s (22.01%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6446|ppo_ep: 1|act_loss: 0.03143310546875|cri_loss: 0.01885986328125|unsuper_loss: 0.0
+average reward score: 4.34375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.47s (21.60%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6447|ppo_ep: 1|act_loss: -0.0092315673828125|cri_loss: -0.0044708251953125|unsuper_loss: 0.0
+average reward score: 5.15625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.68%) |Training time=0.47s (21.68%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6448|ppo_ep: 1|act_loss: -0.002841949462890625|cri_loss: 0.0002040863037109375|unsuper_loss: 0.0
+average reward score: 4.7734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.78%) |Training time=0.47s (20.81%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.08 |AvgSamplesPerSec=14.53
+[2023-04-14 12:44:12,338] [INFO] [logging.py:96:log_dist] [Rank 0] step=6450, skipped=82, lr=[1.224448646881958e-06, 1.224448646881958e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:44:12,356] [INFO] [timer.py:199:stop] epoch=0/micro_step=6450/global_step=6450, RunningAvgSamplesPerSec=105.5742490838407, CurrSamplesPerSec=102.64236918076665, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:44:12,448] [INFO] [logging.py:96:log_dist] [Rank 0] step=6450, skipped=108, lr=[6.511813936905043e-07, 6.511813936905043e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6449|ppo_ep: 1|act_loss: 0.00443267822265625|cri_loss: 0.002407073974609375|unsuper_loss: 0.0
+average reward score: 5.19921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.36%) |Training time=0.47s (22.01%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6450|ppo_ep: 1|act_loss: 0.01702880859375|cri_loss: 0.00946807861328125|unsuper_loss: 0.0
+average reward score: 5.30078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.65%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6451|ppo_ep: 1|act_loss: -0.0035552978515625|cri_loss: -0.00168609619140625|unsuper_loss: 0.0
+average reward score: 5.3515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.55%) |Training time=0.47s (20.17%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.64 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6452|ppo_ep: 1|act_loss: 0.0164794921875|cri_loss: 0.0085906982421875|unsuper_loss: 0.0
+average reward score: 5.63671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.96%) |Training time=0.49s (22.45%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6453|ppo_ep: 1|act_loss: 0.007053375244140625|cri_loss: 0.0038814544677734375|unsuper_loss: 0.0
+average reward score: 4.7890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.23%) |Training time=0.48s (22.16%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6454|ppo_ep: 1|act_loss: -0.013641357421875|cri_loss: -0.0064697265625|unsuper_loss: 0.0
+average reward score: 4.8125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.23%) |Training time=0.48s (22.16%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6455|ppo_ep: 1|act_loss: 0.036041259765625|cri_loss: 0.0184478759765625|unsuper_loss: 0.0
+average reward score: 5.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.13%) |Training time=0.48s (22.28%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6456|ppo_ep: 1|act_loss: 0.0010776519775390625|cri_loss: 0.0009908676147460938|unsuper_loss: 0.0
+average reward score: 6.23828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.48s (22.00%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6457|ppo_ep: 1|act_loss: 0.039031982421875|cri_loss: 0.02099609375|unsuper_loss: 0.0
+average reward score: 5.26953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.04%) |Training time=0.49s (22.37%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6458|ppo_ep: 1|act_loss: -0.01117706298828125|cri_loss: -0.005397796630859375|unsuper_loss: 0.0
+average reward score: 4.8671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.74%) |Training time=0.47s (21.63%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
+[2023-04-14 12:44:34,188] [INFO] [logging.py:96:log_dist] [Rank 0] step=6460, skipped=82, lr=[1.2121093640075971e-06, 1.2121093640075971e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:44:34,207] [INFO] [timer.py:199:stop] epoch=0/micro_step=6460/global_step=6460, RunningAvgSamplesPerSec=105.56811658804929, CurrSamplesPerSec=105.1897695692178, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:44:34,300] [INFO] [logging.py:96:log_dist] [Rank 0] step=6460, skipped=108, lr=[6.447162851406805e-07, 6.447162851406805e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6459|ppo_ep: 1|act_loss: -0.0026264190673828125|cri_loss: -0.0008635520935058594|unsuper_loss: 0.0
+average reward score: 5.09765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.87%) |Training time=0.47s (21.53%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6460|ppo_ep: 1|act_loss: -0.0277099609375|cri_loss: -0.013702392578125|unsuper_loss: 0.0
+average reward score: 5.5
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.27%) |Training time=0.46s (21.11%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6461|ppo_ep: 1|act_loss: 0.051910400390625|cri_loss: 0.0283203125|unsuper_loss: 0.0
+average reward score: 5.41015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.21%) |Training time=0.46s (21.14%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6462|ppo_ep: 1|act_loss: -0.0036907196044921875|cri_loss: -0.001331329345703125|unsuper_loss: 0.0
+average reward score: 5.80078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.40%) |Training time=0.45s (20.97%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6463|ppo_ep: 1|act_loss: 0.0004858970642089844|cri_loss: 0.00047969818115234375|unsuper_loss: 0.0
+average reward score: 5.33984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.41%) |Training time=0.45s (20.95%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6464|ppo_ep: 1|act_loss: -0.0036411285400390625|cri_loss: -0.001377105712890625|unsuper_loss: 0.0
+average reward score: 5.4765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.13%) |Training time=0.46s (21.26%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6465|ppo_ep: 1|act_loss: 0.0038700103759765625|cri_loss: 0.002208709716796875|unsuper_loss: 0.0
+average reward score: 6.25
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.20%) |Training time=0.46s (21.16%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6466|ppo_ep: 1|act_loss: 0.00542449951171875|cri_loss: 0.00310516357421875|unsuper_loss: 0.0
+average reward score: 4.60546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.30%) |Training time=0.45s (19.34%) |Others=0.10 (4.36%)|CurSamplesPerSec=13.68 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6467|ppo_ep: 1|act_loss: 0.01116943359375|cri_loss: 0.006710052490234375|unsuper_loss: 0.0
+average reward score: 5.359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.46%) |Training time=0.45s (20.90%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6468|ppo_ep: 1|act_loss: -0.049163818359375|cri_loss: -0.0239410400390625|unsuper_loss: 0.0
+average reward score: 4.875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.53%) |Training time=0.45s (20.82%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53
+[2023-04-14 12:44:56,036] [INFO] [logging.py:96:log_dist] [Rank 0] step=6470, skipped=82, lr=[1.1998236328880862e-06, 1.1998236328880862e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:44:56,055] [INFO] [timer.py:199:stop] epoch=0/micro_step=6470/global_step=6470, RunningAvgSamplesPerSec=105.57323794039799, CurrSamplesPerSec=107.27137351562263, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:44:56,147] [INFO] [logging.py:96:log_dist] [Rank 0] step=6470, skipped=108, lr=[6.382786763711393e-07, 6.382786763711393e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6469|ppo_ep: 1|act_loss: -0.0016469955444335938|cri_loss: -0.0007343292236328125|unsuper_loss: 0.0
+average reward score: 5.234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.17%) |Training time=0.46s (21.23%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6470|ppo_ep: 1|act_loss: -0.0221710205078125|cri_loss: -0.0108489990234375|unsuper_loss: 0.0
+average reward score: 5.48828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.34%) |Training time=0.45s (21.04%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6471|ppo_ep: 1|act_loss: 0.0247344970703125|cri_loss: 0.01265716552734375|unsuper_loss: 0.0
+average reward score: 4.68359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.26%) |Training time=0.46s (21.11%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6472|ppo_ep: 1|act_loss: -0.020843505859375|cri_loss: -0.0093841552734375|unsuper_loss: 0.0
+average reward score: 5.80859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.17%) |Training time=0.46s (21.20%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6473|ppo_ep: 1|act_loss: -0.0097808837890625|cri_loss: -0.00446319580078125|unsuper_loss: 0.0
+average reward score: 5.7734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.55%) |Training time=0.45s (20.85%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6474|ppo_ep: 1|act_loss: 0.00592803955078125|cri_loss: 0.0030498504638671875|unsuper_loss: 0.0
+average reward score: 5.75
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.01%) |Training time=0.46s (21.39%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6475|ppo_ep: 1|act_loss: 0.03131103515625|cri_loss: 0.0160369873046875|unsuper_loss: 0.0
+average reward score: 4.97265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.25%) |Training time=0.46s (21.12%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6476|ppo_ep: 1|act_loss: 0.0002980232238769531|cri_loss: 0.00029349327087402344|unsuper_loss: 0.0
+average reward score: 4.92578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.49%) |Training time=0.45s (20.90%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6477|ppo_ep: 1|act_loss: 0.0050506591796875|cri_loss: 0.003326416015625|unsuper_loss: 0.0
+average reward score: 5.1640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.50%) |Training time=0.46s (20.98%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6478|ppo_ep: 1|act_loss: 0.0130462646484375|cri_loss: 0.006927490234375|unsuper_loss: 0.0
+average reward score: 5.08984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.37%) |Training time=0.46s (20.20%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.14 |AvgSamplesPerSec=14.53
+[2023-04-14 12:45:17,830] [INFO] [logging.py:96:log_dist] [Rank 0] step=6480, skipped=82, lr=[1.1875916356275982e-06, 1.1875916356275982e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:45:17,848] [INFO] [timer.py:199:stop] epoch=0/micro_step=6480/global_step=6480, RunningAvgSamplesPerSec=105.5773912936444, CurrSamplesPerSec=107.25242944209734, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:45:17,941] [INFO] [logging.py:96:log_dist] [Rank 0] step=6480, skipped=108, lr=[6.318686628027723e-07, 6.318686628027723e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6479|ppo_ep: 1|act_loss: -0.0038814544677734375|cri_loss: -0.0016021728515625|unsuper_loss: 0.0
+average reward score: 5.3828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.10%) |Training time=0.46s (21.30%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6480|ppo_ep: 1|act_loss: -0.0041351318359375|cri_loss: -0.0018663406372070312|unsuper_loss: 0.0
+average reward score: 4.90234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.03%) |Training time=0.46s (21.08%) |Others=0.11 (4.89%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6481|ppo_ep: 1|act_loss: -0.0183868408203125|cri_loss: -0.00872039794921875|unsuper_loss: 0.0
+average reward score: 5.5546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.79s (76.18%) |Training time=0.46s (19.51%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.63 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6482|ppo_ep: 1|act_loss: 0.034393310546875|cri_loss: 0.01800537109375|unsuper_loss: 0.0
+average reward score: 6.03515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.82%) |Training time=0.47s (21.58%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6483|ppo_ep: 1|act_loss: -0.027374267578125|cri_loss: -0.01319122314453125|unsuper_loss: 0.0
+average reward score: 5.34375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.82%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6484|ppo_ep: 1|act_loss: -0.0150909423828125|cri_loss: -0.007389068603515625|unsuper_loss: 0.0
+average reward score: 4.9296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.99%) |Training time=0.49s (22.42%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6485|ppo_ep: 1|act_loss: 0.019775390625|cri_loss: 0.01065826416015625|unsuper_loss: 0.0
+average reward score: 5.6640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.59%) |Training time=0.47s (21.68%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6486|ppo_ep: 1|act_loss: -0.003971099853515625|cri_loss: -0.00188446044921875|unsuper_loss: 0.0
+average reward score: 5.3125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.67s (76.46%) |Training time=0.41s (18.83%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6487|ppo_ep: 1|act_loss: -0.009490966796875|cri_loss: -0.004245758056640625|unsuper_loss: 0.0
+average reward score: 5.3046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.31%) |Training time=0.46s (21.02%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6488|ppo_ep: 1|act_loss: 0.0171966552734375|cri_loss: 0.00887298583984375|unsuper_loss: 0.0
+average reward score: 5.30078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.10%) |Training time=0.44s (20.20%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.53
+[2023-04-14 12:45:39,763] [INFO] [logging.py:96:log_dist] [Rank 0] step=6490, skipped=82, lr=[1.1754135535338393e-06, 1.1754135535338393e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:45:39,781] [INFO] [timer.py:199:stop] epoch=0/micro_step=6490/global_step=6490, RunningAvgSamplesPerSec=105.5822909862731, CurrSamplesPerSec=115.98079224638602, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:45:39,874] [INFO] [logging.py:96:log_dist] [Rank 0] step=6490, skipped=108, lr=[6.254863394474447e-07, 6.254863394474447e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6489|ppo_ep: 1|act_loss: 0.0064697265625|cri_loss: 0.003589630126953125|unsuper_loss: 0.0
+average reward score: 5.390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.06%) |Training time=0.44s (20.23%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6490|ppo_ep: 1|act_loss: -0.0025119781494140625|cri_loss: -0.00113677978515625|unsuper_loss: 0.0
+average reward score: 5.2421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.85%) |Training time=0.44s (20.43%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6491|ppo_ep: 1|act_loss: 0.027984619140625|cri_loss: 0.01507568359375|unsuper_loss: 0.0
+average reward score: 5.82421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.35%) |Training time=0.41s (18.90%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6492|ppo_ep: 1|act_loss: -0.004123687744140625|cri_loss: -0.0017004013061523438|unsuper_loss: 0.0
+average reward score: 4.8984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.08%) |Training time=0.46s (21.23%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6493|ppo_ep: 1|act_loss: 0.004833221435546875|cri_loss: 0.002483367919921875|unsuper_loss: 0.0
+average reward score: 4.6484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.65%) |Training time=0.48s (21.73%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6494|ppo_ep: 1|act_loss: 0.03521728515625|cri_loss: 0.01904296875|unsuper_loss: 0.0
+average reward score: 5.23046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.98%) |Training time=0.47s (21.39%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.53
+[2023-04-14 12:45:52,903] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 6495|ppo_ep: 1|act_loss: 0.0124969482421875|cri_loss: 0.00667572021484375|unsuper_loss: 0.0
+average reward score: 5.359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.02%) |Training time=0.45s (20.76%) |Others=0.09 (4.22%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.53
+[2023-04-14 12:45:55,239] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 6496|ppo_ep: 1|act_loss: 0.0023250579833984375|cri_loss: 0.0012359619140625|unsuper_loss: 0.0
+average reward score: 5.99609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.06%) |Training time=0.45s (19.08%) |Others=0.09 (3.86%)|CurSamplesPerSec=13.70 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6497|ppo_ep: 1|act_loss: -0.01434326171875|cri_loss: -0.006961822509765625|unsuper_loss: 0.0
+average reward score: 5.4296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.00%) |Training time=0.47s (21.38%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6498|ppo_ep: 1|act_loss: -0.019866943359375|cri_loss: -0.0095367431640625|unsuper_loss: 0.0
+average reward score: 5.30078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.36%) |Training time=0.45s (20.92%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.53
+[2023-04-14 12:46:01,651] [INFO] [logging.py:96:log_dist] [Rank 0] step=6500, skipped=82, lr=[1.1632895671153645e-06, 1.1632895671153645e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:46:01,668] [INFO] [timer.py:199:stop] epoch=0/micro_step=6500/global_step=6500, RunningAvgSamplesPerSec=105.59168797775624, CurrSamplesPerSec=124.59731974764485, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:46:01,761] [INFO] [logging.py:96:log_dist] [Rank 0] step=6500, skipped=110, lr=[6.204004813025569e-07, 6.204004813025569e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6499|ppo_ep: 1|act_loss: 0.0130615234375|cri_loss: 0.006610870361328125|unsuper_loss: 0.0
+average reward score: 5.859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.78%) |Training time=0.42s (19.35%) |Others=0.11 (4.87%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6500|ppo_ep: 1|act_loss: 0.00244140625|cri_loss: 0.0013818740844726562|unsuper_loss: 0.0
+average reward score: 4.9296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.67s (76.74%) |Training time=0.40s (18.52%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6501|ppo_ep: 1|act_loss: 0.01165771484375|cri_loss: 0.00713348388671875|unsuper_loss: 0.0
+average reward score: 5.703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.04%) |Training time=0.47s (21.31%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6502|ppo_ep: 1|act_loss: 0.0173797607421875|cri_loss: 0.0088348388671875|unsuper_loss: 0.0
+average reward score: 4.984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.44%) |Training time=0.45s (20.87%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6503|ppo_ep: 1|act_loss: 0.029022216796875|cri_loss: 0.015045166015625|unsuper_loss: 0.0
+average reward score: 5.40234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.43%) |Training time=0.45s (20.89%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6504|ppo_ep: 1|act_loss: -0.00212860107421875|cri_loss: -0.00066375732421875|unsuper_loss: 0.0
+average reward score: 4.8828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.18%) |Training time=0.46s (21.14%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6505|ppo_ep: 1|act_loss: -0.002971649169921875|cri_loss: -0.0011959075927734375|unsuper_loss: 0.0
+average reward score: 5.40234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.97%) |Training time=0.44s (20.31%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6506|ppo_ep: 1|act_loss: 0.01275634765625|cri_loss: 0.006744384765625|unsuper_loss: 0.0
+average reward score: 4.95703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.15%) |Training time=0.47s (21.24%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6507|ppo_ep: 1|act_loss: -0.032470703125|cri_loss: -0.01538848876953125|unsuper_loss: 0.0
+average reward score: 4.96484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.30%) |Training time=0.49s (21.31%) |Others=0.10 (4.39%)|CurSamplesPerSec=13.95 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6508|ppo_ep: 1|act_loss: -0.0428466796875|cri_loss: -0.02069091796875|unsuper_loss: 0.0
+average reward score: 4.9609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.68%) |Training time=0.47s (21.68%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.53
+[2023-04-14 12:46:23,525] [INFO] [logging.py:96:log_dist] [Rank 0] step=6510, skipped=82, lr=[1.151219856078901e-06, 1.151219856078901e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:46:23,543] [INFO] [timer.py:199:stop] epoch=0/micro_step=6510/global_step=6510, RunningAvgSamplesPerSec=105.59656922152335, CurrSamplesPerSec=103.94636394544389, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:46:23,636] [INFO] [logging.py:96:log_dist] [Rank 0] step=6510, skipped=110, lr=[6.140682384495902e-07, 6.140682384495902e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6509|ppo_ep: 1|act_loss: 0.0023174285888671875|cri_loss: 0.0017957687377929688|unsuper_loss: 0.0
+average reward score: 5.625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.73%) |Training time=0.47s (21.66%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6510|ppo_ep: 1|act_loss: 0.01549530029296875|cri_loss: 0.00812530517578125|unsuper_loss: 0.0
+average reward score: 5.8671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.45%) |Training time=0.48s (21.92%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6511|ppo_ep: 1|act_loss: -0.018798828125|cri_loss: -0.00925445556640625|unsuper_loss: 0.0
+average reward score: 5.58984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.77s (74.68%) |Training time=0.50s (21.07%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.49 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6512|ppo_ep: 1|act_loss: 0.015350341796875|cri_loss: 0.00786590576171875|unsuper_loss: 0.0
+average reward score: 5.78125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.44%) |Training time=0.45s (20.91%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6513|ppo_ep: 1|act_loss: -0.0010328292846679688|cri_loss: -0.00041866302490234375|unsuper_loss: 0.0
+average reward score: 5.33984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.55%) |Training time=0.45s (20.64%) |Others=0.10 (4.82%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6514|ppo_ep: 1|act_loss: -0.0087890625|cri_loss: -0.003993988037109375|unsuper_loss: 0.0
+average reward score: 6.12890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.89%) |Training time=0.44s (20.47%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6515|ppo_ep: 1|act_loss: 0.0076751708984375|cri_loss: 0.004138946533203125|unsuper_loss: 0.0
+average reward score: 5.64453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.73%) |Training time=0.45s (20.63%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6516|ppo_ep: 1|act_loss: 0.0009355545043945312|cri_loss: 0.0005583763122558594|unsuper_loss: 0.0
+average reward score: 5.44921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.38%) |Training time=0.45s (20.57%) |Others=0.11 (5.05%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6517|ppo_ep: 1|act_loss: 0.021087646484375|cri_loss: 0.011016845703125|unsuper_loss: 0.0
+average reward score: 5.484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.12%) |Training time=0.46s (21.23%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6518|ppo_ep: 1|act_loss: 0.0050048828125|cri_loss: 0.0026264190673828125|unsuper_loss: 0.0
+average reward score: 5.44140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.88%) |Training time=0.47s (21.50%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
+[2023-04-14 12:46:45,374] [INFO] [logging.py:96:log_dist] [Rank 0] step=6520, skipped=82, lr=[1.1392045993266831e-06, 1.1392045993266831e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:46:45,392] [INFO] [timer.py:199:stop] epoch=0/micro_step=6520/global_step=6520, RunningAvgSamplesPerSec=105.6001638945027, CurrSamplesPerSec=108.51050440209877, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:46:45,485] [INFO] [logging.py:96:log_dist] [Rank 0] step=6520, skipped=110, lr=[6.077639496548926e-07, 6.077639496548926e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6519|ppo_ep: 1|act_loss: -0.0101165771484375|cri_loss: -0.004627227783203125|unsuper_loss: 0.0
+average reward score: 5.37109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.19%) |Training time=0.46s (21.19%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6520|ppo_ep: 1|act_loss: -0.0202178955078125|cri_loss: -0.0098114013671875|unsuper_loss: 0.0
+average reward score: 5.40234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.17%) |Training time=0.46s (21.21%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6521|ppo_ep: 1|act_loss: 0.021697998046875|cri_loss: 0.01116180419921875|unsuper_loss: 0.0
+average reward score: 5.3671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.12%) |Training time=0.46s (21.25%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6522|ppo_ep: 1|act_loss: -0.0177001953125|cri_loss: -0.00839996337890625|unsuper_loss: 0.0
+average reward score: 5.65625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.40%) |Training time=0.45s (20.96%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6523|ppo_ep: 1|act_loss: -0.0059967041015625|cri_loss: -0.002777099609375|unsuper_loss: 0.0
+average reward score: 5.4375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.36%) |Training time=0.45s (20.99%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6524|ppo_ep: 1|act_loss: -0.02264404296875|cri_loss: -0.01093292236328125|unsuper_loss: 0.0
+average reward score: 5.390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.21%) |Training time=0.46s (21.16%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6525|ppo_ep: 1|act_loss: -0.0240478515625|cri_loss: -0.01178741455078125|unsuper_loss: 0.0
+average reward score: 5.48046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.18%) |Training time=0.46s (21.21%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6526|ppo_ep: 1|act_loss: 0.02386474609375|cri_loss: 0.0123138427734375|unsuper_loss: 0.0
+average reward score: 5.04296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.78s (75.47%) |Training time=0.48s (20.26%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.57 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6527|ppo_ep: 1|act_loss: 0.022735595703125|cri_loss: 0.01175689697265625|unsuper_loss: 0.0
+average reward score: 5.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.28%) |Training time=0.46s (21.10%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6528|ppo_ep: 1|act_loss: 0.0005331039428710938|cri_loss: 0.0003275871276855469|unsuper_loss: 0.0
+average reward score: 4.90234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.23%) |Training time=0.46s (21.20%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.53
+[2023-04-14 12:47:07,221] [INFO] [logging.py:96:log_dist] [Rank 0] step=6530, skipped=82, lr=[1.1272439749537998e-06, 1.1272439749537998e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:47:07,239] [INFO] [timer.py:199:stop] epoch=0/micro_step=6530/global_step=6530, RunningAvgSamplesPerSec=105.60338435686515, CurrSamplesPerSec=109.80625927543986, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:47:07,332] [INFO] [logging.py:96:log_dist] [Rank 0] step=6530, skipped=110, lr=[6.01487708363232e-07, 6.01487708363232e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6529|ppo_ep: 1|act_loss: 0.05078125|cri_loss: 0.026214599609375|unsuper_loss: 0.0
+average reward score: 5.1875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.39%) |Training time=0.45s (21.01%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6530|ppo_ep: 1|act_loss: 0.002910614013671875|cri_loss: 0.00180816650390625|unsuper_loss: 0.0
+average reward score: 4.74609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.48%) |Training time=0.45s (20.88%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6531|ppo_ep: 1|act_loss: -0.0006227493286132812|cri_loss: 0.0004520416259765625|unsuper_loss: 0.0
+average reward score: 5.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.62%) |Training time=0.45s (20.75%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6532|ppo_ep: 1|act_loss: -0.0004138946533203125|cri_loss: 0.00014734268188476562|unsuper_loss: 0.0
+average reward score: 5.3671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.20%) |Training time=0.46s (21.16%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6533|ppo_ep: 1|act_loss: -0.022857666015625|cri_loss: -0.01116943359375|unsuper_loss: 0.0
+average reward score: 5.15625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.48%) |Training time=0.45s (20.77%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6534|ppo_ep: 1|act_loss: -0.0067138671875|cri_loss: -0.00313568115234375|unsuper_loss: 0.0
+average reward score: 5.72265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.49%) |Training time=0.45s (20.86%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6535|ppo_ep: 1|act_loss: -0.0179595947265625|cri_loss: -0.0081939697265625|unsuper_loss: 0.0
+average reward score: 4.765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.52%) |Training time=0.45s (20.88%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6536|ppo_ep: 1|act_loss: 0.00745391845703125|cri_loss: 0.003978729248046875|unsuper_loss: 0.0
+average reward score: 5.09375
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.30%) |Training time=0.46s (20.91%) |Others=0.11 (4.79%)|CurSamplesPerSec=14.46 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6537|ppo_ep: 1|act_loss: 0.0206451416015625|cri_loss: 0.01094818115234375|unsuper_loss: 0.0
+average reward score: 4.89453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.95%) |Training time=0.47s (20.62%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.11 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6538|ppo_ep: 1|act_loss: -0.032745361328125|cri_loss: -0.0157318115234375|unsuper_loss: 0.0
+average reward score: 5.44921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.62%) |Training time=0.45s (20.73%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
+[2023-04-14 12:47:29,021] [INFO] [logging.py:96:log_dist] [Rank 0] step=6540, skipped=82, lr=[1.1153381602455581e-06, 1.1153381602455581e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:47:29,039] [INFO] [timer.py:199:stop] epoch=0/micro_step=6540/global_step=6540, RunningAvgSamplesPerSec=105.60941207531546, CurrSamplesPerSec=111.99790386308354, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:47:29,132] [INFO] [logging.py:96:log_dist] [Rank 0] step=6540, skipped=110, lr=[5.952396076036457e-07, 5.952396076036457e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6539|ppo_ep: 1|act_loss: 0.002498626708984375|cri_loss: 0.001873016357421875|unsuper_loss: 0.0
+average reward score: 5.484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.56%) |Training time=0.45s (20.82%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6540|ppo_ep: 1|act_loss: -0.006824493408203125|cri_loss: -0.0032329559326171875|unsuper_loss: 0.0
+average reward score: 5.83203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.32%) |Training time=0.45s (21.04%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6541|ppo_ep: 1|act_loss: -0.019256591796875|cri_loss: -0.00939178466796875|unsuper_loss: 0.0
+average reward score: 5.77734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.78s (75.69%) |Training time=0.47s (20.02%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.62 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6542|ppo_ep: 1|act_loss: 0.023956298828125|cri_loss: 0.01236724853515625|unsuper_loss: 0.0
+average reward score: 6.453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.79%) |Training time=0.47s (21.61%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6543|ppo_ep: 1|act_loss: 0.014129638671875|cri_loss: 0.007312774658203125|unsuper_loss: 0.0
+average reward score: 4.609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.83%) |Training time=0.49s (22.55%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6544|ppo_ep: 1|act_loss: -0.0164642333984375|cri_loss: -0.0080413818359375|unsuper_loss: 0.0
+average reward score: 4.69140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.61%) |Training time=0.49s (22.77%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6545|ppo_ep: 1|act_loss: -0.00740814208984375|cri_loss: -0.003650665283203125|unsuper_loss: 0.0
+average reward score: 4.8515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.92%) |Training time=0.49s (22.47%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6546|ppo_ep: 1|act_loss: -0.0016851425170898438|cri_loss: -0.0007281303405761719|unsuper_loss: 0.0
+average reward score: 5.1953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.01%) |Training time=0.48s (22.38%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6547|ppo_ep: 1|act_loss: -0.0064849853515625|cri_loss: -0.0031108856201171875|unsuper_loss: 0.0
+average reward score: 5.515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.81%) |Training time=0.49s (22.57%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6548|ppo_ep: 1|act_loss: -0.0235748291015625|cri_loss: -0.01151275634765625|unsuper_loss: 0.0
+average reward score: 5.35546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.96%) |Training time=0.49s (22.42%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
+[2023-04-14 12:47:50,874] [INFO] [logging.py:96:log_dist] [Rank 0] step=6550, skipped=82, lr=[1.103487331674853e-06, 1.103487331674853e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:47:50,892] [INFO] [timer.py:199:stop] epoch=0/micro_step=6550/global_step=6550, RunningAvgSamplesPerSec=105.60097993930471, CurrSamplesPerSec=98.59352218869277, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:47:50,985] [INFO] [logging.py:96:log_dist] [Rank 0] step=6550, skipped=110, lr=[5.890197399880581e-07, 5.890197399880581e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6549|ppo_ep: 1|act_loss: -0.0108489990234375|cri_loss: -0.005306243896484375|unsuper_loss: 0.0
+average reward score: 6.22265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.94%) |Training time=0.49s (22.49%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6550|ppo_ep: 1|act_loss: 0.0272064208984375|cri_loss: 0.01419830322265625|unsuper_loss: 0.0
+average reward score: 5.375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.67%) |Training time=0.49s (22.72%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6551|ppo_ep: 1|act_loss: 0.005901336669921875|cri_loss: 0.003063201904296875|unsuper_loss: 0.0
+average reward score: 5.421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.66%) |Training time=0.50s (22.74%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6552|ppo_ep: 1|act_loss: -0.0030727386474609375|cri_loss: -0.000606536865234375|unsuper_loss: 0.0
+average reward score: 4.1875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.63%) |Training time=0.49s (22.68%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6553|ppo_ep: 1|act_loss: 0.006561279296875|cri_loss: 0.003398895263671875|unsuper_loss: 0.0
+average reward score: 5.53515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.87%) |Training time=0.49s (22.52%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6554|ppo_ep: 1|act_loss: 0.011383056640625|cri_loss: 0.005832672119140625|unsuper_loss: 0.0
+average reward score: 5.84765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.68%) |Training time=0.49s (22.72%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6555|ppo_ep: 1|act_loss: -0.00696563720703125|cri_loss: -0.00336456298828125|unsuper_loss: 0.0
+average reward score: 5.6328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.11%) |Training time=0.48s (22.27%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6556|ppo_ep: 1|act_loss: 0.0443115234375|cri_loss: 0.0237274169921875|unsuper_loss: 0.0
+average reward score: 5.0546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.61s (70.90%) |Training time=0.56s (24.58%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.13 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6557|ppo_ep: 1|act_loss: 0.0042877197265625|cri_loss: 0.002197265625|unsuper_loss: 0.0
+average reward score: 5.73828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.58%) |Training time=0.47s (21.79%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6558|ppo_ep: 1|act_loss: 0.016754150390625|cri_loss: 0.00850677490234375|unsuper_loss: 0.0
+average reward score: 5.75
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.17%) |Training time=0.48s (22.21%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
+[2023-04-14 12:48:12,678] [INFO] [logging.py:96:log_dist] [Rank 0] step=6560, skipped=82, lr=[1.091691664899555e-06, 1.091691664899555e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:48:12,696] [INFO] [timer.py:199:stop] epoch=0/micro_step=6560/global_step=6560, RunningAvgSamplesPerSec=105.58608899616205, CurrSamplesPerSec=100.87136841519845, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:48:12,789] [INFO] [logging.py:96:log_dist] [Rank 0] step=6560, skipped=110, lr=[5.828281977099129e-07, 5.828281977099129e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6559|ppo_ep: 1|act_loss: -0.0164337158203125|cri_loss: -0.00799560546875|unsuper_loss: 0.0
+average reward score: 5.5234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.23%) |Training time=0.48s (22.15%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6560|ppo_ep: 1|act_loss: -0.00439453125|cri_loss: -0.0017576217651367188|unsuper_loss: 0.0
+average reward score: 5.0078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.11%) |Training time=0.48s (22.27%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6561|ppo_ep: 1|act_loss: 0.029449462890625|cri_loss: 0.0149383544921875|unsuper_loss: 0.0
+average reward score: 5.4609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.14%) |Training time=0.48s (22.24%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6562|ppo_ep: 1|act_loss: 0.00925445556640625|cri_loss: 0.0048828125|unsuper_loss: 0.0
+average reward score: 5.6875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.23%) |Training time=0.48s (22.13%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6563|ppo_ep: 1|act_loss: -0.004062652587890625|cri_loss: -0.0015611648559570312|unsuper_loss: 0.0
+average reward score: 5.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.22%) |Training time=0.48s (22.15%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6564|ppo_ep: 1|act_loss: -0.002902984619140625|cri_loss: -0.001293182373046875|unsuper_loss: 0.0
+average reward score: 5.14453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.59%) |Training time=0.48s (22.24%) |Others=0.11 (5.17%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6565|ppo_ep: 1|act_loss: 0.026123046875|cri_loss: 0.01336669921875|unsuper_loss: 0.0
+average reward score: 5.4296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.27%) |Training time=0.48s (22.13%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6566|ppo_ep: 1|act_loss: 0.0179443359375|cri_loss: 0.00943756103515625|unsuper_loss: 0.0
+average reward score: 5.12890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.69s (73.87%) |Training time=0.50s (21.73%) |Others=0.10 (4.40%)|CurSamplesPerSec=14.02 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6567|ppo_ep: 1|act_loss: 0.02716064453125|cri_loss: 0.0139312744140625|unsuper_loss: 0.0
+average reward score: 5.359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.21%) |Training time=0.48s (22.16%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6568|ppo_ep: 1|act_loss: -0.009429931640625|cri_loss: -0.004634857177734375|unsuper_loss: 0.0
+average reward score: 5.96875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.01%) |Training time=0.48s (22.35%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
+[2023-04-14 12:48:34,461] [INFO] [logging.py:96:log_dist] [Rank 0] step=6570, skipped=82, lr=[1.0799513347598973e-06, 1.0799513347598973e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:48:34,479] [INFO] [timer.py:199:stop] epoch=0/micro_step=6570/global_step=6570, RunningAvgSamplesPerSec=105.57698320907716, CurrSamplesPerSec=100.9552079838854, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:48:34,572] [INFO] [logging.py:96:log_dist] [Rank 0] step=6570, skipped=110, lr=[5.766650725428027e-07, 5.766650725428027e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6569|ppo_ep: 1|act_loss: -0.0192108154296875|cri_loss: -0.00888824462890625|unsuper_loss: 0.0
+average reward score: 5.83984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.22%) |Training time=0.48s (22.16%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6570|ppo_ep: 1|act_loss: -0.0006604194641113281|cri_loss: -6.580352783203125e-05|unsuper_loss: 0.0
+average reward score: 4.609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.98%) |Training time=0.48s (22.39%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6571|ppo_ep: 1|act_loss: -0.014739990234375|cri_loss: -0.00199127197265625|unsuper_loss: 0.0
+average reward score: 5.109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.74s (74.34%) |Training time=0.50s (21.37%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.63 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6572|ppo_ep: 1|act_loss: 0.0235137939453125|cri_loss: 0.01197052001953125|unsuper_loss: 0.0
+average reward score: 5.5546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.71%) |Training time=0.50s (22.68%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6573|ppo_ep: 1|act_loss: -0.00745391845703125|cri_loss: -0.003570556640625|unsuper_loss: 0.0
+average reward score: 5.0
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.25%) |Training time=0.48s (22.12%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6574|ppo_ep: 1|act_loss: -0.0093536376953125|cri_loss: -0.004589080810546875|unsuper_loss: 0.0
+average reward score: 5.453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.95%) |Training time=0.49s (22.44%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6575|ppo_ep: 1|act_loss: -0.0058135986328125|cri_loss: -0.0026302337646484375|unsuper_loss: 0.0
+average reward score: 4.78125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.23%) |Training time=0.48s (22.13%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6576|ppo_ep: 1|act_loss: 0.005096435546875|cri_loss: 0.0028324127197265625|unsuper_loss: 0.0
+average reward score: 4.578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.17%) |Training time=0.48s (22.19%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6577|ppo_ep: 1|act_loss: 0.022705078125|cri_loss: 0.01177978515625|unsuper_loss: 0.0
+average reward score: 5.515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.02%) |Training time=0.48s (22.35%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6578|ppo_ep: 1|act_loss: -0.009124755859375|cri_loss: -0.004364013671875|unsuper_loss: 0.0
+average reward score: 5.16796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.98%) |Training time=0.48s (22.19%) |Others=0.11 (4.84%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.53
+[2023-04-14 12:48:56,317] [INFO] [logging.py:96:log_dist] [Rank 0] step=6580, skipped=82, lr=[1.0682665152758964e-06, 1.0682665152758964e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:48:56,336] [INFO] [timer.py:199:stop] epoch=0/micro_step=6580/global_step=6580, RunningAvgSamplesPerSec=105.56653477849474, CurrSamplesPerSec=100.78146226450514, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:48:56,428] [INFO] [logging.py:96:log_dist] [Rank 0] step=6580, skipped=110, lr=[5.705304558391109e-07, 5.705304558391109e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6579|ppo_ep: 1|act_loss: -0.004825592041015625|cri_loss: -0.00231170654296875|unsuper_loss: 0.0
+average reward score: 5.6875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.23%) |Training time=0.48s (22.15%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6580|ppo_ep: 1|act_loss: -0.003093719482421875|cri_loss: -0.0014362335205078125|unsuper_loss: 0.0
+average reward score: 5.28125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.48s (22.18%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6581|ppo_ep: 1|act_loss: 0.0014276504516601562|cri_loss: 0.001483917236328125|unsuper_loss: 0.0
+average reward score: 5.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.14%) |Training time=0.48s (22.22%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6582|ppo_ep: 1|act_loss: 0.00543975830078125|cri_loss: 0.0029315948486328125|unsuper_loss: 0.0
+average reward score: 5.90625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.18%) |Training time=0.48s (22.19%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6583|ppo_ep: 1|act_loss: 0.003963470458984375|cri_loss: 0.00213623046875|unsuper_loss: 0.0
+average reward score: 5.2109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.26%) |Training time=0.48s (22.09%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6584|ppo_ep: 1|act_loss: -0.0157318115234375|cri_loss: -0.007671356201171875|unsuper_loss: 0.0
+average reward score: 5.26953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.98%) |Training time=0.49s (22.41%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6585|ppo_ep: 1|act_loss: 0.019989013671875|cri_loss: 0.01015472412109375|unsuper_loss: 0.0
+average reward score: 4.8203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.49%) |Training time=0.47s (21.86%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6586|ppo_ep: 1|act_loss: 0.0076751708984375|cri_loss: 0.004123687744140625|unsuper_loss: 0.0
+average reward score: 5.22265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.74s (74.99%) |Training time=0.48s (20.55%) |Others=0.10 (4.45%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6587|ppo_ep: 1|act_loss: -0.020965576171875|cri_loss: -0.00983428955078125|unsuper_loss: 0.0
+average reward score: 5.578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.19%) |Training time=0.48s (22.14%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6588|ppo_ep: 1|act_loss: -0.0211181640625|cri_loss: -0.01003265380859375|unsuper_loss: 0.0
+average reward score: 5.23828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.27%) |Training time=0.48s (22.10%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
+[2023-04-14 12:49:18,127] [INFO] [logging.py:96:log_dist] [Rank 0] step=6590, skipped=82, lr=[1.0566373796447659e-06, 1.0566373796447659e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:49:18,145] [INFO] [timer.py:199:stop] epoch=0/micro_step=6590/global_step=6590, RunningAvgSamplesPerSec=105.55925555019671, CurrSamplesPerSec=101.66607937390451, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:49:18,238] [INFO] [logging.py:96:log_dist] [Rank 0] step=6590, skipped=110, lr=[5.644244385286548e-07, 5.644244385286548e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6589|ppo_ep: 1|act_loss: -0.019561767578125|cri_loss: -0.009368896484375|unsuper_loss: 0.0
+average reward score: 5.1875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.31%) |Training time=0.48s (22.06%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6590|ppo_ep: 1|act_loss: -0.0198974609375|cri_loss: -0.00963592529296875|unsuper_loss: 0.0
+average reward score: 5.9140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.24%) |Training time=0.48s (22.06%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6591|ppo_ep: 1|act_loss: -0.0243988037109375|cri_loss: -0.011871337890625|unsuper_loss: 0.0
+average reward score: 5.78515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.03%) |Training time=0.48s (22.34%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6592|ppo_ep: 1|act_loss: -0.00626373291015625|cri_loss: -0.002712249755859375|unsuper_loss: 0.0
+average reward score: 5.6015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.19%) |Training time=0.48s (22.16%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6593|ppo_ep: 1|act_loss: 0.025970458984375|cri_loss: 0.01407623291015625|unsuper_loss: 0.0
+average reward score: 5.1953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.14%) |Training time=0.48s (22.23%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6594|ppo_ep: 1|act_loss: 0.0013799667358398438|cri_loss: 0.0013532638549804688|unsuper_loss: 0.0
+average reward score: 5.94921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.11%) |Training time=0.48s (22.27%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6595|ppo_ep: 1|act_loss: 0.02069091796875|cri_loss: 0.01064300537109375|unsuper_loss: 0.0
+average reward score: 5.4921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.96%) |Training time=0.47s (21.42%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6596|ppo_ep: 1|act_loss: -0.0013408660888671875|cri_loss: -0.00049591064453125|unsuper_loss: 0.0
+average reward score: 5.109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.27%) |Training time=0.48s (21.31%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.09 |AvgSamplesPerSec=14.53
+[2023-04-14 12:49:35,683] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 6597|ppo_ep: 1|act_loss: -0.003520965576171875|cri_loss: -0.0013427734375|unsuper_loss: 0.0
+average reward score: 5.5625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.63%) |Training time=0.48s (22.20%) |Others=0.09 (4.17%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.53
+[2023-04-14 12:49:37,840] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 6598|ppo_ep: 1|act_loss: 0.0361328125|cri_loss: 0.0187530517578125|unsuper_loss: 0.0
+average reward score: 5.328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.45%) |Training time=0.48s (22.24%) |Others=0.09 (4.31%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.53
+[2023-04-14 12:49:39,893] [INFO] [logging.py:96:log_dist] [Rank 0] step=6600, skipped=82, lr=[1.0450641002383495e-06, 1.0450641002383495e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:49:39,911] [INFO] [timer.py:199:stop] epoch=0/micro_step=6600/global_step=6600, RunningAvgSamplesPerSec=105.55175453772948, CurrSamplesPerSec=100.11944716301052, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:49:40,006] [INFO] [logging.py:96:log_dist] [Rank 0] step=6600, skipped=112, lr=[5.59560277077667e-07, 5.59560277077667e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6599|ppo_ep: 1|act_loss: 0.0292510986328125|cri_loss: 0.0148773193359375|unsuper_loss: 0.0
+average reward score: 6.1796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.09%) |Training time=0.48s (22.25%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6600|ppo_ep: 1|act_loss: 0.002239227294921875|cri_loss: 0.00122833251953125|unsuper_loss: 0.0
+average reward score: 5.59765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.21%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6601|ppo_ep: 1|act_loss: -0.0012359619140625|cri_loss: -0.0005125999450683594|unsuper_loss: 0.0
+average reward score: 5.6328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.27%) |Training time=0.48s (22.10%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6602|ppo_ep: 1|act_loss: -0.00958251953125|cri_loss: -0.00464630126953125|unsuper_loss: 0.0
+average reward score: 5.26953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.65s (71.46%) |Training time=0.56s (24.18%) |Others=0.10 (4.36%)|CurSamplesPerSec=13.88 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6603|ppo_ep: 1|act_loss: 0.0058135986328125|cri_loss: 0.0030841827392578125|unsuper_loss: 0.0
+average reward score: 5.0859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.47s (21.90%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6604|ppo_ep: 1|act_loss: -0.0102691650390625|cri_loss: -0.00485992431640625|unsuper_loss: 0.0
+average reward score: 5.0546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.43%) |Training time=0.50s (22.98%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6605|ppo_ep: 1|act_loss: -0.0034236907958984375|cri_loss: -0.0015735626220703125|unsuper_loss: 0.0
+average reward score: 5.28125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.93%) |Training time=0.49s (22.46%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6606|ppo_ep: 1|act_loss: -0.02056884765625|cri_loss: -0.01004791259765625|unsuper_loss: 0.0
+average reward score: 5.08984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.88%) |Training time=0.49s (22.50%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6607|ppo_ep: 1|act_loss: -0.0178070068359375|cri_loss: -0.0086517333984375|unsuper_loss: 0.0
+average reward score: 5.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.88%) |Training time=0.47s (21.49%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6608|ppo_ep: 1|act_loss: 0.016571044921875|cri_loss: 0.00848388671875|unsuper_loss: 0.0
+average reward score: 5.13671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.04%) |Training time=0.46s (21.34%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
+[2023-04-14 12:50:01,722] [INFO] [logging.py:96:log_dist] [Rank 0] step=6610, skipped=82, lr=[1.0335468486005652e-06, 1.0335468486005652e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:50:01,740] [INFO] [timer.py:199:stop] epoch=0/micro_step=6610/global_step=6610, RunningAvgSamplesPerSec=105.54412564005361, CurrSamplesPerSec=105.0409292468261, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:50:01,836] [INFO] [logging.py:96:log_dist] [Rank 0] step=6610, skipped=112, lr=[5.53505966464195e-07, 5.53505966464195e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6609|ppo_ep: 1|act_loss: -0.007537841796875|cri_loss: -0.0036907196044921875|unsuper_loss: 0.0
+average reward score: 5.42578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.75%) |Training time=0.47s (21.53%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6610|ppo_ep: 1|act_loss: -0.001495361328125|cri_loss: -0.0006341934204101562|unsuper_loss: 0.0
+average reward score: 6.0
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.78%) |Training time=0.47s (21.60%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6611|ppo_ep: 1|act_loss: -0.0090789794921875|cri_loss: -0.004428863525390625|unsuper_loss: 0.0
+average reward score: 4.9296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.08%) |Training time=0.47s (21.35%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6612|ppo_ep: 1|act_loss: -0.01470184326171875|cri_loss: -0.0068206787109375|unsuper_loss: 0.0
+average reward score: 5.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.75%) |Training time=0.47s (21.64%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6613|ppo_ep: 1|act_loss: 0.0101318359375|cri_loss: 0.00521087646484375|unsuper_loss: 0.0
+average reward score: 5.36328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.63%) |Training time=0.47s (21.69%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6614|ppo_ep: 1|act_loss: -0.000881195068359375|cri_loss: 7.82012939453125e-05|unsuper_loss: 0.0
+average reward score: 4.9921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.77%) |Training time=0.47s (21.62%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6615|ppo_ep: 1|act_loss: -0.00189971923828125|cri_loss: -0.0008196830749511719|unsuper_loss: 0.0
+average reward score: 5.88671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.01%) |Training time=0.46s (21.36%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6616|ppo_ep: 1|act_loss: 0.0089111328125|cri_loss: 0.004726409912109375|unsuper_loss: 0.0
+average reward score: 5.53125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.91%) |Training time=0.47s (21.47%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6617|ppo_ep: 1|act_loss: -0.017181396484375|cri_loss: -0.0083770751953125|unsuper_loss: 0.0
+average reward score: 5.984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.61s (72.20%) |Training time=0.48s (21.70%) |Others=0.14 (6.10%)|CurSamplesPerSec=14.37 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6618|ppo_ep: 1|act_loss: -0.006389617919921875|cri_loss: -0.0030574798583984375|unsuper_loss: 0.0
+average reward score: 5.171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.68s (74.84%) |Training time=0.47s (20.68%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.23 |AvgSamplesPerSec=14.53
+[2023-04-14 12:50:23,554] [INFO] [logging.py:96:log_dist] [Rank 0] step=6620, skipped=82, lr=[1.0220857954448672e-06, 1.0220857954448672e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:50:23,572] [INFO] [timer.py:199:stop] epoch=0/micro_step=6620/global_step=6620, RunningAvgSamplesPerSec=105.54226757249026, CurrSamplesPerSec=101.52726766602092, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:50:23,665] [INFO] [logging.py:96:log_dist] [Rank 0] step=6620, skipped=112, lr=[5.474805075879616e-07, 5.474805075879616e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6619|ppo_ep: 1|act_loss: -0.0173492431640625|cri_loss: -0.00852203369140625|unsuper_loss: 0.0
+average reward score: 5.140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.51%) |Training time=0.48s (21.91%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6620|ppo_ep: 1|act_loss: -0.02215576171875|cri_loss: -0.01073455810546875|unsuper_loss: 0.0
+average reward score: 5.5
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.35%) |Training time=0.48s (22.02%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6621|ppo_ep: 1|act_loss: 0.03692626953125|cri_loss: 0.0188446044921875|unsuper_loss: 0.0
+average reward score: 5.125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.48s (22.01%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6622|ppo_ep: 1|act_loss: -0.00305938720703125|cri_loss: -0.001110076904296875|unsuper_loss: 0.0
+average reward score: 4.94921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.12%) |Training time=0.48s (22.24%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6623|ppo_ep: 1|act_loss: 0.00664520263671875|cri_loss: 0.0036258697509765625|unsuper_loss: 0.0
+average reward score: 5.5703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.08%) |Training time=0.48s (22.30%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6624|ppo_ep: 1|act_loss: 0.002655029296875|cri_loss: 0.0015382766723632812|unsuper_loss: 0.0
+average reward score: 5.0
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.28%) |Training time=0.48s (22.14%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6625|ppo_ep: 1|act_loss: -0.017791748046875|cri_loss: -0.008697509765625|unsuper_loss: 0.0
+average reward score: 5.609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.48%) |Training time=0.48s (21.08%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.13 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6626|ppo_ep: 1|act_loss: -0.005893707275390625|cri_loss: -0.0027294158935546875|unsuper_loss: 0.0
+average reward score: 5.640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.30%) |Training time=0.48s (22.06%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6627|ppo_ep: 1|act_loss: -0.0082244873046875|cri_loss: -0.003978729248046875|unsuper_loss: 0.0
+average reward score: 5.5390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.05%) |Training time=0.48s (22.11%) |Others=0.10 (4.84%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6628|ppo_ep: 1|act_loss: -0.011962890625|cri_loss: -0.0058441162109375|unsuper_loss: 0.0
+average reward score: 5.21875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.04%) |Training time=0.48s (22.33%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
+[2023-04-14 12:50:45,332] [INFO] [logging.py:96:log_dist] [Rank 0] step=6630, skipped=82, lr=[1.0106811106517118e-06, 1.0106811106517118e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:50:45,350] [INFO] [timer.py:199:stop] epoch=0/micro_step=6630/global_step=6630, RunningAvgSamplesPerSec=105.53529721787794, CurrSamplesPerSec=101.13496418923755, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:50:45,442] [INFO] [logging.py:96:log_dist] [Rank 0] step=6630, skipped=112, lr=[5.41483989760803e-07, 5.41483989760803e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6629|ppo_ep: 1|act_loss: 0.043121337890625|cri_loss: 0.0227813720703125|unsuper_loss: 0.0
+average reward score: 5.6953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.23%) |Training time=0.48s (22.16%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6630|ppo_ep: 1|act_loss: -0.00467681884765625|cri_loss: -0.001865386962890625|unsuper_loss: 0.0
+average reward score: 5.37890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.90%) |Training time=0.49s (22.49%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6631|ppo_ep: 1|act_loss: -0.00705718994140625|cri_loss: -0.003284454345703125|unsuper_loss: 0.0
+average reward score: 5.27734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.47s (21.97%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6632|ppo_ep: 1|act_loss: -0.001850128173828125|cri_loss: -0.0005612373352050781|unsuper_loss: 0.0
+average reward score: 5.32421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.35%) |Training time=0.48s (20.35%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6633|ppo_ep: 1|act_loss: 0.007160186767578125|cri_loss: 0.0037708282470703125|unsuper_loss: 0.0
+average reward score: 5.4453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.21%) |Training time=0.48s (22.15%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6634|ppo_ep: 1|act_loss: 0.026824951171875|cri_loss: 0.0143890380859375|unsuper_loss: 0.0
+average reward score: 5.37109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.21%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6635|ppo_ep: 1|act_loss: -0.00769805908203125|cri_loss: -0.0037708282470703125|unsuper_loss: 0.0
+average reward score: 5.69921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.48s (22.17%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6636|ppo_ep: 1|act_loss: -0.016510009765625|cri_loss: -0.00434112548828125|unsuper_loss: 0.0
+average reward score: 5.8203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.21%) |Training time=0.48s (22.17%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6637|ppo_ep: 1|act_loss: 0.0094146728515625|cri_loss: 0.004886627197265625|unsuper_loss: 0.0
+average reward score: 4.875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.13%) |Training time=0.48s (22.25%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6638|ppo_ep: 1|act_loss: -0.014373779296875|cri_loss: -0.006885528564453125|unsuper_loss: 0.0
+average reward score: 4.91015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.11%) |Training time=0.48s (22.26%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53
+[2023-04-14 12:51:07,159] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+[2023-04-14 12:51:07,159] [INFO] [logging.py:96:log_dist] [Rank 0] step=6640, skipped=83, lr=[1.0004652290207957e-06, 1.0004652290207957e-06], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:51:07,160] [INFO] [timer.py:199:stop] epoch=0/micro_step=6640/global_step=6640, RunningAvgSamplesPerSec=105.52910092880747, CurrSamplesPerSec=109.88258198147307, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:51:07,252] [INFO] [logging.py:96:log_dist] [Rank 0] step=6640, skipped=112, lr=[5.355165018655778e-07, 5.355165018655778e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6639|ppo_ep: 1|act_loss: -0.00891876220703125|cri_loss: -0.004344940185546875|unsuper_loss: 0.0
+average reward score: 5.36328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.34%) |Training time=0.45s (21.05%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.53
+[2023-04-14 12:51:09,301] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 6640|ppo_ep: 1|act_loss: -0.0029621124267578125|cri_loss: -0.0013227462768554688|unsuper_loss: 0.0
+average reward score: 5.09375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.80%) |Training time=0.46s (21.55%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.53
+epoch: 0|step: 6641|ppo_ep: 1|act_loss: -0.01140594482421875|cri_loss: -0.00524139404296875|unsuper_loss: 0.0
+average reward score: 5.4453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.12%) |Training time=0.48s (22.26%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6642|ppo_ep: 1|act_loss: 0.020294189453125|cri_loss: 0.010894775390625|unsuper_loss: 0.0
+average reward score: 5.3046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.99%) |Training time=0.49s (22.40%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6643|ppo_ep: 1|act_loss: 0.0182037353515625|cri_loss: 0.00933837890625|unsuper_loss: 0.0
+average reward score: 4.203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.02%) |Training time=0.48s (22.35%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6644|ppo_ep: 1|act_loss: -0.00525665283203125|cri_loss: -0.002307891845703125|unsuper_loss: 0.0
+average reward score: 5.11328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.30%) |Training time=0.48s (22.07%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6645|ppo_ep: 1|act_loss: 0.0092010498046875|cri_loss: 0.005702972412109375|unsuper_loss: 0.0
+average reward score: 6.05859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.23%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6646|ppo_ep: 1|act_loss: -0.037841796875|cri_loss: -0.01470947265625|unsuper_loss: 0.0
+average reward score: 5.76171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.22%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6647|ppo_ep: 1|act_loss: 0.005802154541015625|cri_loss: 0.0029850006103515625|unsuper_loss: 0.0
+average reward score: 5.6953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.88%) |Training time=0.49s (20.83%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6648|ppo_ep: 1|act_loss: 0.0277862548828125|cri_loss: 0.01418304443359375|unsuper_loss: 0.0
+average reward score: 4.96875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.81%) |Training time=0.47s (21.53%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.54
+[2023-04-14 12:51:28,951] [INFO] [logging.py:96:log_dist] [Rank 0] step=6650, skipped=84, lr=[9.90295265354164e-07, 9.90295265354164e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:51:28,969] [INFO] [timer.py:199:stop] epoch=0/micro_step=6650/global_step=6650, RunningAvgSamplesPerSec=105.52190506169607, CurrSamplesPerSec=98.76568618000208, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:51:29,062] [INFO] [logging.py:96:log_dist] [Rank 0] step=6650, skipped=112, lr=[5.295781323548535e-07, 5.295781323548535e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6649|ppo_ep: 1|act_loss: -0.032806396484375|cri_loss: -0.01139068603515625|unsuper_loss: 0.0
+average reward score: 5.41015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.01%) |Training time=0.49s (22.40%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6650|ppo_ep: 1|act_loss: -0.01148223876953125|cri_loss: -0.005680084228515625|unsuper_loss: 0.0
+average reward score: 5.47265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.16%) |Training time=0.48s (22.22%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6651|ppo_ep: 1|act_loss: -0.0098876953125|cri_loss: -0.004856109619140625|unsuper_loss: 0.0
+average reward score: 5.765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.47s (21.90%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6652|ppo_ep: 1|act_loss: 0.0227203369140625|cri_loss: 0.0144195556640625|unsuper_loss: 0.0
+average reward score: 4.796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.03%) |Training time=0.48s (22.36%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6653|ppo_ep: 1|act_loss: 0.0352783203125|cri_loss: 0.0180816650390625|unsuper_loss: 0.0
+average reward score: 5.27734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.35%) |Training time=0.48s (22.01%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6654|ppo_ep: 1|act_loss: -0.01111602783203125|cri_loss: -0.004726409912109375|unsuper_loss: 0.0
+average reward score: 5.01171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.62s (72.03%) |Training time=0.49s (21.78%) |Others=0.14 (6.19%)|CurSamplesPerSec=14.22 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6655|ppo_ep: 1|act_loss: 0.003253936767578125|cri_loss: 0.002178192138671875|unsuper_loss: 0.0
+average reward score: 5.5546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.06%) |Training time=0.48s (22.33%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6656|ppo_ep: 1|act_loss: 0.01036834716796875|cri_loss: 0.00536346435546875|unsuper_loss: 0.0
+average reward score: 4.9453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.48s (22.04%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6657|ppo_ep: 1|act_loss: -0.001857757568359375|cri_loss: -0.0005521774291992188|unsuper_loss: 0.0
+average reward score: 5.34765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.06%) |Training time=0.48s (22.32%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6658|ppo_ep: 1|act_loss: -0.00725555419921875|cri_loss: -0.003559112548828125|unsuper_loss: 0.0
+average reward score: 5.63671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.16%) |Training time=0.48s (22.20%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.54
+[2023-04-14 12:51:50,678] [INFO] [logging.py:96:log_dist] [Rank 0] step=6660, skipped=84, lr=[9.790493086186587e-07, 9.790493086186587e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:51:50,697] [INFO] [timer.py:199:stop] epoch=0/micro_step=6660/global_step=6660, RunningAvgSamplesPerSec=105.51379557442812, CurrSamplesPerSec=99.45649093894681, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:51:50,790] [INFO] [logging.py:96:log_dist] [Rank 0] step=6660, skipped=112, lr=[5.236689692495916e-07, 5.236689692495916e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6659|ppo_ep: 1|act_loss: -0.0159912109375|cri_loss: -0.00777435302734375|unsuper_loss: 0.0
+average reward score: 5.0390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.01%) |Training time=0.48s (22.38%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6660|ppo_ep: 1|act_loss: -0.00875091552734375|cri_loss: -0.00411224365234375|unsuper_loss: 0.0
+average reward score: 5.234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.33%) |Training time=0.47s (22.01%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6661|ppo_ep: 1|act_loss: -0.009613037109375|cri_loss: -0.0046539306640625|unsuper_loss: 0.0
+average reward score: 5.36328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.29%) |Training time=0.48s (22.08%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6662|ppo_ep: 1|act_loss: -0.020050048828125|cri_loss: -0.00936126708984375|unsuper_loss: 0.0
+average reward score: 5.8046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.27%) |Training time=0.54s (24.21%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.42 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6663|ppo_ep: 1|act_loss: -0.0089111328125|cri_loss: -0.00431060791015625|unsuper_loss: 0.0
+average reward score: 4.75
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.76%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6664|ppo_ep: 1|act_loss: -0.006011962890625|cri_loss: -0.00070953369140625|unsuper_loss: 0.0
+average reward score: 5.22265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.94%) |Training time=0.46s (21.38%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6665|ppo_ep: 1|act_loss: -0.01068115234375|cri_loss: -0.005191802978515625|unsuper_loss: 0.0
+average reward score: 5.3203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.04%) |Training time=0.45s (21.27%) |Others=0.10 (4.69%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6666|ppo_ep: 1|act_loss: -0.01541900634765625|cri_loss: -0.0076141357421875|unsuper_loss: 0.0
+average reward score: 5.34375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.34%) |Training time=0.47s (22.00%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.54
+[2023-04-14 12:52:07,930] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
+epoch: 0|step: 6667|ppo_ep: 1|act_loss: 0.03076171875|cri_loss: 0.0161285400390625|unsuper_loss: 0.0
+average reward score: 5.4765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.71%) |Training time=0.43s (20.58%) |Others=0.10 (4.71%)|CurSamplesPerSec=15.15 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6668|ppo_ep: 1|act_loss: 0.005889892578125|cri_loss: 0.003192901611328125|unsuper_loss: 0.0
+average reward score: 4.875
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.02%) |Training time=0.45s (21.30%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.54
+[2023-04-14 12:52:12,186] [INFO] [logging.py:96:log_dist] [Rank 0] step=6670, skipped=85, lr=[9.689766831817931e-07, 9.689766831817931e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:52:12,205] [INFO] [timer.py:199:stop] epoch=0/micro_step=6670/global_step=6670, RunningAvgSamplesPerSec=105.51352462606164, CurrSamplesPerSec=108.40804827478699, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:52:12,297] [INFO] [logging.py:96:log_dist] [Rank 0] step=6670, skipped=112, lr=[5.177891001378454e-07, 5.177891001378454e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6669|ppo_ep: 1|act_loss: -0.0132293701171875|cri_loss: -0.006320953369140625|unsuper_loss: 0.0
+average reward score: 5.38671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.93%) |Training time=0.46s (21.40%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6670|ppo_ep: 1|act_loss: -0.0029659271240234375|cri_loss: -0.0012969970703125|unsuper_loss: 0.0
+average reward score: 5.30078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.05%) |Training time=0.45s (21.24%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6671|ppo_ep: 1|act_loss: -0.00601959228515625|cri_loss: -0.00296783447265625|unsuper_loss: 0.0
+average reward score: 5.78125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.85%) |Training time=0.46s (21.49%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6672|ppo_ep: 1|act_loss: -0.0157928466796875|cri_loss: -0.007663726806640625|unsuper_loss: 0.0
+average reward score: 4.9453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.19%) |Training time=0.45s (21.10%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6673|ppo_ep: 1|act_loss: 0.002552032470703125|cri_loss: 0.001552581787109375|unsuper_loss: 0.0
+average reward score: 5.1484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.21%) |Training time=0.45s (21.11%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6674|ppo_ep: 1|act_loss: -0.0108184814453125|cri_loss: -0.005218505859375|unsuper_loss: 0.0
+average reward score: 5.38671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.12%) |Training time=0.45s (21.19%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6675|ppo_ep: 1|act_loss: -0.0014591217041015625|cri_loss: -0.0005102157592773438|unsuper_loss: 0.0
+average reward score: 5.95703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.32%) |Training time=0.45s (20.86%) |Others=0.10 (4.81%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6676|ppo_ep: 1|act_loss: 0.00400543212890625|cri_loss: 0.00254058837890625|unsuper_loss: 0.0
+average reward score: 5.6875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.12%) |Training time=0.45s (21.22%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6677|ppo_ep: 1|act_loss: 0.00536346435546875|cri_loss: 0.0029087066650390625|unsuper_loss: 0.0
+average reward score: 5.4375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.21%) |Training time=0.45s (21.09%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6678|ppo_ep: 1|act_loss: 0.0085906982421875|cri_loss: 0.004543304443359375|unsuper_loss: 0.0
+average reward score: 6.25
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.34%) |Training time=0.48s (20.36%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.54
+[2023-04-14 12:52:33,771] [INFO] [logging.py:96:log_dist] [Rank 0] step=6680, skipped=85, lr=[9.578391801772933e-07, 9.578391801772933e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:52:33,790] [INFO] [timer.py:199:stop] epoch=0/micro_step=6680/global_step=6680, RunningAvgSamplesPerSec=105.51922652617614, CurrSamplesPerSec=109.9554075841187, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:52:33,882] [INFO] [logging.py:96:log_dist] [Rank 0] step=6680, skipped=112, lr=[5.119386121734576e-07, 5.119386121734576e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6679|ppo_ep: 1|act_loss: -0.0096435546875|cri_loss: -0.0047149658203125|unsuper_loss: 0.0
+average reward score: 6.37109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.19%) |Training time=0.45s (21.15%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6680|ppo_ep: 1|act_loss: -0.0172576904296875|cri_loss: -0.00783538818359375|unsuper_loss: 0.0
+average reward score: 5.3125
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.22%) |Training time=0.45s (21.08%) |Others=0.10 (4.70%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6681|ppo_ep: 1|act_loss: -0.0127716064453125|cri_loss: -0.006191253662109375|unsuper_loss: 0.0
+average reward score: 5.703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.88%) |Training time=0.46s (21.44%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6682|ppo_ep: 1|act_loss: -0.00936126708984375|cri_loss: -0.0045166015625|unsuper_loss: 0.0
+average reward score: 4.59375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.16%) |Training time=0.45s (21.14%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6683|ppo_ep: 1|act_loss: -0.0095062255859375|cri_loss: -0.003978729248046875|unsuper_loss: 0.0
+average reward score: 5.171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.46s (21.29%) |Others=0.11 (4.89%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6684|ppo_ep: 1|act_loss: 0.025115966796875|cri_loss: 0.012786865234375|unsuper_loss: 0.0
+average reward score: 5.875
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.62s (71.17%) |Training time=0.56s (24.41%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.07 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6685|ppo_ep: 1|act_loss: -0.01519775390625|cri_loss: -0.00749969482421875|unsuper_loss: 0.0
+average reward score: 5.41015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.73%) |Training time=0.45s (21.13%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6686|ppo_ep: 1|act_loss: -0.0013332366943359375|cri_loss: -0.0005702972412109375|unsuper_loss: 0.0
+average reward score: 5.21875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.80%) |Training time=0.46s (21.52%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6687|ppo_ep: 1|act_loss: -0.019500732421875|cri_loss: -0.00909423828125|unsuper_loss: 0.0
+average reward score: 5.1484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.89%) |Training time=0.46s (21.41%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6688|ppo_ep: 1|act_loss: -0.01041412353515625|cri_loss: -0.005092620849609375|unsuper_loss: 0.0
+average reward score: 5.38671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.06%) |Training time=0.45s (21.25%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.54
+[2023-04-14 12:52:55,313] [INFO] [logging.py:96:log_dist] [Rank 0] step=6690, skipped=85, lr=[9.467589978251612e-07, 9.467589978251612e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:52:55,331] [INFO] [timer.py:199:stop] epoch=0/micro_step=6690/global_step=6690, RunningAvgSamplesPerSec=105.5206408271309, CurrSamplesPerSec=109.22053367771504, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:52:55,424] [INFO] [logging.py:96:log_dist] [Rank 0] step=6690, skipped=112, lr=[5.061175920747744e-07, 5.061175920747744e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6689|ppo_ep: 1|act_loss: -0.0106201171875|cri_loss: -0.00505828857421875|unsuper_loss: 0.0
+average reward score: 5.125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.97%) |Training time=0.46s (21.36%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6690|ppo_ep: 1|act_loss: -0.00559234619140625|cri_loss: -0.00270843505859375|unsuper_loss: 0.0
+average reward score: 6.0390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.88%) |Training time=0.46s (21.42%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6691|ppo_ep: 1|act_loss: -0.0083770751953125|cri_loss: -0.004062652587890625|unsuper_loss: 0.0
+average reward score: 5.73828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.86%) |Training time=0.46s (21.45%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6692|ppo_ep: 1|act_loss: -0.00913238525390625|cri_loss: -0.0028133392333984375|unsuper_loss: 0.0
+average reward score: 5.828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.00%) |Training time=0.45s (21.32%) |Others=0.10 (4.68%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6693|ppo_ep: 1|act_loss: -0.0193328857421875|cri_loss: -0.0092010498046875|unsuper_loss: 0.0
+average reward score: 5.26171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.62%) |Training time=0.46s (21.62%) |Others=0.10 (4.77%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6694|ppo_ep: 1|act_loss: -0.01210784912109375|cri_loss: -0.00600433349609375|unsuper_loss: 0.0
+average reward score: 6.19140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.59%) |Training time=0.50s (21.05%) |Others=0.10 (4.35%)|CurSamplesPerSec=13.60 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6695|ppo_ep: 1|act_loss: 0.0144500732421875|cri_loss: 0.007442474365234375|unsuper_loss: 0.0
+average reward score: 4.94140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.01%) |Training time=0.46s (21.31%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6696|ppo_ep: 1|act_loss: 0.005863189697265625|cri_loss: 0.003330230712890625|unsuper_loss: 0.0
+average reward score: 5.046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.15%) |Training time=0.45s (21.15%) |Others=0.10 (4.70%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6697|ppo_ep: 1|act_loss: -0.009918212890625|cri_loss: -0.00487518310546875|unsuper_loss: 0.0
+average reward score: 5.25390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.06%) |Training time=0.45s (21.25%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6698|ppo_ep: 1|act_loss: 0.02923583984375|cri_loss: 0.01513671875|unsuper_loss: 0.0
+average reward score: 5.6796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.80%) |Training time=0.46s (21.55%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.54
+[2023-04-14 12:53:16,902] [INFO] [logging.py:96:log_dist] [Rank 0] step=6700, skipped=85, lr=[9.357363003604284e-07, 9.357363003604284e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:53:16,920] [INFO] [timer.py:199:stop] epoch=0/micro_step=6700/global_step=6700, RunningAvgSamplesPerSec=105.52344625762034, CurrSamplesPerSec=109.05564597185241, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:53:17,004] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+[2023-04-14 12:53:17,005] [INFO] [logging.py:96:log_dist] [Rank 0] step=6700, skipped=113, lr=[5.009039403318924e-07, 5.009039403318924e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6699|ppo_ep: 1|act_loss: -0.01666259765625|cri_loss: -0.00447845458984375|unsuper_loss: 0.0
+average reward score: 4.7890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.36%) |Training time=0.46s (21.45%) |Others=0.09 (4.19%)|CurSamplesPerSec=15.06 |AvgSamplesPerSec=14.54
+[2023-04-14 12:53:19,135] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 6700|ppo_ep: 1|act_loss: 0.007534027099609375|cri_loss: 0.003940582275390625|unsuper_loss: 0.0
+average reward score: 5.96875
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.21%) |Training time=0.46s (21.58%) |Others=0.09 (4.20%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6701|ppo_ep: 1|act_loss: -0.0025157928466796875|cri_loss: -0.0008792877197265625|unsuper_loss: 0.0
+average reward score: 5.5234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.04%) |Training time=0.46s (21.30%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6702|ppo_ep: 1|act_loss: 0.00467681884765625|cri_loss: 0.002513885498046875|unsuper_loss: 0.0
+average reward score: 5.2734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.84%) |Training time=0.46s (21.48%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6703|ppo_ep: 1|act_loss: 0.0185394287109375|cri_loss: 0.0096435546875|unsuper_loss: 0.0
+average reward score: 5.203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.01%) |Training time=0.46s (21.30%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6704|ppo_ep: 1|act_loss: 0.00250244140625|cri_loss: 0.0015621185302734375|unsuper_loss: 0.0
+average reward score: 5.515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.34%) |Training time=0.45s (20.96%) |Others=0.10 (4.69%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6705|ppo_ep: 1|act_loss: -0.0096435546875|cri_loss: -0.0043487548828125|unsuper_loss: 0.0
+average reward score: 4.9296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.16%) |Training time=0.45s (21.15%) |Others=0.10 (4.68%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6706|ppo_ep: 1|act_loss: 0.00850677490234375|cri_loss: 0.0045928955078125|unsuper_loss: 0.0
+average reward score: 5.43359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.45%) |Training time=0.44s (20.85%) |Others=0.10 (4.70%)|CurSamplesPerSec=15.03 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6707|ppo_ep: 1|act_loss: -0.016876220703125|cri_loss: -0.008056640625|unsuper_loss: 0.0
+average reward score: 5.56640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.94%) |Training time=0.46s (21.39%) |Others=0.10 (4.67%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6708|ppo_ep: 1|act_loss: -0.020416259765625|cri_loss: -0.0099334716796875|unsuper_loss: 0.0
+average reward score: 5.32421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.13%) |Training time=0.52s (23.65%) |Others=0.12 (5.22%)|CurSamplesPerSec=14.41 |AvgSamplesPerSec=14.54
+[2023-04-14 12:53:38,530] [INFO] [logging.py:96:log_dist] [Rank 0] step=6710, skipped=85, lr=[9.247712511660617e-07, 9.247712511660617e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:53:38,547] [INFO] [timer.py:199:stop] epoch=0/micro_step=6710/global_step=6710, RunningAvgSamplesPerSec=105.52877706520603, CurrSamplesPerSec=108.05545040370528, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:53:38,640] [INFO] [logging.py:96:log_dist] [Rank 0] step=6710, skipped=114, lr=[4.957142900498335e-07, 4.957142900498335e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6709|ppo_ep: 1|act_loss: -0.00634765625|cri_loss: -0.0029582977294921875|unsuper_loss: 0.0
+average reward score: 5.5625
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.11%) |Training time=0.46s (19.61%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6710|ppo_ep: 1|act_loss: -0.012725830078125|cri_loss: -0.0059661865234375|unsuper_loss: 0.0
+average reward score: 5.39453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.81%) |Training time=0.44s (20.52%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6711|ppo_ep: 1|act_loss: 0.0023860931396484375|cri_loss: 0.0013742446899414062|unsuper_loss: 0.0
+average reward score: 5.5234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.72%) |Training time=0.44s (20.61%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6712|ppo_ep: 1|act_loss: 0.0177459716796875|cri_loss: 0.00939178466796875|unsuper_loss: 0.0
+average reward score: 5.734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.38%) |Training time=0.45s (20.88%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6713|ppo_ep: 1|act_loss: 0.0116729736328125|cri_loss: 0.006237030029296875|unsuper_loss: 0.0
+average reward score: 5.03515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.73%) |Training time=0.44s (20.54%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6714|ppo_ep: 1|act_loss: 0.0052642822265625|cri_loss: 0.0027484893798828125|unsuper_loss: 0.0
+average reward score: 5.2421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.03%) |Training time=0.46s (20.56%) |Others=0.10 (4.41%)|CurSamplesPerSec=14.15 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6715|ppo_ep: 1|act_loss: -0.00588226318359375|cri_loss: -0.002285003662109375|unsuper_loss: 0.0
+average reward score: 5.81640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.57%) |Training time=0.44s (20.76%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6716|ppo_ep: 1|act_loss: 0.007537841796875|cri_loss: 0.00385284423828125|unsuper_loss: 0.0
+average reward score: 6.08984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.76%) |Training time=0.44s (20.56%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6717|ppo_ep: 1|act_loss: 0.00479888916015625|cri_loss: 0.00278472900390625|unsuper_loss: 0.0
+average reward score: 5.68359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.42%) |Training time=0.45s (20.93%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6718|ppo_ep: 1|act_loss: 0.0034942626953125|cri_loss: 0.0019989013671875|unsuper_loss: 0.0
+average reward score: 5.24609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.10%) |Training time=0.45s (21.22%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.54
+[2023-04-14 12:54:00,080] [INFO] [logging.py:96:log_dist] [Rank 0] step=6720, skipped=85, lr=[9.138640127705436e-07, 9.138640127705436e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:54:00,098] [INFO] [timer.py:199:stop] epoch=0/micro_step=6720/global_step=6720, RunningAvgSamplesPerSec=105.53805823334427, CurrSamplesPerSec=109.25004944906014, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:54:00,190] [INFO] [logging.py:96:log_dist] [Rank 0] step=6720, skipped=114, lr=[4.899762375939118e-07, 4.899762375939118e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6719|ppo_ep: 1|act_loss: -0.02252197265625|cri_loss: -0.01092529296875|unsuper_loss: 0.0
+average reward score: 5.78515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.03%) |Training time=0.46s (21.31%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6720|ppo_ep: 1|act_loss: 0.0220947265625|cri_loss: 0.01131439208984375|unsuper_loss: 0.0
+average reward score: 6.6484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.86%) |Training time=0.46s (21.47%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6721|ppo_ep: 1|act_loss: -0.0132293701171875|cri_loss: -0.00577545166015625|unsuper_loss: 0.0
+average reward score: 5.41796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.69%) |Training time=0.46s (21.65%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6722|ppo_ep: 1|act_loss: -0.018951416015625|cri_loss: -0.00917816162109375|unsuper_loss: 0.0
+average reward score: 4.51171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.01%) |Training time=0.46s (21.37%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6723|ppo_ep: 1|act_loss: -0.013641357421875|cri_loss: -0.006664276123046875|unsuper_loss: 0.0
+average reward score: 5.578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.92%) |Training time=0.46s (21.42%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6724|ppo_ep: 1|act_loss: -0.0198211669921875|cri_loss: -0.009765625|unsuper_loss: 0.0
+average reward score: 5.37890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.04%) |Training time=0.48s (20.65%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6725|ppo_ep: 1|act_loss: 0.004795074462890625|cri_loss: 0.0027618408203125|unsuper_loss: 0.0
+average reward score: 5.4609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.95%) |Training time=0.46s (21.40%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6726|ppo_ep: 1|act_loss: 0.00786590576171875|cri_loss: 0.004146575927734375|unsuper_loss: 0.0
+average reward score: 4.609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.02%) |Training time=0.46s (21.30%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6727|ppo_ep: 1|act_loss: -0.0087127685546875|cri_loss: -0.00384521484375|unsuper_loss: 0.0
+average reward score: 5.765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.69%) |Training time=0.46s (21.66%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6728|ppo_ep: 1|act_loss: 0.0158233642578125|cri_loss: 0.00830841064453125|unsuper_loss: 0.0
+average reward score: 5.22265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.98%) |Training time=0.46s (21.35%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.54
+[2023-04-14 12:54:21,710] [INFO] [logging.py:96:log_dist] [Rank 0] step=6730, skipped=85, lr=[9.03014746845457e-07, 9.03014746845457e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:54:21,729] [INFO] [timer.py:199:stop] epoch=0/micro_step=6730/global_step=6730, RunningAvgSamplesPerSec=105.53983507346179, CurrSamplesPerSec=108.58169774427452, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:54:21,821] [INFO] [logging.py:96:log_dist] [Rank 0] step=6730, skipped=114, lr=[4.842679785390672e-07, 4.842679785390672e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6729|ppo_ep: 1|act_loss: 0.0019283294677734375|cri_loss: 0.0010194778442382812|unsuper_loss: 0.0
+average reward score: 5.609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.93%) |Training time=0.46s (21.40%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6730|ppo_ep: 1|act_loss: -0.023345947265625|cri_loss: -0.01141357421875|unsuper_loss: 0.0
+average reward score: 5.7265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.99%) |Training time=0.46s (21.32%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6731|ppo_ep: 1|act_loss: 0.00859832763671875|cri_loss: 0.00452423095703125|unsuper_loss: 0.0
+average reward score: 5.2265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.80%) |Training time=0.46s (21.52%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6732|ppo_ep: 1|act_loss: -0.002971649169921875|cri_loss: -0.0011320114135742188|unsuper_loss: 0.0
+average reward score: 5.890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.14%) |Training time=0.45s (21.19%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6733|ppo_ep: 1|act_loss: -0.011474609375|cri_loss: -0.00536346435546875|unsuper_loss: 0.0
+average reward score: 5.203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.81%) |Training time=0.46s (21.52%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6734|ppo_ep: 1|act_loss: -0.00032138824462890625|cri_loss: -0.00010442733764648438|unsuper_loss: 0.0
+average reward score: 5.109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.62%) |Training time=0.47s (21.72%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6735|ppo_ep: 1|act_loss: -0.010223388671875|cri_loss: -0.004852294921875|unsuper_loss: 0.0
+average reward score: 4.62890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.12%) |Training time=0.45s (21.21%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6736|ppo_ep: 1|act_loss: 0.0158538818359375|cri_loss: 0.0081787109375|unsuper_loss: 0.0
+average reward score: 4.828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.90%) |Training time=0.46s (21.44%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6737|ppo_ep: 1|act_loss: -0.01104736328125|cri_loss: -0.005374908447265625|unsuper_loss: 0.0
+average reward score: 5.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.89%) |Training time=0.46s (21.44%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6738|ppo_ep: 1|act_loss: 0.015716552734375|cri_loss: 0.008941650390625|unsuper_loss: 0.0
+average reward score: 5.22265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.89%) |Training time=0.46s (21.44%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.54
+[2023-04-14 12:54:43,139] [INFO] [logging.py:96:log_dist] [Rank 0] step=6740, skipped=85, lr=[8.922236142030963e-07, 8.922236142030963e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:54:43,165] [INFO] [timer.py:199:stop] epoch=0/micro_step=6740/global_step=6740, RunningAvgSamplesPerSec=105.54288850018418, CurrSamplesPerSec=105.9612526723656, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:54:43,311] [INFO] [logging.py:96:log_dist] [Rank 0] step=6740, skipped=114, lr=[4.785895974954696e-07, 4.785895974954696e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6739|ppo_ep: 1|act_loss: 0.042236328125|cri_loss: 0.021759033203125|unsuper_loss: 0.0
+average reward score: 5.140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.74%) |Training time=0.47s (21.34%) |Others=0.15 (6.92%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6740|ppo_ep: 1|act_loss: 0.003856658935546875|cri_loss: 0.0022125244140625|unsuper_loss: 0.0
+average reward score: 4.85546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.02%) |Training time=0.47s (20.50%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.08 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6741|ppo_ep: 1|act_loss: -0.006992340087890625|cri_loss: -0.0033550262451171875|unsuper_loss: 0.0
+average reward score: 5.44140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.82%) |Training time=0.47s (21.49%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6742|ppo_ep: 1|act_loss: 0.002826690673828125|cri_loss: 0.0014934539794921875|unsuper_loss: 0.0
+average reward score: 5.0078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.33%) |Training time=0.43s (19.96%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6743|ppo_ep: 1|act_loss: 0.0209503173828125|cri_loss: 0.01145172119140625|unsuper_loss: 0.0
+average reward score: 5.35546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.67s (75.40%) |Training time=0.44s (19.95%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.46 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6744|ppo_ep: 1|act_loss: -0.01523590087890625|cri_loss: -0.00753021240234375|unsuper_loss: 0.0
+average reward score: 6.4765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.75s (76.03%) |Training time=0.45s (19.51%) |Others=0.10 (4.46%)|CurSamplesPerSec=13.92 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6745|ppo_ep: 1|act_loss: 0.063232421875|cri_loss: 0.03399658203125|unsuper_loss: 0.0
+average reward score: 5.53125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.24%) |Training time=0.44s (20.04%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6746|ppo_ep: 1|act_loss: 0.0103607177734375|cri_loss: 0.0072021484375|unsuper_loss: 0.0
+average reward score: 5.68359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.31%) |Training time=0.44s (19.99%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6747|ppo_ep: 1|act_loss: -0.0131072998046875|cri_loss: -0.006072998046875|unsuper_loss: 0.0
+average reward score: 5.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.35%) |Training time=0.48s (21.81%) |Others=0.11 (4.83%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6748|ppo_ep: 1|act_loss: 0.0301055908203125|cri_loss: 0.01544952392578125|unsuper_loss: 0.0
+average reward score: 5.671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.14%) |Training time=0.48s (22.24%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.54
+[2023-04-14 12:55:05,236] [INFO] [logging.py:96:log_dist] [Rank 0] step=6750, skipped=85, lr=[8.81490774794079e-07, 8.81490774794079e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:55:05,255] [INFO] [timer.py:199:stop] epoch=0/micro_step=6750/global_step=6750, RunningAvgSamplesPerSec=105.54850591402027, CurrSamplesPerSec=105.97112470885476, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:55:05,348] [INFO] [logging.py:96:log_dist] [Rank 0] step=6750, skipped=114, lr=[4.729411786304247e-07, 4.729411786304247e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6749|ppo_ep: 1|act_loss: -0.0013751983642578125|cri_loss: -0.00048041343688964844|unsuper_loss: 0.0
+average reward score: 5.23828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.91%) |Training time=0.47s (21.48%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6750|ppo_ep: 1|act_loss: 0.0038166046142578125|cri_loss: 0.002071380615234375|unsuper_loss: 0.0
+average reward score: 5.60546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.95%) |Training time=0.47s (21.46%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6751|ppo_ep: 1|act_loss: -0.01177978515625|cri_loss: -0.005767822265625|unsuper_loss: 0.0
+average reward score: 5.171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.35%) |Training time=0.46s (21.04%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6752|ppo_ep: 1|act_loss: 0.0181884765625|cri_loss: 0.009521484375|unsuper_loss: 0.0
+average reward score: 5.59765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.21%) |Training time=0.46s (21.15%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6753|ppo_ep: 1|act_loss: -0.011199951171875|cri_loss: -0.005107879638671875|unsuper_loss: 0.0
+average reward score: 5.2578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.06%) |Training time=0.47s (21.32%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6754|ppo_ep: 1|act_loss: -0.00250244140625|cri_loss: -0.0010585784912109375|unsuper_loss: 0.0
+average reward score: 5.14453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.30%) |Training time=0.45s (19.38%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.68 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6755|ppo_ep: 1|act_loss: -0.004077911376953125|cri_loss: -0.0019178390502929688|unsuper_loss: 0.0
+average reward score: 5.171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.38%) |Training time=0.43s (19.83%) |Others=0.10 (4.79%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6756|ppo_ep: 1|act_loss: -0.00409698486328125|cri_loss: -0.0018758773803710938|unsuper_loss: 0.0
+average reward score: 4.796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.28%) |Training time=0.46s (21.02%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6757|ppo_ep: 1|act_loss: 0.0287628173828125|cri_loss: 0.0147857666015625|unsuper_loss: 0.0
+average reward score: 4.98046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.43%) |Training time=0.46s (21.01%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6758|ppo_ep: 1|act_loss: 0.03680419921875|cri_loss: 0.018829345703125|unsuper_loss: 0.0
+average reward score: 5.3515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.02%) |Training time=0.47s (21.39%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.54
+[2023-04-14 12:55:27,208] [INFO] [logging.py:96:log_dist] [Rank 0] step=6760, skipped=85, lr=[8.708163877049794e-07, 8.708163877049794e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:55:27,226] [INFO] [timer.py:199:stop] epoch=0/micro_step=6760/global_step=6760, RunningAvgSamplesPerSec=105.55200119918464, CurrSamplesPerSec=105.85219716823623, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:55:27,322] [INFO] [logging.py:96:log_dist] [Rank 0] step=6760, skipped=114, lr=[4.673228056671253e-07, 4.673228056671253e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6759|ppo_ep: 1|act_loss: 0.0150146484375|cri_loss: 0.00763702392578125|unsuper_loss: 0.0
+average reward score: 5.51171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.88%) |Training time=0.47s (21.37%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6760|ppo_ep: 1|act_loss: -0.008392333984375|cri_loss: -0.00408172607421875|unsuper_loss: 0.0
+average reward score: 5.5625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.84%) |Training time=0.47s (21.49%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6761|ppo_ep: 1|act_loss: 0.03912353515625|cri_loss: 0.020965576171875|unsuper_loss: 0.0
+average reward score: 5.9609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.82%) |Training time=0.47s (21.58%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6762|ppo_ep: 1|act_loss: -0.00550079345703125|cri_loss: -0.0025272369384765625|unsuper_loss: 0.0
+average reward score: 5.44921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.95%) |Training time=0.47s (21.45%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6763|ppo_ep: 1|act_loss: -0.0089874267578125|cri_loss: -0.00429534912109375|unsuper_loss: 0.0
+average reward score: 4.921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.84%) |Training time=0.47s (21.58%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6764|ppo_ep: 1|act_loss: 0.06707763671875|cri_loss: 0.035980224609375|unsuper_loss: 0.0
+average reward score: 5.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.72s (74.51%) |Training time=0.48s (20.82%) |Others=0.11 (4.67%)|CurSamplesPerSec=13.86 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6765|ppo_ep: 1|act_loss: 0.0087127685546875|cri_loss: 0.004486083984375|unsuper_loss: 0.0
+average reward score: 5.5625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.60%) |Training time=0.48s (21.82%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6766|ppo_ep: 1|act_loss: 0.017425537109375|cri_loss: 0.00891876220703125|unsuper_loss: 0.0
+average reward score: 5.7421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.62%) |Training time=0.48s (21.79%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6767|ppo_ep: 1|act_loss: -0.010345458984375|cri_loss: -0.004726409912109375|unsuper_loss: 0.0
+average reward score: 5.23046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.66%) |Training time=0.47s (21.75%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6768|ppo_ep: 1|act_loss: -0.04034423828125|cri_loss: -0.01947021484375|unsuper_loss: 0.0
+average reward score: 4.66015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.60%) |Training time=0.48s (21.80%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.54
+[2023-04-14 12:55:49,293] [INFO] [logging.py:96:log_dist] [Rank 0] step=6770, skipped=85, lr=[8.602006111559606e-07, 8.602006111559606e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:55:49,311] [INFO] [timer.py:199:stop] epoch=0/micro_step=6770/global_step=6770, RunningAvgSamplesPerSec=105.54869605550402, CurrSamplesPerSec=104.80069649682399, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:55:49,407] [INFO] [logging.py:96:log_dist] [Rank 0] step=6770, skipped=114, lr=[4.6173456188341234e-07, 4.6173456188341234e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6769|ppo_ep: 1|act_loss: -0.00492095947265625|cri_loss: -0.002368927001953125|unsuper_loss: 0.0
+average reward score: 5.3515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.57%) |Training time=0.47s (20.02%) |Others=0.10 (4.42%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6770|ppo_ep: 1|act_loss: -0.00428009033203125|cri_loss: -0.0020275115966796875|unsuper_loss: 0.0
+average reward score: 5.9296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.16%) |Training time=0.46s (21.19%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6771|ppo_ep: 1|act_loss: -0.02099609375|cri_loss: -0.010284423828125|unsuper_loss: 0.0
+average reward score: 5.4765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.69%) |Training time=0.47s (21.69%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6772|ppo_ep: 1|act_loss: 0.03363037109375|cri_loss: 0.0172271728515625|unsuper_loss: 0.0
+average reward score: 5.56640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.39%) |Training time=0.46s (20.97%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6773|ppo_ep: 1|act_loss: -0.00351715087890625|cri_loss: -0.0007381439208984375|unsuper_loss: 0.0
+average reward score: 5.84375
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.19%) |Training time=0.46s (20.40%) |Others=0.10 (4.41%)|CurSamplesPerSec=14.07 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6774|ppo_ep: 1|act_loss: -0.0094451904296875|cri_loss: -0.00452423095703125|unsuper_loss: 0.0
+average reward score: 5.49609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.07%) |Training time=0.46s (21.24%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6775|ppo_ep: 1|act_loss: 0.01076507568359375|cri_loss: 0.00554656982421875|unsuper_loss: 0.0
+average reward score: 5.859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.18%) |Training time=0.46s (21.18%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6776|ppo_ep: 1|act_loss: -0.003173828125|cri_loss: -0.001331329345703125|unsuper_loss: 0.0
+average reward score: 5.125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.03%) |Training time=0.46s (21.34%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6777|ppo_ep: 1|act_loss: -0.00521087646484375|cri_loss: -0.0022106170654296875|unsuper_loss: 0.0
+average reward score: 5.23046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.36%) |Training time=0.48s (22.05%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6778|ppo_ep: 1|act_loss: 0.01116943359375|cri_loss: 0.005718231201171875|unsuper_loss: 0.0
+average reward score: 5.45703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.12%) |Training time=0.46s (21.25%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.54
+[2023-04-14 12:56:11,135] [INFO] [logging.py:96:log_dist] [Rank 0] step=6780, skipped=85, lr=[8.496436024984428e-07, 8.496436024984428e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:56:11,153] [INFO] [timer.py:199:stop] epoch=0/micro_step=6780/global_step=6780, RunningAvgSamplesPerSec=105.54960072512817, CurrSamplesPerSec=107.5528061003715, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:56:11,246] [INFO] [logging.py:96:log_dist] [Rank 0] step=6780, skipped=114, lr=[4.5617653011053977e-07, 4.5617653011053977e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6779|ppo_ep: 1|act_loss: 0.0100555419921875|cri_loss: 0.00524139404296875|unsuper_loss: 0.0
+average reward score: 5.98828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.12%) |Training time=0.46s (21.28%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6780|ppo_ep: 1|act_loss: -0.01525115966796875|cri_loss: -0.007476806640625|unsuper_loss: 0.0
+average reward score: 5.8046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.89%) |Training time=0.47s (21.48%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6781|ppo_ep: 1|act_loss: 0.020843505859375|cri_loss: 0.01080322265625|unsuper_loss: 0.0
+average reward score: 4.92578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.01%) |Training time=0.46s (21.36%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6782|ppo_ep: 1|act_loss: -0.04522705078125|cri_loss: -0.022186279296875|unsuper_loss: 0.0
+average reward score: 5.19921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.03%) |Training time=0.46s (21.36%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6783|ppo_ep: 1|act_loss: -0.006313323974609375|cri_loss: -0.0029087066650390625|unsuper_loss: 0.0
+average reward score: 5.66015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.80%) |Training time=0.47s (21.59%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6784|ppo_ep: 1|act_loss: -0.0184173583984375|cri_loss: -0.00901031494140625|unsuper_loss: 0.0
+average reward score: 4.83203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.60s (71.53%) |Training time=0.54s (23.98%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.30 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6785|ppo_ep: 1|act_loss: -0.0084991455078125|cri_loss: -0.004001617431640625|unsuper_loss: 0.0
+average reward score: 5.36328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.09%) |Training time=0.46s (21.29%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6786|ppo_ep: 1|act_loss: -0.0009622573852539062|cri_loss: -0.00033354759216308594|unsuper_loss: 0.0
+average reward score: 5.6640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.11%) |Training time=0.46s (21.28%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6787|ppo_ep: 1|act_loss: -0.005367279052734375|cri_loss: -0.002521514892578125|unsuper_loss: 0.0
+average reward score: 5.72265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.90%) |Training time=0.47s (21.47%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6788|ppo_ep: 1|act_loss: -0.00386810302734375|cri_loss: -0.001888275146484375|unsuper_loss: 0.0
+average reward score: 5.3828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.92%) |Training time=0.46s (21.44%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.54
+[2023-04-14 12:56:32,879] [INFO] [logging.py:96:log_dist] [Rank 0] step=6790, skipped=85, lr=[8.391455182127607e-07, 8.391455182127607e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:56:32,898] [INFO] [timer.py:199:stop] epoch=0/micro_step=6790/global_step=6790, RunningAvgSamplesPerSec=105.54857659001864, CurrSamplesPerSec=106.04438580204238, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:56:32,990] [INFO] [logging.py:96:log_dist] [Rank 0] step=6790, skipped=114, lr=[4.506487927319475e-07, 4.506487927319475e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6789|ppo_ep: 1|act_loss: 0.0081634521484375|cri_loss: 0.00420379638671875|unsuper_loss: 0.0
+average reward score: 5.7109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.46s (21.43%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6790|ppo_ep: 1|act_loss: -0.0150146484375|cri_loss: -0.006427764892578125|unsuper_loss: 0.0
+average reward score: 5.890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.47s (21.60%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6791|ppo_ep: 1|act_loss: -0.012847900390625|cri_loss: -0.006328582763671875|unsuper_loss: 0.0
+average reward score: 6.421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.05%) |Training time=0.46s (21.34%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6792|ppo_ep: 1|act_loss: 0.02069091796875|cri_loss: 0.01058197021484375|unsuper_loss: 0.0
+average reward score: 4.94140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.95%) |Training time=0.46s (21.38%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6793|ppo_ep: 1|act_loss: 0.00525665283203125|cri_loss: 0.003208160400390625|unsuper_loss: 0.0
+average reward score: 5.5390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.81%) |Training time=0.47s (21.58%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6794|ppo_ep: 1|act_loss: 0.0136566162109375|cri_loss: 0.0073394775390625|unsuper_loss: 0.0
+average reward score: 5.03125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.00%) |Training time=0.46s (21.39%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6795|ppo_ep: 1|act_loss: -0.0012187957763671875|cri_loss: -0.00019693374633789062|unsuper_loss: 0.0
+average reward score: 6.234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.65%) |Training time=0.47s (21.62%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6796|ppo_ep: 1|act_loss: -0.006439208984375|cri_loss: -0.003108978271484375|unsuper_loss: 0.0
+average reward score: 5.4296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.02%) |Training time=0.46s (21.38%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6797|ppo_ep: 1|act_loss: 0.0185546875|cri_loss: 0.0095672607421875|unsuper_loss: 0.0
+average reward score: 5.5703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.83%) |Training time=0.44s (20.52%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6798|ppo_ep: 1|act_loss: -0.00604248046875|cri_loss: -0.0026149749755859375|unsuper_loss: 0.0
+average reward score: 4.9296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.13%) |Training time=0.46s (21.24%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.54
+[2023-04-14 12:56:54,707] [INFO] [logging.py:96:log_dist] [Rank 0] step=6800, skipped=85, lr=[8.2870651390585e-07, 8.2870651390585e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:56:54,726] [INFO] [timer.py:199:stop] epoch=0/micro_step=6800/global_step=6800, RunningAvgSamplesPerSec=105.55108170847568, CurrSamplesPerSec=114.21317600887035, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:56:54,818] [INFO] [logging.py:96:log_dist] [Rank 0] step=6800, skipped=114, lr=[4.4515143168203685e-07, 4.4515143168203685e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6799|ppo_ep: 1|act_loss: -0.0230712890625|cri_loss: -0.0112762451171875|unsuper_loss: 0.0
+average reward score: 5.48046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.55%) |Training time=0.44s (19.14%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.83 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6800|ppo_ep: 1|act_loss: 0.00914764404296875|cri_loss: 0.0051727294921875|unsuper_loss: 0.0
+average reward score: 5.55078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.80%) |Training time=0.44s (20.53%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.54
+[2023-04-14 12:56:59,114] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 6801|ppo_ep: 1|act_loss: -0.00785064697265625|cri_loss: -0.0037937164306640625|unsuper_loss: 0.0
+average reward score: 5.546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.44%) |Training time=0.44s (20.36%) |Others=0.09 (4.20%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.54
+[2023-04-14 12:57:01,326] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 6802|ppo_ep: 1|act_loss: -0.00553131103515625|cri_loss: -0.002628326416015625|unsuper_loss: 0.0
+average reward score: 5.078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.64s (73.79%) |Training time=0.46s (20.84%) |Others=0.12 (5.38%)|CurSamplesPerSec=14.43 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6803|ppo_ep: 1|act_loss: -0.00485992431640625|cri_loss: -0.0023365020751953125|unsuper_loss: 0.0
+average reward score: 5.484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.59%) |Training time=0.45s (20.76%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6804|ppo_ep: 1|act_loss: 0.05767822265625|cri_loss: 0.029541015625|unsuper_loss: 0.0
+average reward score: 5.078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.40%) |Training time=0.45s (20.95%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6805|ppo_ep: 1|act_loss: -0.0240478515625|cri_loss: -0.01161956787109375|unsuper_loss: 0.0
+average reward score: 5.125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.54%) |Training time=0.45s (20.84%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6806|ppo_ep: 1|act_loss: -0.010101318359375|cri_loss: -0.004650115966796875|unsuper_loss: 0.0
+average reward score: 6.125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.38%) |Training time=0.45s (20.97%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6807|ppo_ep: 1|act_loss: 0.0011091232299804688|cri_loss: 0.0006418228149414062|unsuper_loss: 0.0
+average reward score: 5.83984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.38%) |Training time=0.45s (20.94%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6808|ppo_ep: 1|act_loss: 0.019500732421875|cri_loss: 0.01010894775390625|unsuper_loss: 0.0
+average reward score: 4.984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.85%) |Training time=0.47s (21.53%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.54
+[2023-04-14 12:57:16,401] [INFO] [logging.py:96:log_dist] [Rank 0] step=6810, skipped=85, lr=[8.183267443089356e-07, 8.183267443089356e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:57:16,420] [INFO] [timer.py:199:stop] epoch=0/micro_step=6810/global_step=6810, RunningAvgSamplesPerSec=105.55701946349674, CurrSamplesPerSec=104.74279208024329, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:57:16,513] [INFO] [logging.py:96:log_dist] [Rank 0] step=6810, skipped=116, lr=[4.4077546857126923e-07, 4.4077546857126923e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6809|ppo_ep: 1|act_loss: -0.0061492919921875|cri_loss: -0.002872467041015625|unsuper_loss: 0.0
+average reward score: 5.6875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.83%) |Training time=0.47s (21.57%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6810|ppo_ep: 1|act_loss: 0.0145263671875|cri_loss: 0.00749969482421875|unsuper_loss: 0.0
+average reward score: 6.078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.02%) |Training time=0.46s (21.36%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6811|ppo_ep: 1|act_loss: 0.005298614501953125|cri_loss: 0.0028667449951171875|unsuper_loss: 0.0
+average reward score: 5.1015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.47%) |Training time=0.45s (20.87%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6812|ppo_ep: 1|act_loss: 0.030303955078125|cri_loss: 0.0157012939453125|unsuper_loss: 0.0
+average reward score: 5.83203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.30%) |Training time=0.45s (21.07%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6813|ppo_ep: 1|act_loss: 0.031890869140625|cri_loss: 0.0163726806640625|unsuper_loss: 0.0
+average reward score: 5.7109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.37%) |Training time=0.45s (20.98%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6814|ppo_ep: 1|act_loss: 0.00494384765625|cri_loss: 0.002788543701171875|unsuper_loss: 0.0
+average reward score: 5.359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.61s (72.78%) |Training time=0.48s (21.54%) |Others=0.13 (5.69%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6815|ppo_ep: 1|act_loss: -0.010528564453125|cri_loss: -0.004947662353515625|unsuper_loss: 0.0
+average reward score: 5.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.23%) |Training time=0.46s (21.11%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6816|ppo_ep: 1|act_loss: -0.0227203369140625|cri_loss: -0.01078033447265625|unsuper_loss: 0.0
+average reward score: 6.0078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.52%) |Training time=0.45s (20.84%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6817|ppo_ep: 1|act_loss: -0.007720947265625|cri_loss: -0.0037822723388671875|unsuper_loss: 0.0
+average reward score: 4.859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.42%) |Training time=0.45s (20.92%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6818|ppo_ep: 1|act_loss: 0.015472412109375|cri_loss: 0.00791168212890625|unsuper_loss: 0.0
+average reward score: 4.86328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.39%) |Training time=0.45s (20.92%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.54
+[2023-04-14 12:57:38,029] [INFO] [logging.py:96:log_dist] [Rank 0] step=6820, skipped=85, lr=[8.080063632752432e-07, 8.080063632752432e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:57:38,048] [INFO] [timer.py:199:stop] epoch=0/micro_step=6820/global_step=6820, RunningAvgSamplesPerSec=105.56231609549081, CurrSamplesPerSec=111.32460272236474, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:57:38,141] [INFO] [logging.py:96:log_dist] [Rank 0] step=6820, skipped=116, lr=[4.3533298994971135e-07, 4.3533298994971135e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6819|ppo_ep: 1|act_loss: 0.01313018798828125|cri_loss: 0.00687408447265625|unsuper_loss: 0.0
+average reward score: 5.67578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.45%) |Training time=0.45s (20.90%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6820|ppo_ep: 1|act_loss: 0.002880096435546875|cri_loss: 0.0019025802612304688|unsuper_loss: 0.0
+average reward score: 4.97265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.41%) |Training time=0.45s (20.93%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6821|ppo_ep: 1|act_loss: 0.01995849609375|cri_loss: 0.01031494140625|unsuper_loss: 0.0
+average reward score: 5.06640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.46%) |Training time=0.45s (20.88%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6822|ppo_ep: 1|act_loss: 0.004322052001953125|cri_loss: 0.0023365020751953125|unsuper_loss: 0.0
+average reward score: 5.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.14%) |Training time=0.46s (21.25%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6823|ppo_ep: 1|act_loss: 0.0030231475830078125|cri_loss: 0.001598358154296875|unsuper_loss: 0.0
+average reward score: 5.04296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.50%) |Training time=0.45s (20.84%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6824|ppo_ep: 1|act_loss: 0.0154876708984375|cri_loss: 0.00792694091796875|unsuper_loss: 0.0
+average reward score: 5.23046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.49%) |Training time=0.45s (20.85%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6825|ppo_ep: 1|act_loss: 0.0019969940185546875|cri_loss: 0.0011749267578125|unsuper_loss: 0.0
+average reward score: 5.6796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.33%) |Training time=0.45s (21.05%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6826|ppo_ep: 1|act_loss: 0.00228118896484375|cri_loss: 0.0012102127075195312|unsuper_loss: 0.0
+average reward score: 5.29296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.59%) |Training time=0.45s (20.75%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6827|ppo_ep: 1|act_loss: 0.0092926025390625|cri_loss: 0.004863739013671875|unsuper_loss: 0.0
+average reward score: 4.99609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.68%) |Training time=0.45s (20.67%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6828|ppo_ep: 1|act_loss: 0.01129150390625|cri_loss: 0.005802154541015625|unsuper_loss: 0.0
+average reward score: 4.984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.64%) |Training time=0.45s (20.71%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.54
+[2023-04-14 12:57:59,584] [INFO] [logging.py:96:log_dist] [Rank 0] step=6830, skipped=85, lr=[7.977455237777167e-07, 7.977455237777167e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:57:59,603] [INFO] [timer.py:199:stop] epoch=0/micro_step=6830/global_step=6830, RunningAvgSamplesPerSec=105.57015739655915, CurrSamplesPerSec=110.71612653627207, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:57:59,695] [INFO] [logging.py:96:log_dist] [Rank 0] step=6830, skipped=116, lr=[4.2992111467398014e-07, 4.2992111467398014e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6829|ppo_ep: 1|act_loss: 0.03900146484375|cri_loss: 0.021759033203125|unsuper_loss: 0.0
+average reward score: 5.11328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.47%) |Training time=0.45s (20.90%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6830|ppo_ep: 1|act_loss: -0.0240936279296875|cri_loss: -0.011871337890625|unsuper_loss: 0.0
+average reward score: 5.34765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.36%) |Training time=0.45s (19.30%) |Others=0.10 (4.34%)|CurSamplesPerSec=13.78 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6831|ppo_ep: 1|act_loss: -0.05914306640625|cri_loss: -0.0196075439453125|unsuper_loss: 0.0
+average reward score: 4.9765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.60%) |Training time=0.45s (20.71%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6832|ppo_ep: 1|act_loss: -0.03326416015625|cri_loss: -0.0150299072265625|unsuper_loss: 0.0
+average reward score: 5.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.63%) |Training time=0.45s (19.94%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.15 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6833|ppo_ep: 1|act_loss: 0.0269775390625|cri_loss: 0.0163726806640625|unsuper_loss: 0.0
+average reward score: 5.4609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.94%) |Training time=0.44s (20.43%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6834|ppo_ep: 1|act_loss: 0.03277587890625|cri_loss: 0.0167388916015625|unsuper_loss: 0.0
+average reward score: 5.078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.54%) |Training time=0.45s (20.83%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6835|ppo_ep: 1|act_loss: -0.0255889892578125|cri_loss: -0.01232147216796875|unsuper_loss: 0.0
+average reward score: 4.8671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.35%) |Training time=0.45s (21.01%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6836|ppo_ep: 1|act_loss: -0.007732391357421875|cri_loss: -0.003696441650390625|unsuper_loss: 0.0
+average reward score: 5.96875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.83%) |Training time=0.44s (20.51%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6837|ppo_ep: 1|act_loss: -0.0099334716796875|cri_loss: -0.004741668701171875|unsuper_loss: 0.0
+average reward score: 5.265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.43%) |Training time=0.45s (20.93%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6838|ppo_ep: 1|act_loss: 0.00641632080078125|cri_loss: 0.0033054351806640625|unsuper_loss: 0.0
+average reward score: 5.671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.45%) |Training time=0.45s (20.91%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.54
+[2023-04-14 12:58:21,453] [INFO] [logging.py:96:log_dist] [Rank 0] step=6840, skipped=85, lr=[7.875443779067518e-07, 7.875443779067518e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:58:21,472] [INFO] [timer.py:199:stop] epoch=0/micro_step=6840/global_step=6840, RunningAvgSamplesPerSec=105.57848061468555, CurrSamplesPerSec=109.96874088494299, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:58:21,564] [INFO] [logging.py:96:log_dist] [Rank 0] step=6840, skipped=116, lr=[4.2453992296112384e-07, 4.2453992296112384e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6839|ppo_ep: 1|act_loss: 0.01873779296875|cri_loss: 0.00957489013671875|unsuper_loss: 0.0
+average reward score: 4.9765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.38%) |Training time=0.45s (20.99%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6840|ppo_ep: 1|act_loss: 0.0223388671875|cri_loss: 0.01148223876953125|unsuper_loss: 0.0
+average reward score: 5.9140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.43%) |Training time=0.45s (20.92%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6841|ppo_ep: 1|act_loss: 0.00296783447265625|cri_loss: 0.0015993118286132812|unsuper_loss: 0.0
+average reward score: 5.5
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.45%) |Training time=0.45s (20.84%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6842|ppo_ep: 1|act_loss: -0.007354736328125|cri_loss: -0.003299713134765625|unsuper_loss: 0.0
+average reward score: 5.71484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.32%) |Training time=0.45s (21.04%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6843|ppo_ep: 1|act_loss: -0.006191253662109375|cri_loss: -0.0029296875|unsuper_loss: 0.0
+average reward score: 5.26171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.52%) |Training time=0.45s (20.82%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6844|ppo_ep: 1|act_loss: -0.024139404296875|cri_loss: -0.01123809814453125|unsuper_loss: 0.0
+average reward score: 4.97265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.42%) |Training time=0.45s (20.94%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6845|ppo_ep: 1|act_loss: -0.0137786865234375|cri_loss: -0.006679534912109375|unsuper_loss: 0.0
+average reward score: 6.1796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.55%) |Training time=0.45s (20.79%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6846|ppo_ep: 1|act_loss: 0.00154876708984375|cri_loss: 0.0009756088256835938|unsuper_loss: 0.0
+average reward score: 5.3359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.22%) |Training time=0.45s (19.35%) |Others=0.10 (4.42%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6847|ppo_ep: 1|act_loss: 0.0138397216796875|cri_loss: 0.00823974609375|unsuper_loss: 0.0
+average reward score: 5.8671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.40%) |Training time=0.51s (23.10%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6848|ppo_ep: 1|act_loss: 0.0116729736328125|cri_loss: 0.006023406982421875|unsuper_loss: 0.0
+average reward score: 5.3125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.76%) |Training time=0.42s (19.48%) |Others=0.10 (4.76%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.54
+[2023-04-14 12:58:43,264] [INFO] [logging.py:96:log_dist] [Rank 0] step=6850, skipped=85, lr=[7.774030768679357e-07, 7.774030768679357e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:58:43,283] [INFO] [timer.py:199:stop] epoch=0/micro_step=6850/global_step=6850, RunningAvgSamplesPerSec=105.58711633613972, CurrSamplesPerSec=132.29534435123225, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:58:43,375] [INFO] [logging.py:96:log_dist] [Rank 0] step=6850, skipped=116, lr=[4.191894945733832e-07, 4.191894945733832e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6849|ppo_ep: 1|act_loss: 0.0125579833984375|cri_loss: 0.00640869140625|unsuper_loss: 0.0
+average reward score: 5.6015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.68s (76.71%) |Training time=0.41s (18.73%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6850|ppo_ep: 1|act_loss: -0.00998687744140625|cri_loss: -0.004856109619140625|unsuper_loss: 0.0
+average reward score: 6.171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.99%) |Training time=0.46s (21.31%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6851|ppo_ep: 1|act_loss: -0.0087127685546875|cri_loss: -0.004150390625|unsuper_loss: 0.0
+average reward score: 5.75390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.15%) |Training time=0.46s (21.19%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6852|ppo_ep: 1|act_loss: -0.015838623046875|cri_loss: -0.0067901611328125|unsuper_loss: 0.0
+average reward score: 5.6171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.00%) |Training time=0.46s (21.32%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6853|ppo_ep: 1|act_loss: 0.003696441650390625|cri_loss: 0.002101898193359375|unsuper_loss: 0.0
+average reward score: 5.8671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.38%) |Training time=0.46s (21.04%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6854|ppo_ep: 1|act_loss: -0.0147705078125|cri_loss: -0.0072479248046875|unsuper_loss: 0.0
+average reward score: 4.82421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.72%) |Training time=0.45s (20.60%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6855|ppo_ep: 1|act_loss: 0.0623779296875|cri_loss: 0.033233642578125|unsuper_loss: 0.0
+average reward score: 5.83984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.53%) |Training time=0.45s (20.87%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6856|ppo_ep: 1|act_loss: -0.020172119140625|cri_loss: -0.009246826171875|unsuper_loss: 0.0
+average reward score: 5.46875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.73%) |Training time=0.47s (21.61%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6857|ppo_ep: 1|act_loss: 0.021636962890625|cri_loss: 0.0110321044921875|unsuper_loss: 0.0
+average reward score: 5.1875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.05%) |Training time=0.46s (21.26%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6858|ppo_ep: 1|act_loss: 0.00704193115234375|cri_loss: 0.003887176513671875|unsuper_loss: 0.0
+average reward score: 4.7890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.25%) |Training time=0.48s (22.09%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.54
+[2023-04-14 12:59:04,969] [INFO] [logging.py:96:log_dist] [Rank 0] step=6860, skipped=85, lr=[7.673217709798165e-07, 7.673217709798165e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:59:04,988] [INFO] [timer.py:199:stop] epoch=0/micro_step=6860/global_step=6860, RunningAvgSamplesPerSec=105.58874862976468, CurrSamplesPerSec=101.04542453423234, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:59:05,081] [INFO] [logging.py:96:log_dist] [Rank 0] step=6860, skipped=116, lr=[4.1386990881701476e-07, 4.1386990881701476e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6859|ppo_ep: 1|act_loss: -0.013885498046875|cri_loss: -0.00675201416015625|unsuper_loss: 0.0
+average reward score: 4.53515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.28%) |Training time=0.48s (22.08%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6860|ppo_ep: 1|act_loss: 0.002838134765625|cri_loss: 0.0017108917236328125|unsuper_loss: 0.0
+average reward score: 5.08984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.21%) |Training time=0.48s (22.15%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6861|ppo_ep: 1|act_loss: 0.009490966796875|cri_loss: 0.00521087646484375|unsuper_loss: 0.0
+average reward score: 5.43359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.42s |Gather latency=0.00s (0.00%) |Generate time=1.84s (75.80%) |Training time=0.49s (20.11%) |Others=0.10 (4.09%)|CurSamplesPerSec=13.21 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6862|ppo_ep: 1|act_loss: 0.0081939697265625|cri_loss: 0.00434112548828125|unsuper_loss: 0.0
+average reward score: 5.1328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.72s (75.86%) |Training time=0.45s (19.70%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.10 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6863|ppo_ep: 1|act_loss: -0.00571441650390625|cri_loss: -0.00274658203125|unsuper_loss: 0.0
+average reward score: 5.87109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.85%) |Training time=0.42s (19.45%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6864|ppo_ep: 1|act_loss: 0.01397705078125|cri_loss: 0.0092926025390625|unsuper_loss: 0.0
+average reward score: 5.703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.66%) |Training time=0.42s (19.62%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6865|ppo_ep: 1|act_loss: -0.015228271484375|cri_loss: -0.007534027099609375|unsuper_loss: 0.0
+average reward score: 5.265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.61%) |Training time=0.42s (19.59%) |Others=0.10 (4.80%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6866|ppo_ep: 1|act_loss: 0.004055023193359375|cri_loss: 0.0021114349365234375|unsuper_loss: 0.0
+average reward score: 5.1640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.97%) |Training time=0.44s (20.35%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6867|ppo_ep: 1|act_loss: -0.002750396728515625|cri_loss: -0.0011930465698242188|unsuper_loss: 0.0
+average reward score: 5.56640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.29%) |Training time=0.43s (20.04%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.54
+[2023-04-14 12:59:24,692] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 6868|ppo_ep: 1|act_loss: -0.062408447265625|cri_loss: -0.026763916015625|unsuper_loss: 0.0
+average reward score: 5.3515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.61s (76.19%) |Training time=0.40s (19.08%) |Others=0.10 (4.74%)|CurSamplesPerSec=15.13 |AvgSamplesPerSec=14.54
+[2023-04-14 12:59:26,832] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+[2023-04-14 12:59:26,833] [INFO] [logging.py:96:log_dist] [Rank 0] step=6870, skipped=87, lr=[7.59300023224231e-07, 7.59300023224231e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:59:26,833] [INFO] [timer.py:199:stop] epoch=0/micro_step=6870/global_step=6870, RunningAvgSamplesPerSec=105.6030931208463, CurrSamplesPerSec=119.03673509931426, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:59:26,925] [INFO] [logging.py:96:log_dist] [Rank 0] step=6870, skipped=116, lr=[4.085812445411133e-07, 4.085812445411133e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6869|ppo_ep: 1|act_loss: -0.018341064453125|cri_loss: -0.00882720947265625|unsuper_loss: 0.0
+average reward score: 5.5234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.29%) |Training time=0.43s (20.13%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6870|ppo_ep: 1|act_loss: -0.0390625|cri_loss: -0.019287109375|unsuper_loss: 0.0
+average reward score: 4.8828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.02%) |Training time=0.49s (22.40%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6871|ppo_ep: 1|act_loss: 0.02459716796875|cri_loss: 0.01280975341796875|unsuper_loss: 0.0
+average reward score: 5.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.23%) |Training time=0.49s (22.27%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6872|ppo_ep: 1|act_loss: 0.0074005126953125|cri_loss: 0.0037975311279296875|unsuper_loss: 0.0
+average reward score: 5.48046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.61%) |Training time=0.47s (21.67%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6873|ppo_ep: 1|act_loss: 0.006969451904296875|cri_loss: 0.00408935546875|unsuper_loss: 0.0
+average reward score: 5.73046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.35%) |Training time=0.41s (19.06%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6874|ppo_ep: 1|act_loss: -0.00328826904296875|cri_loss: -0.0014743804931640625|unsuper_loss: 0.0
+average reward score: 5.375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.41%) |Training time=0.41s (18.89%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6875|ppo_ep: 1|act_loss: -0.016204833984375|cri_loss: -0.00778961181640625|unsuper_loss: 0.0
+average reward score: 5.96484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.31%) |Training time=0.46s (21.18%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6876|ppo_ep: 1|act_loss: -0.011138916015625|cri_loss: -0.00545501708984375|unsuper_loss: 0.0
+average reward score: 5.18359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.38s |Gather latency=0.00s (0.00%) |Generate time=1.78s (74.83%) |Training time=0.50s (20.93%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.42 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6877|ppo_ep: 1|act_loss: 0.018096923828125|cri_loss: 0.0096282958984375|unsuper_loss: 0.0
+average reward score: 5.640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.47%) |Training time=0.45s (20.86%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6878|ppo_ep: 1|act_loss: -0.0166473388671875|cri_loss: -0.00775909423828125|unsuper_loss: 0.0
+average reward score: 5.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.25%) |Training time=0.43s (20.08%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.54
+[2023-04-14 12:59:48,741] [INFO] [logging.py:96:log_dist] [Rank 0] step=6880, skipped=87, lr=[7.493270845701285e-07, 7.493270845701285e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 12:59:48,759] [INFO] [timer.py:199:stop] epoch=0/micro_step=6880/global_step=6880, RunningAvgSamplesPerSec=105.60848080411316, CurrSamplesPerSec=114.59905703068834, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 12:59:48,851] [INFO] [logging.py:96:log_dist] [Rank 0] step=6880, skipped=116, lr=[4.033235801364402e-07, 4.033235801364402e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6879|ppo_ep: 1|act_loss: -0.0301055908203125|cri_loss: -0.0129241943359375|unsuper_loss: 0.0
+average reward score: 5.19921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.01%) |Training time=0.44s (20.45%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6880|ppo_ep: 1|act_loss: -0.00040602684020996094|cri_loss: -5.7220458984375e-05|unsuper_loss: 0.0
+average reward score: 6.0
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.07%) |Training time=0.46s (21.28%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6881|ppo_ep: 1|act_loss: -0.0007367134094238281|cri_loss: -0.000293731689453125|unsuper_loss: 0.0
+average reward score: 5.8671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.25%) |Training time=0.46s (21.22%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6882|ppo_ep: 1|act_loss: 0.005901336669921875|cri_loss: 0.00348663330078125|unsuper_loss: 0.0
+average reward score: 5.91015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.19%) |Training time=0.46s (21.16%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6883|ppo_ep: 1|act_loss: 0.0136260986328125|cri_loss: 0.007183074951171875|unsuper_loss: 0.0
+average reward score: 6.16796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.16%) |Training time=0.46s (21.30%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6884|ppo_ep: 1|act_loss: 0.0162353515625|cri_loss: 0.0089569091796875|unsuper_loss: 0.0
+average reward score: 4.76953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.22%) |Training time=0.46s (21.16%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6885|ppo_ep: 1|act_loss: -0.008392333984375|cri_loss: -0.0038623809814453125|unsuper_loss: 0.0
+average reward score: 5.65234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.43%) |Training time=0.45s (20.92%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6886|ppo_ep: 1|act_loss: -0.0019283294677734375|cri_loss: -0.0008502006530761719|unsuper_loss: 0.0
+average reward score: 6.03515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.36%) |Training time=0.46s (21.01%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6887|ppo_ep: 1|act_loss: 0.0006146430969238281|cri_loss: 0.00042629241943359375|unsuper_loss: 0.0
+average reward score: 5.59765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.78%) |Training time=0.45s (20.69%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6888|ppo_ep: 1|act_loss: 0.009735107421875|cri_loss: 0.00511932373046875|unsuper_loss: 0.0
+average reward score: 5.375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.19%) |Training time=0.45s (20.88%) |Others=0.11 (4.93%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.54
+[2023-04-14 13:00:10,366] [INFO] [logging.py:96:log_dist] [Rank 0] step=6890, skipped=87, lr=[7.394145572206594e-07, 7.394145572206594e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:00:10,384] [INFO] [timer.py:199:stop] epoch=0/micro_step=6890/global_step=6890, RunningAvgSamplesPerSec=105.61396103794894, CurrSamplesPerSec=112.90964551727154, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:00:10,477] [INFO] [logging.py:96:log_dist] [Rank 0] step=6890, skipped=116, lr=[3.9809699353426603e-07, 3.9809699353426603e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6889|ppo_ep: 1|act_loss: -0.007568359375|cri_loss: -0.0036182403564453125|unsuper_loss: 0.0
+average reward score: 5.890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.66%) |Training time=0.45s (20.69%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6890|ppo_ep: 1|act_loss: 0.0322265625|cri_loss: 0.0164947509765625|unsuper_loss: 0.0
+average reward score: 5.84765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.21%) |Training time=0.46s (21.14%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6891|ppo_ep: 1|act_loss: 0.00508880615234375|cri_loss: 0.002838134765625|unsuper_loss: 0.0
+average reward score: 5.00390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.63s (72.97%) |Training time=0.50s (22.53%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.31 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6892|ppo_ep: 1|act_loss: -0.0269317626953125|cri_loss: -0.0132293701171875|unsuper_loss: 0.0
+average reward score: 5.48046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.09%) |Training time=0.46s (19.59%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.70 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6893|ppo_ep: 1|act_loss: -0.014617919921875|cri_loss: -0.007213592529296875|unsuper_loss: 0.0
+average reward score: 5.48046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.13%) |Training time=0.49s (22.27%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6894|ppo_ep: 1|act_loss: -0.0035247802734375|cri_loss: -0.0015897750854492188|unsuper_loss: 0.0
+average reward score: 5.25390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.09%) |Training time=0.49s (22.29%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.54
+[2023-04-14 13:00:23,665] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
+epoch: 0|step: 6895|ppo_ep: 1|act_loss: -0.02349853515625|cri_loss: -0.01157379150390625|unsuper_loss: 0.0
+average reward score: 5.625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.20%) |Training time=0.46s (21.22%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6896|ppo_ep: 1|act_loss: 0.01934814453125|cri_loss: 0.01012420654296875|unsuper_loss: 0.0
+average reward score: 5.9375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.01%) |Training time=0.49s (22.37%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6897|ppo_ep: 1|act_loss: -0.0141448974609375|cri_loss: -0.006839752197265625|unsuper_loss: 0.0
+average reward score: 4.65625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.80%) |Training time=0.47s (21.58%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6898|ppo_ep: 1|act_loss: 0.0094146728515625|cri_loss: 0.00492095947265625|unsuper_loss: 0.0
+average reward score: 5.42578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.15%) |Training time=0.48s (22.25%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.54
+[2023-04-14 13:00:32,334] [INFO] [logging.py:96:log_dist] [Rank 0] step=6900, skipped=88, lr=[7.305450557257688e-07, 7.305450557257688e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:00:32,352] [INFO] [timer.py:199:stop] epoch=0/micro_step=6900/global_step=6900, RunningAvgSamplesPerSec=105.61108100532957, CurrSamplesPerSec=104.086465661044, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:00:32,445] [INFO] [logging.py:96:log_dist] [Rank 0] step=6900, skipped=116, lr=[3.9290156220521235e-07, 3.9290156220521235e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6899|ppo_ep: 1|act_loss: -0.0037364959716796875|cri_loss: -0.0016689300537109375|unsuper_loss: 0.0
+average reward score: 5.3984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.82%) |Training time=0.47s (21.66%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6900|ppo_ep: 1|act_loss: 0.0152740478515625|cri_loss: 0.0078887939453125|unsuper_loss: 0.0
+average reward score: 4.59765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.19%) |Training time=0.48s (22.20%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6901|ppo_ep: 1|act_loss: -0.01605224609375|cri_loss: -0.007904052734375|unsuper_loss: 0.0
+average reward score: 5.3515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.42%) |Training time=0.48s (21.95%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6902|ppo_ep: 1|act_loss: -0.00234222412109375|cri_loss: -0.0009698867797851562|unsuper_loss: 0.0
+average reward score: 5.01953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.46%) |Training time=0.48s (21.91%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.54
+[2023-04-14 13:00:41,119] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 6903|ppo_ep: 1|act_loss: 0.004150390625|cri_loss: 0.003192901611328125|unsuper_loss: 0.0
+average reward score: 5.17578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.48s (22.33%) |Others=0.09 (4.05%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.54
+[2023-04-14 13:00:43,275] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 6904|ppo_ep: 1|act_loss: -0.030059814453125|cri_loss: -0.014678955078125|unsuper_loss: 0.0
+average reward score: 5.9375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.95%) |Training time=0.47s (21.87%) |Others=0.09 (4.18%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6905|ppo_ep: 1|act_loss: -0.02294921875|cri_loss: -0.01105499267578125|unsuper_loss: 0.0
+average reward score: 5.89453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.48s (21.99%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6906|ppo_ep: 1|act_loss: -0.00711822509765625|cri_loss: -0.0034732818603515625|unsuper_loss: 0.0
+average reward score: 5.53125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.52%) |Training time=0.47s (21.80%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6907|ppo_ep: 1|act_loss: -0.0452880859375|cri_loss: -0.021453857421875|unsuper_loss: 0.0
+average reward score: 5.73046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.78s (75.15%) |Training time=0.49s (20.61%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.52 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6908|ppo_ep: 1|act_loss: -0.00554656982421875|cri_loss: -0.002651214599609375|unsuper_loss: 0.0
+average reward score: 5.109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.04%) |Training time=0.44s (20.26%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.54
+[2023-04-14 13:00:54,154] [INFO] [logging.py:96:log_dist] [Rank 0] step=6910, skipped=88, lr=[7.207477138987122e-07, 7.207477138987122e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:00:54,172] [INFO] [timer.py:199:stop] epoch=0/micro_step=6910/global_step=6910, RunningAvgSamplesPerSec=105.6090924949022, CurrSamplesPerSec=118.3087503415692, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:00:54,265] [INFO] [logging.py:96:log_dist] [Rank 0] step=6910, skipped=118, lr=[3.88767700704093e-07, 3.88767700704093e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6909|ppo_ep: 1|act_loss: -0.0126800537109375|cri_loss: -0.00616455078125|unsuper_loss: 0.0
+average reward score: 5.578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.05%) |Training time=0.43s (20.28%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6910|ppo_ep: 1|act_loss: 0.0088958740234375|cri_loss: 0.004871368408203125|unsuper_loss: 0.0
+average reward score: 5.265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.99%) |Training time=0.46s (21.32%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6911|ppo_ep: 1|act_loss: 0.00817108154296875|cri_loss: 0.0043182373046875|unsuper_loss: 0.0
+average reward score: 5.7734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.45s (21.04%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6912|ppo_ep: 1|act_loss: -0.016021728515625|cri_loss: -0.007843017578125|unsuper_loss: 0.0
+average reward score: 4.8046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.96%) |Training time=0.46s (21.37%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6913|ppo_ep: 1|act_loss: 0.0341796875|cri_loss: 0.0176849365234375|unsuper_loss: 0.0
+average reward score: 4.98828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.40%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6914|ppo_ep: 1|act_loss: -0.0117950439453125|cri_loss: -0.00492095947265625|unsuper_loss: 0.0
+average reward score: 5.0390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.14%) |Training time=0.45s (21.18%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6915|ppo_ep: 1|act_loss: 0.0032196044921875|cri_loss: 0.001708984375|unsuper_loss: 0.0
+average reward score: 5.30078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.17%) |Training time=0.45s (21.24%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6916|ppo_ep: 1|act_loss: -0.0145416259765625|cri_loss: -0.007175445556640625|unsuper_loss: 0.0
+average reward score: 5.06640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.19%) |Training time=0.46s (21.17%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6917|ppo_ep: 1|act_loss: -0.0222930908203125|cri_loss: -0.01084136962890625|unsuper_loss: 0.0
+average reward score: 5.21484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.33%) |Training time=0.45s (21.07%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6918|ppo_ep: 1|act_loss: -0.0175933837890625|cri_loss: -0.00868988037109375|unsuper_loss: 0.0
+average reward score: 5.23046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.97%) |Training time=0.46s (21.35%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.54
+[2023-04-14 13:01:15,589] [INFO] [logging.py:96:log_dist] [Rank 0] step=6920, skipped=88, lr=[7.110112069915053e-07, 7.110112069915053e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:01:15,607] [INFO] [timer.py:199:stop] epoch=0/micro_step=6920/global_step=6920, RunningAvgSamplesPerSec=105.61367204126265, CurrSamplesPerSec=104.81772013304312, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:01:15,700] [INFO] [logging.py:96:log_dist] [Rank 0] step=6920, skipped=118, lr=[3.836285426178418e-07, 3.836285426178418e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6919|ppo_ep: 1|act_loss: -0.02325439453125|cri_loss: -0.0114898681640625|unsuper_loss: 0.0
+average reward score: 4.4921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.73%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6920|ppo_ep: 1|act_loss: 0.0015735626220703125|cri_loss: 0.0011959075927734375|unsuper_loss: 0.0
+average reward score: 5.515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.95%) |Training time=0.46s (21.13%) |Others=0.11 (4.92%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6921|ppo_ep: 1|act_loss: -0.00385284423828125|cri_loss: -0.0017328262329101562|unsuper_loss: 0.0
+average reward score: 5.83984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.58%) |Training time=0.47s (21.00%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.15 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6922|ppo_ep: 1|act_loss: -0.00983428955078125|cri_loss: -0.0047607421875|unsuper_loss: 0.0
+average reward score: 6.046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.89%) |Training time=0.49s (20.85%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.64 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6923|ppo_ep: 1|act_loss: -0.0155029296875|cri_loss: -0.007350921630859375|unsuper_loss: 0.0
+average reward score: 4.94921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.13%) |Training time=0.45s (21.18%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6924|ppo_ep: 1|act_loss: 0.0159149169921875|cri_loss: 0.00820159912109375|unsuper_loss: 0.0
+average reward score: 5.5625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.95%) |Training time=0.46s (21.39%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6925|ppo_ep: 1|act_loss: 0.02459716796875|cri_loss: 0.01308441162109375|unsuper_loss: 0.0
+average reward score: 5.109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.05%) |Training time=0.46s (21.28%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6926|ppo_ep: 1|act_loss: -0.02740478515625|cri_loss: -0.013519287109375|unsuper_loss: 0.0
+average reward score: 5.2265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.27%) |Training time=0.45s (21.06%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6927|ppo_ep: 1|act_loss: 0.019256591796875|cri_loss: 0.00994110107421875|unsuper_loss: 0.0
+average reward score: 5.46484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.24%) |Training time=0.45s (21.18%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6928|ppo_ep: 1|act_loss: -0.00569915771484375|cri_loss: -0.002689361572265625|unsuper_loss: 0.0
+average reward score: 4.53125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.20%) |Training time=0.45s (21.11%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.54
+[2023-04-14 13:01:37,364] [INFO] [logging.py:96:log_dist] [Rank 0] step=6930, skipped=88, lr=[7.013356793226694e-07, 7.013356793226694e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:01:37,383] [INFO] [timer.py:199:stop] epoch=0/micro_step=6930/global_step=6930, RunningAvgSamplesPerSec=105.61659593065568, CurrSamplesPerSec=112.18822396565588, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:01:37,475] [INFO] [logging.py:96:log_dist] [Rank 0] step=6930, skipped=118, lr=[3.7852075426205644e-07, 3.7852075426205644e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6929|ppo_ep: 1|act_loss: -0.0022640228271484375|cri_loss: -0.0007987022399902344|unsuper_loss: 0.0
+average reward score: 4.76953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.39%) |Training time=0.45s (20.94%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6930|ppo_ep: 1|act_loss: 0.00907135009765625|cri_loss: 0.00473785400390625|unsuper_loss: 0.0
+average reward score: 5.1640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.38%) |Training time=0.45s (20.94%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6931|ppo_ep: 1|act_loss: 0.0194549560546875|cri_loss: 0.01029205322265625|unsuper_loss: 0.0
+average reward score: 5.54296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.45s (21.08%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6932|ppo_ep: 1|act_loss: -0.004150390625|cri_loss: -0.00170135498046875|unsuper_loss: 0.0
+average reward score: 5.06640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.92%) |Training time=0.46s (21.41%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6933|ppo_ep: 1|act_loss: -0.0095367431640625|cri_loss: -0.00458526611328125|unsuper_loss: 0.0
+average reward score: 4.95703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.45s (21.18%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6934|ppo_ep: 1|act_loss: 0.0804443359375|cri_loss: 0.04730224609375|unsuper_loss: 0.0
+average reward score: 5.4609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.48s (22.05%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6935|ppo_ep: 1|act_loss: -0.00833892822265625|cri_loss: -0.0038661956787109375|unsuper_loss: 0.0
+average reward score: 5.125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.53%) |Training time=0.47s (21.82%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6936|ppo_ep: 1|act_loss: 0.00691986083984375|cri_loss: 0.0047149658203125|unsuper_loss: 0.0
+average reward score: 5.1328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.77%) |Training time=0.47s (21.58%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6937|ppo_ep: 1|act_loss: -0.0009016990661621094|cri_loss: -0.00012063980102539062|unsuper_loss: 0.0
+average reward score: 5.41796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.59s (69.74%) |Training time=0.59s (25.82%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.05 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6938|ppo_ep: 1|act_loss: -0.00502777099609375|cri_loss: -0.0024127960205078125|unsuper_loss: 0.0
+average reward score: 5.05859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.48s (21.95%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.54
+[2023-04-14 13:01:59,006] [INFO] [logging.py:96:log_dist] [Rank 0] step=6940, skipped=88, lr=[6.917212743068631e-07, 6.917212743068631e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:01:59,025] [INFO] [timer.py:199:stop] epoch=0/micro_step=6940/global_step=6940, RunningAvgSamplesPerSec=105.61316551068417, CurrSamplesPerSec=110.00605527115124, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:01:59,117] [INFO] [logging.py:96:log_dist] [Rank 0] step=6940, skipped=118, lr=[3.73444411346483e-07, 3.73444411346483e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6939|ppo_ep: 1|act_loss: -0.0012607574462890625|cri_loss: -0.00046753883361816406|unsuper_loss: 0.0
+average reward score: 5.26171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.35%) |Training time=0.45s (21.02%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6940|ppo_ep: 1|act_loss: 0.0012426376342773438|cri_loss: 0.0006957054138183594|unsuper_loss: 0.0
+average reward score: 5.62109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.28%) |Training time=0.45s (21.07%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6941|ppo_ep: 1|act_loss: -0.0323486328125|cri_loss: -0.0152435302734375|unsuper_loss: 0.0
+average reward score: 5.43359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.65%) |Training time=0.44s (20.68%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6942|ppo_ep: 1|act_loss: 0.03973388671875|cri_loss: 0.020416259765625|unsuper_loss: 0.0
+average reward score: 5.51953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.09%) |Training time=0.46s (21.27%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6943|ppo_ep: 1|act_loss: 0.0152435302734375|cri_loss: 0.00778961181640625|unsuper_loss: 0.0
+average reward score: 4.96484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.76%) |Training time=0.46s (21.58%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6944|ppo_ep: 1|act_loss: -0.019775390625|cri_loss: -0.00980377197265625|unsuper_loss: 0.0
+average reward score: 6.36328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.50%) |Training time=0.48s (21.88%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6945|ppo_ep: 1|act_loss: -0.0194091796875|cri_loss: -0.00959014892578125|unsuper_loss: 0.0
+average reward score: 4.8984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.74%) |Training time=0.46s (21.60%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6946|ppo_ep: 1|act_loss: -0.0009250640869140625|cri_loss: -0.00030112266540527344|unsuper_loss: 0.0
+average reward score: 6.0
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.47s (21.98%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6947|ppo_ep: 1|act_loss: -0.016876220703125|cri_loss: -0.00833892822265625|unsuper_loss: 0.0
+average reward score: 5.26953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.60%) |Training time=0.47s (21.73%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6948|ppo_ep: 1|act_loss: -0.0037174224853515625|cri_loss: -0.0017242431640625|unsuper_loss: 0.0
+average reward score: 5.1640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.56%) |Training time=0.47s (21.78%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.54
+[2023-04-14 13:02:20,540] [INFO] [logging.py:96:log_dist] [Rank 0] step=6950, skipped=88, lr=[6.821681344527626e-07, 6.821681344527626e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:02:20,558] [INFO] [timer.py:199:stop] epoch=0/micro_step=6950/global_step=6950, RunningAvgSamplesPerSec=105.61406004000848, CurrSamplesPerSec=104.52931262752917, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:02:20,651] [INFO] [logging.py:96:log_dist] [Rank 0] step=6950, skipped=118, lr=[3.683995891147696e-07, 3.683995891147696e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6949|ppo_ep: 1|act_loss: -0.019744873046875|cri_loss: -0.00972747802734375|unsuper_loss: 0.0
+average reward score: 4.98828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.80%) |Training time=0.47s (21.69%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6950|ppo_ep: 1|act_loss: 0.0229644775390625|cri_loss: 0.01183319091796875|unsuper_loss: 0.0
+average reward score: 5.9609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.02%) |Training time=0.47s (21.40%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6951|ppo_ep: 1|act_loss: -0.0074310302734375|cri_loss: -0.0035552978515625|unsuper_loss: 0.0
+average reward score: 5.6640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.68s (74.76%) |Training time=0.47s (20.80%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.20 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6952|ppo_ep: 1|act_loss: -0.001399993896484375|cri_loss: 0.001354217529296875|unsuper_loss: 0.0
+average reward score: 5.0625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.54%) |Training time=0.47s (21.82%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6953|ppo_ep: 1|act_loss: -0.0012025833129882812|cri_loss: -0.0003528594970703125|unsuper_loss: 0.0
+average reward score: 4.84375
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.89%) |Training time=0.46s (19.80%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6954|ppo_ep: 1|act_loss: 0.03936767578125|cri_loss: 0.0204010009765625|unsuper_loss: 0.0
+average reward score: 5.41796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.61%) |Training time=0.47s (21.72%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6955|ppo_ep: 1|act_loss: 0.0828857421875|cri_loss: 0.0494384765625|unsuper_loss: 0.0
+average reward score: 5.48828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.11%) |Training time=0.46s (21.34%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6956|ppo_ep: 1|act_loss: 0.0193939208984375|cri_loss: 0.010009765625|unsuper_loss: 0.0
+average reward score: 5.4609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.36%) |Training time=0.48s (22.02%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6957|ppo_ep: 1|act_loss: -0.0172119140625|cri_loss: -0.00844573974609375|unsuper_loss: 0.0
+average reward score: 5.296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.80%) |Training time=0.46s (21.49%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6958|ppo_ep: 1|act_loss: 0.001201629638671875|cri_loss: 0.0013179779052734375|unsuper_loss: 0.0
+average reward score: 4.984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.88%) |Training time=0.46s (21.46%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.54
+[2023-04-14 13:02:42,376] [INFO] [logging.py:96:log_dist] [Rank 0] step=6960, skipped=88, lr=[6.726764013609494e-07, 6.726764013609494e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:02:42,395] [INFO] [timer.py:199:stop] epoch=0/micro_step=6960/global_step=6960, RunningAvgSamplesPerSec=105.61365815136743, CurrSamplesPerSec=104.42098973666357, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:02:42,487] [INFO] [logging.py:96:log_dist] [Rank 0] step=6960, skipped=118, lr=[3.63386362343352e-07, 3.63386362343352e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6959|ppo_ep: 1|act_loss: -0.0089263916015625|cri_loss: -0.00392913818359375|unsuper_loss: 0.0
+average reward score: 5.41796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.76%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6960|ppo_ep: 1|act_loss: 0.005954742431640625|cri_loss: 0.003894805908203125|unsuper_loss: 0.0
+average reward score: 5.484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.64%) |Training time=0.47s (21.71%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6961|ppo_ep: 1|act_loss: -0.0018558502197265625|cri_loss: -0.0008111000061035156|unsuper_loss: 0.0
+average reward score: 4.68359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.94%) |Training time=0.46s (21.50%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6962|ppo_ep: 1|act_loss: -0.0201416015625|cri_loss: -0.007419586181640625|unsuper_loss: 0.0
+average reward score: 5.4453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.58%) |Training time=0.47s (21.76%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6963|ppo_ep: 1|act_loss: -0.01812744140625|cri_loss: -0.00882720947265625|unsuper_loss: 0.0
+average reward score: 5.15625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.59%) |Training time=0.47s (21.86%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6964|ppo_ep: 1|act_loss: -0.00725555419921875|cri_loss: -0.0033168792724609375|unsuper_loss: 0.0
+average reward score: 5.37109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.54%) |Training time=0.47s (21.80%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6965|ppo_ep: 1|act_loss: 0.0039215087890625|cri_loss: 0.002716064453125|unsuper_loss: 0.0
+average reward score: 5.1328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.81%) |Training time=0.46s (21.54%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6966|ppo_ep: 1|act_loss: 0.0004565715789794922|cri_loss: 0.00033473968505859375|unsuper_loss: 0.0
+average reward score: 5.4375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.40%) |Training time=0.47s (21.98%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6967|ppo_ep: 1|act_loss: -0.090576171875|cri_loss: -0.0345458984375|unsuper_loss: 0.0
+average reward score: 5.4921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.77%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6968|ppo_ep: 1|act_loss: 0.017822265625|cri_loss: 0.00933074951171875|unsuper_loss: 0.0
+average reward score: 5.109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.68%) |Training time=0.47s (21.68%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.54
+[2023-04-14 13:03:04,094] [INFO] [logging.py:96:log_dist] [Rank 0] step=6970, skipped=88, lr=[6.632462157218022e-07, 6.632462157218022e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:03:04,112] [INFO] [timer.py:199:stop] epoch=0/micro_step=6970/global_step=6970, RunningAvgSamplesPerSec=105.61183941805801, CurrSamplesPerSec=100.77752732722541, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:03:04,205] [INFO] [logging.py:96:log_dist] [Rank 0] step=6970, skipped=118, lr=[3.5840480534034355e-07, 3.5840480534034355e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6969|ppo_ep: 1|act_loss: 0.007671356201171875|cri_loss: 0.004016876220703125|unsuper_loss: 0.0
+average reward score: 5.8984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.23%) |Training time=0.48s (20.51%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.66 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6970|ppo_ep: 1|act_loss: 0.0205230712890625|cri_loss: 0.01056671142578125|unsuper_loss: 0.0
+average reward score: 5.0234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.63%) |Training time=0.47s (21.71%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6971|ppo_ep: 1|act_loss: -0.00946807861328125|cri_loss: -0.004543304443359375|unsuper_loss: 0.0
+average reward score: 5.45703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.67%) |Training time=0.47s (21.78%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6972|ppo_ep: 1|act_loss: -0.007415771484375|cri_loss: -0.003566741943359375|unsuper_loss: 0.0
+average reward score: 5.80859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.81%) |Training time=0.47s (21.57%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6973|ppo_ep: 1|act_loss: -0.034423828125|cri_loss: -0.0169219970703125|unsuper_loss: 0.0
+average reward score: 5.34765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.73%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6974|ppo_ep: 1|act_loss: 0.00567626953125|cri_loss: 0.002979278564453125|unsuper_loss: 0.0
+average reward score: 5.0625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.64%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6975|ppo_ep: 1|act_loss: -0.020263671875|cri_loss: -0.009857177734375|unsuper_loss: 0.0
+average reward score: 5.0546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.71%) |Training time=0.47s (21.73%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6976|ppo_ep: 1|act_loss: 0.00650787353515625|cri_loss: 0.0033054351806640625|unsuper_loss: 0.0
+average reward score: 5.28515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.49%) |Training time=0.47s (21.86%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6977|ppo_ep: 1|act_loss: -0.0411376953125|cri_loss: -0.019805908203125|unsuper_loss: 0.0
+average reward score: 4.625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.71%) |Training time=0.46s (21.63%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6978|ppo_ep: 1|act_loss: 0.01885986328125|cri_loss: 0.0095672607421875|unsuper_loss: 0.0
+average reward score: 5.46875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.40%) |Training time=0.47s (21.97%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.54
+[2023-04-14 13:03:25,646] [INFO] [logging.py:96:log_dist] [Rank 0] step=6980, skipped=88, lr=[6.53877717313422e-07, 6.53877717313422e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:03:25,664] [INFO] [timer.py:199:stop] epoch=0/micro_step=6980/global_step=6980, RunningAvgSamplesPerSec=105.6101181820172, CurrSamplesPerSec=102.82117836505277, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:03:25,757] [INFO] [logging.py:96:log_dist] [Rank 0] step=6980, skipped=118, lr=[3.5345499194443663e-07, 3.5345499194443663e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6979|ppo_ep: 1|act_loss: -0.0111846923828125|cri_loss: -0.00531768798828125|unsuper_loss: 0.0
+average reward score: 4.7109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.86%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6980|ppo_ep: 1|act_loss: -0.02044677734375|cri_loss: -0.00836181640625|unsuper_loss: 0.0
+average reward score: 5.8671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.64s (71.45%) |Training time=0.55s (24.16%) |Others=0.10 (4.38%)|CurSamplesPerSec=13.94 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6981|ppo_ep: 1|act_loss: -0.0110626220703125|cri_loss: -0.005458831787109375|unsuper_loss: 0.0
+average reward score: 5.90625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.54%) |Training time=0.47s (21.83%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6982|ppo_ep: 1|act_loss: 0.0313720703125|cri_loss: 0.01617431640625|unsuper_loss: 0.0
+average reward score: 4.60546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.54%) |Training time=0.47s (21.81%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6983|ppo_ep: 1|act_loss: 0.0027866363525390625|cri_loss: 0.0015430450439453125|unsuper_loss: 0.0
+average reward score: 5.45703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.91%) |Training time=0.46s (21.43%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6984|ppo_ep: 1|act_loss: -0.00066375732421875|cri_loss: -0.0002378225326538086|unsuper_loss: 0.0
+average reward score: 5.03125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.91%) |Training time=0.46s (21.43%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6985|ppo_ep: 1|act_loss: 0.031341552734375|cri_loss: 0.015899658203125|unsuper_loss: 0.0
+average reward score: 6.01171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.57%) |Training time=0.47s (20.19%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.83 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6986|ppo_ep: 1|act_loss: 0.0004143714904785156|cri_loss: 0.00025391578674316406|unsuper_loss: 0.0
+average reward score: 5.1171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.72%) |Training time=0.46s (21.61%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6987|ppo_ep: 1|act_loss: 0.005809783935546875|cri_loss: 0.0031070709228515625|unsuper_loss: 0.0
+average reward score: 4.97265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.61%) |Training time=0.44s (20.71%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6988|ppo_ep: 1|act_loss: 0.029205322265625|cri_loss: 0.0149383544921875|unsuper_loss: 0.0
+average reward score: 5.1328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.17%) |Training time=0.46s (21.19%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.54
+[2023-04-14 13:03:47,461] [INFO] [logging.py:96:log_dist] [Rank 0] step=6990, skipped=88, lr=[6.445710449995562e-07, 6.445710449995562e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:03:47,479] [INFO] [timer.py:199:stop] epoch=0/micro_step=6990/global_step=6990, RunningAvgSamplesPerSec=105.60882625178036, CurrSamplesPerSec=114.9070274944245, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:03:47,572] [INFO] [logging.py:96:log_dist] [Rank 0] step=6990, skipped=118, lr=[3.48536995523808e-07, 3.48536995523808e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6989|ppo_ep: 1|act_loss: -0.0030765533447265625|cri_loss: -0.0014123916625976562|unsuper_loss: 0.0
+average reward score: 5.17578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.87%) |Training time=0.44s (20.58%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6990|ppo_ep: 1|act_loss: -0.019012451171875|cri_loss: -0.0092620849609375|unsuper_loss: 0.0
+average reward score: 5.05078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.51%) |Training time=0.45s (20.84%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6991|ppo_ep: 1|act_loss: 0.0038299560546875|cri_loss: 0.00209808349609375|unsuper_loss: 0.0
+average reward score: 5.2578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.55%) |Training time=0.45s (20.80%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6992|ppo_ep: 1|act_loss: -0.0175018310546875|cri_loss: -0.0086212158203125|unsuper_loss: 0.0
+average reward score: 5.46875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.38%) |Training time=0.45s (20.95%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6993|ppo_ep: 1|act_loss: -0.0030231475830078125|cri_loss: -0.00106048583984375|unsuper_loss: 0.0
+average reward score: 5.83203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.39%) |Training time=0.45s (21.06%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6994|ppo_ep: 1|act_loss: -0.0001442432403564453|cri_loss: 3.1948089599609375e-05|unsuper_loss: 0.0
+average reward score: 5.75
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.29%) |Training time=0.45s (21.05%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6995|ppo_ep: 1|act_loss: 0.0048828125|cri_loss: 0.002719879150390625|unsuper_loss: 0.0
+average reward score: 4.8359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.28%) |Training time=0.46s (21.16%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6996|ppo_ep: 1|act_loss: 0.0269775390625|cri_loss: 0.01395416259765625|unsuper_loss: 0.0
+average reward score: 4.69921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.66%) |Training time=0.46s (21.60%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6997|ppo_ep: 1|act_loss: 0.00444793701171875|cri_loss: 0.0022983551025390625|unsuper_loss: 0.0
+average reward score: 5.73046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.78%) |Training time=0.46s (21.55%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.54
+epoch: 0|step: 6998|ppo_ep: 1|act_loss: -0.0012836456298828125|cri_loss: -0.00047588348388671875|unsuper_loss: 0.0
+average reward score: 5.37109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.41%) |Training time=0.47s (21.91%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.55
+[2023-04-14 13:04:08,962] [INFO] [logging.py:96:log_dist] [Rank 0] step=7000, skipped=88, lr=[6.353263367275399e-07, 6.353263367275399e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:04:08,980] [INFO] [timer.py:199:stop] epoch=0/micro_step=7000/global_step=7000, RunningAvgSamplesPerSec=105.61414569249841, CurrSamplesPerSec=114.3478458671951, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:04:09,073] [INFO] [logging.py:96:log_dist] [Rank 0] step=7000, skipped=118, lr=[3.43650888975027e-07, 3.43650888975027e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 6999|ppo_ep: 1|act_loss: -0.026153564453125|cri_loss: -0.012939453125|unsuper_loss: 0.0
+average reward score: 5.78125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.84%) |Training time=0.44s (20.60%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7000|ppo_ep: 1|act_loss: 0.012725830078125|cri_loss: 0.00653839111328125|unsuper_loss: 0.0
+average reward score: 5.26953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.61s (71.96%) |Training time=0.46s (20.72%) |Others=0.16 (7.32%)|CurSamplesPerSec=14.27 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7001|ppo_ep: 1|act_loss: -0.005931854248046875|cri_loss: -0.0025920867919921875|unsuper_loss: 0.0
+average reward score: 4.66796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.04%) |Training time=0.44s (20.36%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7002|ppo_ep: 1|act_loss: -0.0015783309936523438|cri_loss: -0.0006132125854492188|unsuper_loss: 0.0
+average reward score: 4.69140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.45s (21.07%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7003|ppo_ep: 1|act_loss: 0.017547607421875|cri_loss: 0.009063720703125|unsuper_loss: 0.0
+average reward score: 5.6171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.77%) |Training time=0.44s (20.65%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7004|ppo_ep: 1|act_loss: 0.03875732421875|cri_loss: 0.0197906494140625|unsuper_loss: 0.0
+average reward score: 5.015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.32%) |Training time=0.45s (21.02%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.55
+[2023-04-14 13:04:22,083] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 7005|ppo_ep: 1|act_loss: 0.0027103424072265625|cri_loss: 0.0014028549194335938|unsuper_loss: 0.0
+average reward score: 5.22265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.47%) |Training time=0.46s (21.29%) |Others=0.09 (4.23%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.55
+[2023-04-14 13:04:24,221] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 7006|ppo_ep: 1|act_loss: -0.00681304931640625|cri_loss: -0.0032558441162109375|unsuper_loss: 0.0
+average reward score: 5.15234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.77%) |Training time=0.45s (21.04%) |Others=0.09 (4.20%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7007|ppo_ep: 1|act_loss: -0.01065826416015625|cri_loss: -0.005084991455078125|unsuper_loss: 0.0
+average reward score: 5.125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.07%) |Training time=0.46s (21.32%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7008|ppo_ep: 1|act_loss: 0.01336669921875|cri_loss: 0.007213592529296875|unsuper_loss: 0.0
+average reward score: 5.6875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.34%) |Training time=0.45s (21.00%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.55
+[2023-04-14 13:04:30,588] [INFO] [logging.py:96:log_dist] [Rank 0] step=7010, skipped=88, lr=[6.261437295262503e-07, 6.261437295262503e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:04:30,607] [INFO] [timer.py:199:stop] epoch=0/micro_step=7010/global_step=7010, RunningAvgSamplesPerSec=105.62016992871143, CurrSamplesPerSec=106.12538378393765, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:04:30,699] [INFO] [logging.py:96:log_dist] [Rank 0] step=7010, skipped=120, lr=[3.397650131284941e-07, 3.397650131284941e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7009|ppo_ep: 1|act_loss: 0.00827789306640625|cri_loss: 0.00453948974609375|unsuper_loss: 0.0
+average reward score: 5.8828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.24%) |Training time=0.46s (21.28%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7010|ppo_ep: 1|act_loss: 0.003917694091796875|cri_loss: 0.0020294189453125|unsuper_loss: 0.0
+average reward score: 5.19140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.48%) |Training time=0.45s (20.07%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.25 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7011|ppo_ep: 1|act_loss: 0.00323486328125|cri_loss: 0.0018863677978515625|unsuper_loss: 0.0
+average reward score: 5.08984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.54%) |Training time=0.45s (20.79%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7012|ppo_ep: 1|act_loss: 0.0350341796875|cri_loss: 0.0192108154296875|unsuper_loss: 0.0
+average reward score: 5.41015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.56%) |Training time=0.45s (20.77%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7013|ppo_ep: 1|act_loss: 0.01261138916015625|cri_loss: 0.00699615478515625|unsuper_loss: 0.0
+average reward score: 5.50390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.54%) |Training time=0.45s (20.88%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7014|ppo_ep: 1|act_loss: -0.021697998046875|cri_loss: -0.01053619384765625|unsuper_loss: 0.0
+average reward score: 5.375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.68%) |Training time=0.45s (20.68%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7015|ppo_ep: 1|act_loss: -0.0042572021484375|cri_loss: -0.0020732879638671875|unsuper_loss: 0.0
+average reward score: 6.0703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.75%) |Training time=0.44s (20.67%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7016|ppo_ep: 1|act_loss: -0.01311492919921875|cri_loss: -0.00646209716796875|unsuper_loss: 0.0
+average reward score: 4.98046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.76s (76.24%) |Training time=0.45s (19.44%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.83 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7017|ppo_ep: 1|act_loss: -0.0018291473388671875|cri_loss: -0.0008478164672851562|unsuper_loss: 0.0
+average reward score: 5.03125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.20%) |Training time=0.45s (21.14%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7018|ppo_ep: 1|act_loss: 0.03436279296875|cri_loss: 0.017791748046875|unsuper_loss: 0.0
+average reward score: 5.73828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.64%) |Training time=0.44s (20.68%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.55
+[2023-04-14 13:04:52,328] [INFO] [logging.py:96:log_dist] [Rank 0] step=7020, skipped=88, lr=[6.170233595040777e-07, 6.170233595040777e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:04:52,347] [INFO] [timer.py:199:stop] epoch=0/micro_step=7020/global_step=7020, RunningAvgSamplesPerSec=105.6290418746056, CurrSamplesPerSec=111.25124374398021, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:04:52,439] [INFO] [logging.py:96:log_dist] [Rank 0] step=7020, skipped=120, lr=[3.3493649053890325e-07, 3.3493649053890325e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7019|ppo_ep: 1|act_loss: 0.0511474609375|cri_loss: 0.0259552001953125|unsuper_loss: 0.0
+average reward score: 5.51953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.47%) |Training time=0.45s (20.98%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7020|ppo_ep: 1|act_loss: -0.01282501220703125|cri_loss: -0.0062713623046875|unsuper_loss: 0.0
+average reward score: 5.234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.37%) |Training time=0.45s (20.96%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7021|ppo_ep: 1|act_loss: 0.04193115234375|cri_loss: 0.0215911865234375|unsuper_loss: 0.0
+average reward score: 5.45703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.42%) |Training time=0.45s (21.02%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7022|ppo_ep: 1|act_loss: -0.0235748291015625|cri_loss: -0.01166534423828125|unsuper_loss: 0.0
+average reward score: 5.73046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.16%) |Training time=0.45s (21.19%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7023|ppo_ep: 1|act_loss: -0.025726318359375|cri_loss: -0.01245880126953125|unsuper_loss: 0.0
+average reward score: 5.98046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.44%) |Training time=0.45s (20.90%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7024|ppo_ep: 1|act_loss: 0.0184783935546875|cri_loss: 0.009521484375|unsuper_loss: 0.0
+average reward score: 5.05859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.70%) |Training time=0.45s (20.73%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7025|ppo_ep: 1|act_loss: 0.004638671875|cri_loss: 0.0024890899658203125|unsuper_loss: 0.0
+average reward score: 5.0546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.20%) |Training time=0.43s (20.21%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7026|ppo_ep: 1|act_loss: -0.016021728515625|cri_loss: -0.007556915283203125|unsuper_loss: 0.0
+average reward score: 5.28125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.92%) |Training time=0.44s (20.41%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7027|ppo_ep: 1|act_loss: 0.003475189208984375|cri_loss: 0.0026454925537109375|unsuper_loss: 0.0
+average reward score: 6.0234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.90%) |Training time=0.44s (20.41%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7028|ppo_ep: 1|act_loss: -0.01137542724609375|cri_loss: -0.005615234375|unsuper_loss: 0.0
+average reward score: 5.0390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.90%) |Training time=0.44s (20.44%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.55
+[2023-04-14 13:05:13,796] [INFO] [logging.py:96:log_dist] [Rank 0] step=7030, skipped=88, lr=[6.079653618469082e-07, 6.079653618469082e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:05:13,814] [INFO] [timer.py:199:stop] epoch=0/micro_step=7030/global_step=7030, RunningAvgSamplesPerSec=105.64047894564446, CurrSamplesPerSec=121.2592382456413, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:05:13,907] [INFO] [logging.py:96:log_dist] [Rank 0] step=7030, skipped=120, lr=[3.3014005941345406e-07, 3.3014005941345406e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7029|ppo_ep: 1|act_loss: -0.0184326171875|cri_loss: -0.00904083251953125|unsuper_loss: 0.0
+average reward score: 5.6875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.61%) |Training time=0.43s (19.83%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7030|ppo_ep: 1|act_loss: 0.02105712890625|cri_loss: 0.0115814208984375|unsuper_loss: 0.0
+average reward score: 5.28125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.10%) |Training time=0.43s (20.22%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7031|ppo_ep: 1|act_loss: 0.05078125|cri_loss: 0.026123046875|unsuper_loss: 0.0
+average reward score: 5.375
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.72%) |Training time=0.44s (19.03%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7032|ppo_ep: 1|act_loss: -0.00595855712890625|cri_loss: -0.00264739990234375|unsuper_loss: 0.0
+average reward score: 5.5234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.95%) |Training time=0.44s (20.40%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7033|ppo_ep: 1|act_loss: -0.0072021484375|cri_loss: -0.003490447998046875|unsuper_loss: 0.0
+average reward score: 5.140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.13%) |Training time=0.44s (20.33%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7034|ppo_ep: 1|act_loss: 0.0024433135986328125|cri_loss: 0.0013751983642578125|unsuper_loss: 0.0
+average reward score: 5.55078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.65%) |Training time=0.45s (20.69%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7035|ppo_ep: 1|act_loss: 0.00522613525390625|cri_loss: 0.0029888153076171875|unsuper_loss: 0.0
+average reward score: 4.90625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.80%) |Training time=0.44s (20.62%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7036|ppo_ep: 1|act_loss: -0.01873779296875|cri_loss: -0.0090789794921875|unsuper_loss: 0.0
+average reward score: 5.03125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.91%) |Training time=0.44s (20.43%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7037|ppo_ep: 1|act_loss: 0.0185394287109375|cri_loss: 0.00945281982421875|unsuper_loss: 0.0
+average reward score: 5.1875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.01%) |Training time=0.44s (20.40%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7038|ppo_ep: 1|act_loss: -0.0100555419921875|cri_loss: -0.004840850830078125|unsuper_loss: 0.0
+average reward score: 5.21484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.00%) |Training time=0.44s (20.35%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55
+[2023-04-14 13:05:35,490] [INFO] [logging.py:96:log_dist] [Rank 0] step=7040, skipped=88, lr=[5.989698708161196e-07, 5.989698708161196e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:05:35,509] [INFO] [timer.py:199:stop] epoch=0/micro_step=7040/global_step=7040, RunningAvgSamplesPerSec=105.6536996369175, CurrSamplesPerSec=118.7872625896097, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:05:35,601] [INFO] [logging.py:96:log_dist] [Rank 0] step=7040, skipped=120, lr=[3.253757908468269e-07, 3.253757908468269e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7039|ppo_ep: 1|act_loss: -0.0106964111328125|cri_loss: -0.005218505859375|unsuper_loss: 0.0
+average reward score: 5.48828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.62%) |Training time=0.43s (19.87%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7040|ppo_ep: 1|act_loss: -0.014617919921875|cri_loss: -0.006984710693359375|unsuper_loss: 0.0
+average reward score: 5.140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.71s (76.03%) |Training time=0.44s (19.51%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.25 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7041|ppo_ep: 1|act_loss: 0.017822265625|cri_loss: 0.00927734375|unsuper_loss: 0.0
+average reward score: 5.41796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.94%) |Training time=0.44s (20.39%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7042|ppo_ep: 1|act_loss: -0.01445770263671875|cri_loss: -0.007171630859375|unsuper_loss: 0.0
+average reward score: 5.0859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.98%) |Training time=0.44s (20.36%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7043|ppo_ep: 1|act_loss: -0.0104522705078125|cri_loss: -0.0050811767578125|unsuper_loss: 0.0
+average reward score: 5.09375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.82%) |Training time=0.44s (20.51%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7044|ppo_ep: 1|act_loss: 0.016082763671875|cri_loss: 0.00823211669921875|unsuper_loss: 0.0
+average reward score: 5.6953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.31%) |Training time=0.43s (19.98%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7045|ppo_ep: 1|act_loss: -0.027099609375|cri_loss: -0.013214111328125|unsuper_loss: 0.0
+average reward score: 5.62890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.15%) |Training time=0.50s (22.40%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.43 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7046|ppo_ep: 1|act_loss: 0.0109405517578125|cri_loss: 0.005657196044921875|unsuper_loss: 0.0
+average reward score: 5.72265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.04%) |Training time=0.44s (20.30%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7047|ppo_ep: 1|act_loss: -0.02777099609375|cri_loss: -0.01348114013671875|unsuper_loss: 0.0
+average reward score: 5.22265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.05%) |Training time=0.44s (20.36%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7048|ppo_ep: 1|act_loss: -0.02685546875|cri_loss: -0.0132293701171875|unsuper_loss: 0.0
+average reward score: 5.546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.89%) |Training time=0.44s (20.43%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.55
+[2023-04-14 13:05:57,120] [INFO] [logging.py:96:log_dist] [Rank 0] step=7050, skipped=88, lr=[5.900370197465865e-07, 5.900370197465865e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:05:57,139] [INFO] [timer.py:199:stop] epoch=0/micro_step=7050/global_step=7050, RunningAvgSamplesPerSec=105.66728407954258, CurrSamplesPerSec=116.75401299782442, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:05:57,231] [INFO] [logging.py:96:log_dist] [Rank 0] step=7050, skipped=120, lr=[3.206437554569741e-07, 3.206437554569741e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7049|ppo_ep: 1|act_loss: 0.002655029296875|cri_loss: 0.001415252685546875|unsuper_loss: 0.0
+average reward score: 5.84375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.05%) |Training time=0.44s (20.37%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7050|ppo_ep: 1|act_loss: -0.005207061767578125|cri_loss: -0.002410888671875|unsuper_loss: 0.0
+average reward score: 5.62109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.94%) |Training time=0.44s (20.38%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7051|ppo_ep: 1|act_loss: 0.00331878662109375|cri_loss: 0.0018138885498046875|unsuper_loss: 0.0
+average reward score: 5.46875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.00%) |Training time=0.44s (20.41%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7052|ppo_ep: 1|act_loss: 0.0107879638671875|cri_loss: 0.005695343017578125|unsuper_loss: 0.0
+average reward score: 5.625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.98%) |Training time=0.44s (20.36%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7053|ppo_ep: 1|act_loss: -0.0012302398681640625|cri_loss: -0.0005526542663574219|unsuper_loss: 0.0
+average reward score: 6.0625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.50%) |Training time=0.43s (19.90%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7054|ppo_ep: 1|act_loss: -0.007366180419921875|cri_loss: -0.002979278564453125|unsuper_loss: 0.0
+average reward score: 5.359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.93%) |Training time=0.44s (20.38%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7055|ppo_ep: 1|act_loss: 0.01214599609375|cri_loss: 0.006175994873046875|unsuper_loss: 0.0
+average reward score: 5.76171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.05%) |Training time=0.44s (20.41%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7056|ppo_ep: 1|act_loss: 0.006378173828125|cri_loss: 0.0032520294189453125|unsuper_loss: 0.0
+average reward score: 6.0546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.86%) |Training time=0.44s (20.48%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7057|ppo_ep: 1|act_loss: 0.006343841552734375|cri_loss: 0.003448486328125|unsuper_loss: 0.0
+average reward score: 5.9453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.13%) |Training time=0.44s (20.28%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7058|ppo_ep: 1|act_loss: 0.0113677978515625|cri_loss: 0.005924224853515625|unsuper_loss: 0.0
+average reward score: 5.15625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.17%) |Training time=0.43s (20.15%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.55
+[2023-04-14 13:06:18,609] [INFO] [logging.py:96:log_dist] [Rank 0] step=7060, skipped=88, lr=[5.811669410447133e-07, 5.811669410447133e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:06:18,627] [INFO] [timer.py:199:stop] epoch=0/micro_step=7060/global_step=7060, RunningAvgSamplesPerSec=105.68131365475776, CurrSamplesPerSec=114.69336225070093, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:06:18,721] [INFO] [logging.py:96:log_dist] [Rank 0] step=7060, skipped=120, lr=[3.1594402338407633e-07, 3.1594402338407633e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7059|ppo_ep: 1|act_loss: 0.0213165283203125|cri_loss: 0.01114654541015625|unsuper_loss: 0.0
+average reward score: 5.72265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.86%) |Training time=0.44s (20.56%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7060|ppo_ep: 1|act_loss: -0.0136260986328125|cri_loss: -0.0066986083984375|unsuper_loss: 0.0
+average reward score: 5.3671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.62s (72.07%) |Training time=0.53s (23.48%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.22 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7061|ppo_ep: 1|act_loss: 0.0153656005859375|cri_loss: 0.00801849365234375|unsuper_loss: 0.0
+average reward score: 5.81640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.12%) |Training time=0.47s (21.37%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7062|ppo_ep: 1|act_loss: -0.003650665283203125|cri_loss: -0.0014925003051757812|unsuper_loss: 0.0
+average reward score: 5.703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.65%) |Training time=0.45s (20.70%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7063|ppo_ep: 1|act_loss: -0.0135955810546875|cri_loss: -0.00644683837890625|unsuper_loss: 0.0
+average reward score: 5.3046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.78%) |Training time=0.43s (19.65%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7064|ppo_ep: 1|act_loss: -0.035491943359375|cri_loss: -0.017486572265625|unsuper_loss: 0.0
+average reward score: 5.26953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.67s (77.20%) |Training time=0.39s (18.02%) |Others=0.10 (4.78%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.55
+[2023-04-14 13:06:31,684] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 7065|ppo_ep: 1|act_loss: 0.0009374618530273438|cri_loss: 0.0006699562072753906|unsuper_loss: 0.0
+average reward score: 5.7421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.76%) |Training time=0.42s (19.61%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7066|ppo_ep: 1|act_loss: -0.005615234375|cri_loss: -0.0027637481689453125|unsuper_loss: 0.0
+average reward score: 5.88671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.89%) |Training time=0.44s (20.38%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7067|ppo_ep: 1|act_loss: -0.010498046875|cri_loss: -0.004825592041015625|unsuper_loss: 0.0
+average reward score: 5.0390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.76%) |Training time=0.45s (20.65%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7068|ppo_ep: 1|act_loss: 0.00209808349609375|cri_loss: 0.00121307373046875|unsuper_loss: 0.0
+average reward score: 6.6171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.81%) |Training time=0.44s (20.50%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.55
+[2023-04-14 13:06:40,356] [INFO] [logging.py:96:log_dist] [Rank 0] step=7070, skipped=89, lr=[5.73237649271517e-07, 5.73237649271517e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:06:40,357] [INFO] [timer.py:199:stop] epoch=0/micro_step=7070/global_step=7070, RunningAvgSamplesPerSec=105.69375759915556, CurrSamplesPerSec=115.86235091814396, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:06:40,469] [INFO] [logging.py:96:log_dist] [Rank 0] step=7070, skipped=120, lr=[3.1127666428949986e-07, 3.1127666428949986e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7069|ppo_ep: 1|act_loss: 0.012603759765625|cri_loss: 0.00643157958984375|unsuper_loss: 0.0
+average reward score: 5.7734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.66s (74.89%) |Training time=0.45s (20.20%) |Others=0.11 (4.91%)|CurSamplesPerSec=14.45 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7070|ppo_ep: 1|act_loss: -0.01456451416015625|cri_loss: -0.00681304931640625|unsuper_loss: 0.0
+average reward score: 5.0078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.87%) |Training time=0.44s (20.44%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7071|ppo_ep: 1|act_loss: -0.023345947265625|cri_loss: -0.0114288330078125|unsuper_loss: 0.0
+average reward score: 6.2578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.38%) |Training time=0.45s (20.95%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7072|ppo_ep: 1|act_loss: 0.0129547119140625|cri_loss: 0.0068206787109375|unsuper_loss: 0.0
+average reward score: 6.015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.30%) |Training time=0.45s (21.01%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7073|ppo_ep: 1|act_loss: -0.00811004638671875|cri_loss: -0.003925323486328125|unsuper_loss: 0.0
+average reward score: 5.7109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.56%) |Training time=0.45s (20.86%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7074|ppo_ep: 1|act_loss: 0.0012836456298828125|cri_loss: 0.0007758140563964844|unsuper_loss: 0.0
+average reward score: 6.1484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.44%) |Training time=0.45s (20.90%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7075|ppo_ep: 1|act_loss: -0.0047607421875|cri_loss: -0.002140045166015625|unsuper_loss: 0.0
+average reward score: 5.55859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.88%) |Training time=0.44s (20.46%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7076|ppo_ep: 1|act_loss: -0.010772705078125|cri_loss: -0.005107879638671875|unsuper_loss: 0.0
+average reward score: 5.83984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.14%) |Training time=0.46s (19.57%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7077|ppo_ep: 1|act_loss: 0.005046844482421875|cri_loss: 0.0028209686279296875|unsuper_loss: 0.0
+average reward score: 5.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.43%) |Training time=0.45s (21.01%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7078|ppo_ep: 1|act_loss: -0.0077362060546875|cri_loss: -0.003391265869140625|unsuper_loss: 0.0
+average reward score: 5.203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.75%) |Training time=0.44s (20.58%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.55
+[2023-04-14 13:07:02,095] [INFO] [logging.py:96:log_dist] [Rank 0] step=7080, skipped=89, lr=[5.644871995139011e-07, 5.644871995139011e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:07:02,113] [INFO] [timer.py:199:stop] epoch=0/micro_step=7080/global_step=7080, RunningAvgSamplesPerSec=105.70222970266056, CurrSamplesPerSec=112.85040635914649, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:07:02,205] [INFO] [logging.py:96:log_dist] [Rank 0] step=7080, skipped=120, lr=[3.066417473547667e-07, 3.066417473547667e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7079|ppo_ep: 1|act_loss: 0.032135009765625|cri_loss: 0.0170440673828125|unsuper_loss: 0.0
+average reward score: 6.09375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.68%) |Training time=0.45s (20.69%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7080|ppo_ep: 1|act_loss: -0.023345947265625|cri_loss: -0.0113067626953125|unsuper_loss: 0.0
+average reward score: 5.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.48%) |Training time=0.45s (20.85%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7081|ppo_ep: 1|act_loss: 0.00737762451171875|cri_loss: 0.004241943359375|unsuper_loss: 0.0
+average reward score: 5.125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.87%) |Training time=0.44s (20.44%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7082|ppo_ep: 1|act_loss: -0.0083160400390625|cri_loss: -0.0031337738037109375|unsuper_loss: 0.0
+average reward score: 5.45703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.76%) |Training time=0.47s (21.59%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7083|ppo_ep: 1|act_loss: 0.008056640625|cri_loss: 0.00444793701171875|unsuper_loss: 0.0
+average reward score: 5.6484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.10%) |Training time=0.46s (21.29%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7084|ppo_ep: 1|act_loss: -0.0013437271118164062|cri_loss: -0.00020885467529296875|unsuper_loss: 0.0
+average reward score: 5.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.89%) |Training time=0.46s (21.46%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7085|ppo_ep: 1|act_loss: 0.0086517333984375|cri_loss: 0.00460052490234375|unsuper_loss: 0.0
+average reward score: 5.81640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.07%) |Training time=0.46s (21.26%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7086|ppo_ep: 1|act_loss: 0.0016613006591796875|cri_loss: 0.0010709762573242188|unsuper_loss: 0.0
+average reward score: 5.71484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.08%) |Training time=0.46s (21.26%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7087|ppo_ep: 1|act_loss: 0.016571044921875|cri_loss: 0.0089569091796875|unsuper_loss: 0.0
+average reward score: 5.015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.61%) |Training time=0.47s (21.76%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7088|ppo_ep: 1|act_loss: -0.0264129638671875|cri_loss: -0.01276397705078125|unsuper_loss: 0.0
+average reward score: 5.6953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.65%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.55
+[2023-04-14 13:07:23,635] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
+[2023-04-14 13:07:23,636] [INFO] [logging.py:96:log_dist] [Rank 0] step=7090, skipped=90, lr=[5.566657852262237e-07, 5.566657852262237e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:07:23,636] [INFO] [timer.py:199:stop] epoch=0/micro_step=7090/global_step=7090, RunningAvgSamplesPerSec=105.70629971379196, CurrSamplesPerSec=114.99573149730027, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:07:23,728] [INFO] [logging.py:96:log_dist] [Rank 0] step=7090, skipped=120, lr=[3.020393412805259e-07, 3.020393412805259e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7089|ppo_ep: 1|act_loss: -0.05987548828125|cri_loss: -0.013641357421875|unsuper_loss: 0.0
+average reward score: 5.6640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.65%) |Training time=0.44s (20.68%) |Others=0.10 (4.67%)|CurSamplesPerSec=15.03 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7090|ppo_ep: 1|act_loss: 0.009307861328125|cri_loss: 0.004871368408203125|unsuper_loss: 0.0
+average reward score: 5.83203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.46s (21.26%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7091|ppo_ep: 1|act_loss: 0.00020992755889892578|cri_loss: 0.000354766845703125|unsuper_loss: 0.0
+average reward score: 6.1015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.12%) |Training time=0.46s (19.62%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.74 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7092|ppo_ep: 1|act_loss: 0.005802154541015625|cri_loss: 0.0029811859130859375|unsuper_loss: 0.0
+average reward score: 4.9609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.71%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7093|ppo_ep: 1|act_loss: -0.00748443603515625|cri_loss: -0.0035839080810546875|unsuper_loss: 0.0
+average reward score: 5.609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.45%) |Training time=0.45s (20.86%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7094|ppo_ep: 1|act_loss: -0.003345489501953125|cri_loss: -0.0012674331665039062|unsuper_loss: 0.0
+average reward score: 5.34375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.51%) |Training time=0.44s (20.77%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7095|ppo_ep: 1|act_loss: 0.0189208984375|cri_loss: 0.01151275634765625|unsuper_loss: 0.0
+average reward score: 5.4453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.56%) |Training time=0.44s (20.76%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7096|ppo_ep: 1|act_loss: -0.01073455810546875|cri_loss: -0.00524139404296875|unsuper_loss: 0.0
+average reward score: 5.16796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.73%) |Training time=0.44s (20.58%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7097|ppo_ep: 1|act_loss: -0.009918212890625|cri_loss: -0.00482177734375|unsuper_loss: 0.0
+average reward score: 5.0234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.68%) |Training time=0.44s (20.61%) |Others=0.10 (4.71%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7098|ppo_ep: 1|act_loss: -0.014434814453125|cri_loss: -0.007038116455078125|unsuper_loss: 0.0
+average reward score: 5.91796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.84%) |Training time=0.46s (21.20%) |Others=0.11 (4.96%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55
+[2023-04-14 13:07:45,359] [INFO] [logging.py:96:log_dist] [Rank 0] step=7100, skipped=90, lr=[5.480354326383703e-07, 5.480354326383703e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:07:45,378] [INFO] [timer.py:199:stop] epoch=0/micro_step=7100/global_step=7100, RunningAvgSamplesPerSec=105.71372664296617, CurrSamplesPerSec=113.53677699652836, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:07:45,470] [INFO] [logging.py:96:log_dist] [Rank 0] step=7100, skipped=120, lr=[2.9746951428553884e-07, 2.9746951428553884e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7099|ppo_ep: 1|act_loss: -0.00740814208984375|cri_loss: -0.0036258697509765625|unsuper_loss: 0.0
+average reward score: 5.9609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.48%) |Training time=0.44s (19.75%) |Others=0.11 (4.77%)|CurSamplesPerSec=14.22 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7100|ppo_ep: 1|act_loss: 0.008270263671875|cri_loss: 0.004364013671875|unsuper_loss: 0.0
+average reward score: 5.87109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.47%) |Training time=0.44s (20.83%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7101|ppo_ep: 1|act_loss: 0.00771331787109375|cri_loss: 0.003948211669921875|unsuper_loss: 0.0
+average reward score: 5.96875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.54%) |Training time=0.44s (20.75%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7102|ppo_ep: 1|act_loss: 0.02349853515625|cri_loss: 0.01323699951171875|unsuper_loss: 0.0
+average reward score: 5.171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.38%) |Training time=0.45s (20.94%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7103|ppo_ep: 1|act_loss: 0.0160675048828125|cri_loss: 0.00824737548828125|unsuper_loss: 0.0
+average reward score: 5.453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.62%) |Training time=0.44s (20.79%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7104|ppo_ep: 1|act_loss: -0.020355224609375|cri_loss: -0.009979248046875|unsuper_loss: 0.0
+average reward score: 5.8046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.01%) |Training time=0.46s (21.33%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7105|ppo_ep: 1|act_loss: -0.00275421142578125|cri_loss: -0.00083160400390625|unsuper_loss: 0.0
+average reward score: 5.84765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.35%) |Training time=0.45s (20.96%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7106|ppo_ep: 1|act_loss: -0.00026607513427734375|cri_loss: 5.5789947509765625e-05|unsuper_loss: 0.0
+average reward score: 5.5
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.59%) |Training time=0.47s (20.10%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.55
+[2023-04-14 13:08:02,789] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 7107|ppo_ep: 1|act_loss: -0.019775390625|cri_loss: -0.00945281982421875|unsuper_loss: 0.0
+average reward score: 5.30078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.51%) |Training time=0.46s (21.28%) |Others=0.09 (4.21%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.55
+[2023-04-14 13:08:04,927] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 7108|ppo_ep: 1|act_loss: -0.02490234375|cri_loss: -0.01221466064453125|unsuper_loss: 0.0
+average reward score: 5.30078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.34%) |Training time=0.46s (21.50%) |Others=0.09 (4.16%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.55
+[2023-04-14 13:08:06,968] [INFO] [logging.py:96:log_dist] [Rank 0] step=7110, skipped=90, lr=[5.394684749830206e-07, 5.394684749830206e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:08:06,986] [INFO] [timer.py:199:stop] epoch=0/micro_step=7110/global_step=7110, RunningAvgSamplesPerSec=105.7198366319902, CurrSamplesPerSec=109.52436472834691, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:08:07,079] [INFO] [logging.py:96:log_dist] [Rank 0] step=7110, skipped=122, lr=[2.9383715516136083e-07, 2.9383715516136083e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7109|ppo_ep: 1|act_loss: -0.0279388427734375|cri_loss: -0.0134124755859375|unsuper_loss: 0.0
+average reward score: 5.4921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.22%) |Training time=0.45s (21.13%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7110|ppo_ep: 1|act_loss: -0.02520751953125|cri_loss: -0.0123291015625|unsuper_loss: 0.0
+average reward score: 5.69921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.45s (21.05%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7111|ppo_ep: 1|act_loss: -0.004817962646484375|cri_loss: -0.0023059844970703125|unsuper_loss: 0.0
+average reward score: 5.05078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.25%) |Training time=0.46s (21.14%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7112|ppo_ep: 1|act_loss: 0.018310546875|cri_loss: 0.00978851318359375|unsuper_loss: 0.0
+average reward score: 5.0234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.08%) |Training time=0.45s (21.24%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7113|ppo_ep: 1|act_loss: 0.0018711090087890625|cri_loss: 0.0011491775512695312|unsuper_loss: 0.0
+average reward score: 5.59375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.12%) |Training time=0.45s (21.21%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7114|ppo_ep: 1|act_loss: -0.00603485107421875|cri_loss: -0.00254058837890625|unsuper_loss: 0.0
+average reward score: 5.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.45s (21.15%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7115|ppo_ep: 1|act_loss: 0.011688232421875|cri_loss: 0.0062713623046875|unsuper_loss: 0.0
+average reward score: 5.57421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.14%) |Training time=0.46s (21.28%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7116|ppo_ep: 1|act_loss: 0.0015411376953125|cri_loss: 0.0009899139404296875|unsuper_loss: 0.0
+average reward score: 5.75
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.06%) |Training time=0.46s (21.27%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7117|ppo_ep: 1|act_loss: 0.0027523040771484375|cri_loss: 0.00145721435546875|unsuper_loss: 0.0
+average reward score: 5.8203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.24%) |Training time=0.45s (21.16%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7118|ppo_ep: 1|act_loss: 0.004467010498046875|cri_loss: 0.0024471282958984375|unsuper_loss: 0.0
+average reward score: 5.5390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.15%) |Training time=0.48s (22.22%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.55
+[2023-04-14 13:08:28,469] [INFO] [logging.py:96:log_dist] [Rank 0] step=7120, skipped=90, lr=[5.309650392431538e-07, 5.309650392431538e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:08:28,487] [INFO] [timer.py:199:stop] epoch=0/micro_step=7120/global_step=7120, RunningAvgSamplesPerSec=105.7216516885662, CurrSamplesPerSec=95.38179159059753, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:08:28,580] [INFO] [logging.py:96:log_dist] [Rank 0] step=7120, skipped=122, lr=[2.8932614087824774e-07, 2.8932614087824774e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7119|ppo_ep: 1|act_loss: -0.0005578994750976562|cri_loss: -0.00013780593872070312|unsuper_loss: 0.0
+average reward score: 5.4375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.66%) |Training time=0.50s (22.78%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7120|ppo_ep: 1|act_loss: 0.0099334716796875|cri_loss: 0.005100250244140625|unsuper_loss: 0.0
+average reward score: 5.40234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.49%) |Training time=0.50s (22.94%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7121|ppo_ep: 1|act_loss: -0.0159912109375|cri_loss: -0.0079345703125|unsuper_loss: 0.0
+average reward score: 6.109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.00%) |Training time=0.52s (23.56%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.42 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7122|ppo_ep: 1|act_loss: -0.0201416015625|cri_loss: -0.00991058349609375|unsuper_loss: 0.0
+average reward score: 5.5
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.17%) |Training time=0.46s (21.19%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7123|ppo_ep: 1|act_loss: -0.020599365234375|cri_loss: -0.00914764404296875|unsuper_loss: 0.0
+average reward score: 5.59375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.88%) |Training time=0.46s (21.50%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7124|ppo_ep: 1|act_loss: -0.00659942626953125|cri_loss: -0.0029087066650390625|unsuper_loss: 0.0
+average reward score: 5.4453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.48s (21.94%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7125|ppo_ep: 1|act_loss: 0.0066680908203125|cri_loss: 0.003437042236328125|unsuper_loss: 0.0
+average reward score: 5.3203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.31%) |Training time=0.48s (22.16%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7126|ppo_ep: 1|act_loss: -0.01512908935546875|cri_loss: -0.0073089599609375|unsuper_loss: 0.0
+average reward score: 5.7265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.84%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7127|ppo_ep: 1|act_loss: -0.00734710693359375|cri_loss: -0.0032787322998046875|unsuper_loss: 0.0
+average reward score: 5.51171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.40%) |Training time=0.48s (22.06%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7128|ppo_ep: 1|act_loss: 0.032562255859375|cri_loss: 0.0173492431640625|unsuper_loss: 0.0
+average reward score: 4.9921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.47%) |Training time=0.49s (22.00%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.45 |AvgSamplesPerSec=14.55
+[2023-04-14 13:08:50,384] [INFO] [logging.py:96:log_dist] [Rank 0] step=7130, skipped=90, lr=[5.225252514602012e-07, 5.225252514602012e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:08:50,402] [INFO] [timer.py:199:stop] epoch=0/micro_step=7130/global_step=7130, RunningAvgSamplesPerSec=105.71516173869826, CurrSamplesPerSec=97.46446357329275, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:08:50,494] [INFO] [logging.py:96:log_dist] [Rank 0] step=7130, skipped=122, lr=[2.848478941146873e-07, 2.848478941146873e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7129|ppo_ep: 1|act_loss: 0.0390625|cri_loss: 0.0198974609375|unsuper_loss: 0.0
+average reward score: 5.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.23%) |Training time=0.49s (21.49%) |Others=0.10 (4.28%)|CurSamplesPerSec=14.00 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7130|ppo_ep: 1|act_loss: 0.009613037109375|cri_loss: 0.0063323974609375|unsuper_loss: 0.0
+average reward score: 5.91015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.81%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7131|ppo_ep: 1|act_loss: -0.021148681640625|cri_loss: -0.0103607177734375|unsuper_loss: 0.0
+average reward score: 5.30859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.76%) |Training time=0.47s (21.68%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7132|ppo_ep: 1|act_loss: 0.0086669921875|cri_loss: 0.00457000732421875|unsuper_loss: 0.0
+average reward score: 5.3984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.30%) |Training time=0.48s (22.07%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7133|ppo_ep: 1|act_loss: -0.0162811279296875|cri_loss: -0.00804901123046875|unsuper_loss: 0.0
+average reward score: 5.3984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.66%) |Training time=0.47s (21.79%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7134|ppo_ep: 1|act_loss: 0.007770538330078125|cri_loss: 0.00411224365234375|unsuper_loss: 0.0
+average reward score: 5.90234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.43%) |Training time=0.47s (21.93%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7135|ppo_ep: 1|act_loss: 0.03326416015625|cri_loss: 0.017120361328125|unsuper_loss: 0.0
+average reward score: 5.515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.63%) |Training time=0.47s (21.84%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7136|ppo_ep: 1|act_loss: -0.002288818359375|cri_loss: -0.00075531005859375|unsuper_loss: 0.0
+average reward score: 5.41796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.03%) |Training time=0.48s (20.65%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.65 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7137|ppo_ep: 1|act_loss: 0.00754547119140625|cri_loss: 0.003917694091796875|unsuper_loss: 0.0
+average reward score: 5.13671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.18%) |Training time=0.49s (22.32%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7138|ppo_ep: 1|act_loss: 0.0069580078125|cri_loss: 0.004032135009765625|unsuper_loss: 0.0
+average reward score: 6.12109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.48s (22.00%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55
+[2023-04-14 13:09:12,227] [INFO] [logging.py:96:log_dist] [Rank 0] step=7140, skipped=90, lr=[5.141492367321766e-07, 5.141492367321766e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:09:12,245] [INFO] [timer.py:199:stop] epoch=0/micro_step=7140/global_step=7140, RunningAvgSamplesPerSec=105.71016137653531, CurrSamplesPerSec=104.10979883555152, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:09:12,338] [INFO] [logging.py:96:log_dist] [Rank 0] step=7140, skipped=122, lr=[2.804024812490991e-07, 2.804024812490991e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7139|ppo_ep: 1|act_loss: 0.005550384521484375|cri_loss: 0.0030155181884765625|unsuper_loss: 0.0
+average reward score: 5.609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.92%) |Training time=0.47s (21.58%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7140|ppo_ep: 1|act_loss: -0.011016845703125|cri_loss: -0.00543975830078125|unsuper_loss: 0.0
+average reward score: 6.203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.20%) |Training time=0.48s (22.18%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7141|ppo_ep: 1|act_loss: 0.0009465217590332031|cri_loss: 0.0014495849609375|unsuper_loss: 0.0
+average reward score: 5.08203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.32%) |Training time=0.48s (22.15%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7142|ppo_ep: 1|act_loss: -0.0117340087890625|cri_loss: -0.0057830810546875|unsuper_loss: 0.0
+average reward score: 5.84375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.16%) |Training time=0.48s (22.23%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7143|ppo_ep: 1|act_loss: -0.0034694671630859375|cri_loss: -0.0016698837280273438|unsuper_loss: 0.0
+average reward score: 5.84765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.48s (22.11%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7144|ppo_ep: 1|act_loss: -0.0047454833984375|cri_loss: -0.0021305084228515625|unsuper_loss: 0.0
+average reward score: 5.3359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.21%) |Training time=0.48s (22.17%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7145|ppo_ep: 1|act_loss: -0.0008382797241210938|cri_loss: 0.00013303756713867188|unsuper_loss: 0.0
+average reward score: 5.53515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.20%) |Training time=0.48s (22.28%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7146|ppo_ep: 1|act_loss: 0.005992889404296875|cri_loss: 0.0034923553466796875|unsuper_loss: 0.0
+average reward score: 4.44921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.27%) |Training time=0.48s (22.11%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7147|ppo_ep: 1|act_loss: -0.013641357421875|cri_loss: -0.006656646728515625|unsuper_loss: 0.0
+average reward score: 5.35546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.48s (22.09%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7148|ppo_ep: 1|act_loss: 0.071044921875|cri_loss: 0.0413818359375|unsuper_loss: 0.0
+average reward score: 5.31640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.46%) |Training time=0.47s (21.91%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55
+[2023-04-14 13:09:33,886] [INFO] [logging.py:96:log_dist] [Rank 0] step=7150, skipped=90, lr=[5.058371192118248e-07, 5.058371192118248e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:09:33,904] [INFO] [timer.py:199:stop] epoch=0/micro_step=7150/global_step=7150, RunningAvgSamplesPerSec=105.70305846331202, CurrSamplesPerSec=100.97480326808203, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:09:33,996] [INFO] [logging.py:96:log_dist] [Rank 0] step=7150, skipped=122, lr=[2.7598996817322614e-07, 2.7598996817322614e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7149|ppo_ep: 1|act_loss: -0.01239013671875|cri_loss: -0.00600433349609375|unsuper_loss: 0.0
+average reward score: 5.70703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.48s (22.11%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7150|ppo_ep: 1|act_loss: 0.017181396484375|cri_loss: 0.00881195068359375|unsuper_loss: 0.0
+average reward score: 5.53515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.33%) |Training time=0.48s (22.06%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7151|ppo_ep: 1|act_loss: 0.00327301025390625|cri_loss: 0.00215911865234375|unsuper_loss: 0.0
+average reward score: 5.2734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.14%) |Training time=0.48s (20.35%) |Others=0.11 (4.51%)|CurSamplesPerSec=13.61 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7152|ppo_ep: 1|act_loss: -0.0084381103515625|cri_loss: -0.00391387939453125|unsuper_loss: 0.0
+average reward score: 5.6015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.04%) |Training time=0.49s (22.35%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7153|ppo_ep: 1|act_loss: 0.01629638671875|cri_loss: 0.009246826171875|unsuper_loss: 0.0
+average reward score: 5.73046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.62%) |Training time=0.47s (21.83%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7154|ppo_ep: 1|act_loss: 0.0115966796875|cri_loss: 0.006084442138671875|unsuper_loss: 0.0
+average reward score: 5.28125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.47s (21.91%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7155|ppo_ep: 1|act_loss: 0.0010461807250976562|cri_loss: 0.0006875991821289062|unsuper_loss: 0.0
+average reward score: 5.70703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.46s (21.47%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7156|ppo_ep: 1|act_loss: 0.0706787109375|cri_loss: 0.04571533203125|unsuper_loss: 0.0
+average reward score: 5.87890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.52%) |Training time=0.47s (21.85%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7157|ppo_ep: 1|act_loss: 0.0011568069458007812|cri_loss: 0.0007429122924804688|unsuper_loss: 0.0
+average reward score: 5.8203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.62%) |Training time=0.47s (21.85%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7158|ppo_ep: 1|act_loss: -0.006542205810546875|cri_loss: -0.0032062530517578125|unsuper_loss: 0.0
+average reward score: 5.640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.61%) |Training time=0.48s (21.80%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.55
+[2023-04-14 13:09:55,739] [INFO] [logging.py:96:log_dist] [Rank 0] step=7160, skipped=90, lr=[4.975890221047792e-07, 4.975890221047792e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:09:55,757] [INFO] [timer.py:199:stop] epoch=0/micro_step=7160/global_step=7160, RunningAvgSamplesPerSec=105.69891764973828, CurrSamplesPerSec=103.62230874288463, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:09:55,851] [INFO] [logging.py:96:log_dist] [Rank 0] step=7160, skipped=122, lr=[2.7161042029115706e-07, 2.7161042029115706e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7159|ppo_ep: 1|act_loss: 0.02386474609375|cri_loss: 0.012298583984375|unsuper_loss: 0.0
+average reward score: 5.140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.81%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7160|ppo_ep: 1|act_loss: 0.00872802734375|cri_loss: 0.00505828857421875|unsuper_loss: 0.0
+average reward score: 5.12109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.49%) |Training time=0.47s (21.84%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7161|ppo_ep: 1|act_loss: -0.02520751953125|cri_loss: -0.00902557373046875|unsuper_loss: 0.0
+average reward score: 5.51171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.88%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7162|ppo_ep: 1|act_loss: -0.0174560546875|cri_loss: -0.0083770751953125|unsuper_loss: 0.0
+average reward score: 5.22265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.16%) |Training time=0.48s (22.15%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7163|ppo_ep: 1|act_loss: -0.0013837814331054688|cri_loss: -0.0005974769592285156|unsuper_loss: 0.0
+average reward score: 5.96875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.74%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7164|ppo_ep: 1|act_loss: -0.01415252685546875|cri_loss: -0.0066070556640625|unsuper_loss: 0.0
+average reward score: 4.609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.51%) |Training time=0.47s (21.84%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7165|ppo_ep: 1|act_loss: -0.0080413818359375|cri_loss: -0.00374603271484375|unsuper_loss: 0.0
+average reward score: 5.30859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.82%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7166|ppo_ep: 1|act_loss: -0.006404876708984375|cri_loss: -0.003139495849609375|unsuper_loss: 0.0
+average reward score: 4.86328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.45%) |Training time=0.48s (21.94%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7167|ppo_ep: 1|act_loss: 0.0283203125|cri_loss: 0.0145416259765625|unsuper_loss: 0.0
+average reward score: 5.41015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.50%) |Training time=0.47s (20.24%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7168|ppo_ep: 1|act_loss: -0.0019178390502929688|cri_loss: -0.0005779266357421875|unsuper_loss: 0.0
+average reward score: 4.91015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.21%) |Training time=0.48s (22.18%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55
+[2023-04-14 13:10:17,524] [INFO] [logging.py:96:log_dist] [Rank 0] step=7170, skipped=90, lr=[4.894050676677376e-07, 4.894050676677376e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:10:17,543] [INFO] [timer.py:199:stop] epoch=0/micro_step=7170/global_step=7170, RunningAvgSamplesPerSec=105.69460992636105, CurrSamplesPerSec=101.85904479156943, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:10:17,635] [INFO] [logging.py:96:log_dist] [Rank 0] step=7170, skipped=122, lr=[2.672639025183571e-07, 2.672639025183571e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7169|ppo_ep: 1|act_loss: 0.0014104843139648438|cri_loss: 0.0010814666748046875|unsuper_loss: 0.0
+average reward score: 5.2109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.47%) |Training time=0.48s (22.02%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7170|ppo_ep: 1|act_loss: 0.01959228515625|cri_loss: 0.00992584228515625|unsuper_loss: 0.0
+average reward score: 5.48046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.46%) |Training time=0.47s (21.89%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7171|ppo_ep: 1|act_loss: 0.027862548828125|cri_loss: 0.01568603515625|unsuper_loss: 0.0
+average reward score: 5.57421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.58%) |Training time=0.47s (21.86%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7172|ppo_ep: 1|act_loss: 0.0726318359375|cri_loss: 0.040069580078125|unsuper_loss: 0.0
+average reward score: 5.3984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.47%) |Training time=0.48s (21.94%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7173|ppo_ep: 1|act_loss: 0.045166015625|cri_loss: 0.02301025390625|unsuper_loss: 0.0
+average reward score: 5.625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.48s (22.01%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7174|ppo_ep: 1|act_loss: 0.004367828369140625|cri_loss: 0.0023746490478515625|unsuper_loss: 0.0
+average reward score: 5.0546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.43%) |Training time=0.47s (21.96%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7175|ppo_ep: 1|act_loss: 0.0097808837890625|cri_loss: 0.005435943603515625|unsuper_loss: 0.0
+average reward score: 6.01953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.65%) |Training time=0.47s (21.79%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7176|ppo_ep: 1|act_loss: 0.0154571533203125|cri_loss: 0.00794219970703125|unsuper_loss: 0.0
+average reward score: 5.5859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.31%) |Training time=0.48s (22.07%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7177|ppo_ep: 1|act_loss: 0.004146575927734375|cri_loss: 0.00255584716796875|unsuper_loss: 0.0
+average reward score: 5.453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.88%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7178|ppo_ep: 1|act_loss: -0.03350830078125|cri_loss: -0.0157623291015625|unsuper_loss: 0.0
+average reward score: 4.9609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.52%) |Training time=0.47s (21.85%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.55
+[2023-04-14 13:10:39,135] [INFO] [logging.py:96:log_dist] [Rank 0] step=7180, skipped=90, lr=[4.812853772066454e-07, 4.812853772066454e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:10:39,153] [INFO] [timer.py:199:stop] epoch=0/micro_step=7180/global_step=7180, RunningAvgSamplesPerSec=105.69055579370145, CurrSamplesPerSec=102.88281263773706, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:10:39,246] [INFO] [logging.py:96:log_dist] [Rank 0] step=7180, skipped=122, lr=[2.629504792807036e-07, 2.629504792807036e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7179|ppo_ep: 1|act_loss: -0.004405975341796875|cri_loss: -0.0016450881958007812|unsuper_loss: 0.0
+average reward score: 4.6796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.63%) |Training time=0.47s (21.86%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7180|ppo_ep: 1|act_loss: -0.0251007080078125|cri_loss: -0.012298583984375|unsuper_loss: 0.0
+average reward score: 6.08203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.82%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7181|ppo_ep: 1|act_loss: 0.0293731689453125|cri_loss: 0.01560211181640625|unsuper_loss: 0.0
+average reward score: 5.6875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.66%) |Training time=0.47s (21.76%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7182|ppo_ep: 1|act_loss: -0.002353668212890625|cri_loss: 0.0001239776611328125|unsuper_loss: 0.0
+average reward score: 5.7265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.49%) |Training time=0.47s (21.85%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7183|ppo_ep: 1|act_loss: 0.01068115234375|cri_loss: 0.00550079345703125|unsuper_loss: 0.0
+average reward score: 5.546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.54%) |Training time=0.47s (20.15%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.75 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7184|ppo_ep: 1|act_loss: -0.0238189697265625|cri_loss: -0.01168060302734375|unsuper_loss: 0.0
+average reward score: 5.46484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.46%) |Training time=0.47s (21.92%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7185|ppo_ep: 1|act_loss: 0.01348114013671875|cri_loss: 0.0068206787109375|unsuper_loss: 0.0
+average reward score: 5.89453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.69%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7186|ppo_ep: 1|act_loss: 0.0394287109375|cri_loss: 0.0218963623046875|unsuper_loss: 0.0
+average reward score: 5.8359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.47s (21.92%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7187|ppo_ep: 1|act_loss: 0.002780914306640625|cri_loss: 0.0017032623291015625|unsuper_loss: 0.0
+average reward score: 6.01171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.73%) |Training time=0.47s (21.68%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7188|ppo_ep: 1|act_loss: -0.01236724853515625|cri_loss: -0.006107330322265625|unsuper_loss: 0.0
+average reward score: 4.80859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.76%) |Training time=0.47s (20.81%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.17 |AvgSamplesPerSec=14.55
+[2023-04-14 13:11:01,012] [INFO] [logging.py:96:log_dist] [Rank 0] step=7190, skipped=90, lr=[4.732300710749039e-07, 4.732300710749039e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:11:01,030] [INFO] [timer.py:199:stop] epoch=0/micro_step=7190/global_step=7190, RunningAvgSamplesPerSec=105.68814051767909, CurrSamplesPerSec=104.36301713908928, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:11:01,123] [INFO] [logging.py:96:log_dist] [Rank 0] step=7190, skipped=122, lr=[2.586702145135353e-07, 2.586702145135353e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7189|ppo_ep: 1|act_loss: -0.003192901611328125|cri_loss: -0.0009927749633789062|unsuper_loss: 0.0
+average reward score: 5.484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.78%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7190|ppo_ep: 1|act_loss: -0.00814056396484375|cri_loss: -0.003192901611328125|unsuper_loss: 0.0
+average reward score: 5.64453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.48s (22.04%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7191|ppo_ep: 1|act_loss: -0.01434326171875|cri_loss: -0.0070953369140625|unsuper_loss: 0.0
+average reward score: 5.34375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.78%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7192|ppo_ep: 1|act_loss: -0.025177001953125|cri_loss: -0.01215362548828125|unsuper_loss: 0.0
+average reward score: 6.16015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.39%) |Training time=0.47s (21.98%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7193|ppo_ep: 1|act_loss: -0.01399993896484375|cri_loss: -0.00669097900390625|unsuper_loss: 0.0
+average reward score: 5.91796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.35%) |Training time=0.48s (22.02%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7194|ppo_ep: 1|act_loss: -0.01259613037109375|cri_loss: -0.0061798095703125|unsuper_loss: 0.0
+average reward score: 5.296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.71%) |Training time=0.47s (21.68%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7195|ppo_ep: 1|act_loss: -0.02081298828125|cri_loss: -0.01021575927734375|unsuper_loss: 0.0
+average reward score: 5.09765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.47s (21.95%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7196|ppo_ep: 1|act_loss: 0.01335906982421875|cri_loss: 0.00733184814453125|unsuper_loss: 0.0
+average reward score: 5.265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.21%) |Training time=0.48s (22.16%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7197|ppo_ep: 1|act_loss: 0.0306243896484375|cri_loss: 0.01568603515625|unsuper_loss: 0.0
+average reward score: 5.125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.71%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7198|ppo_ep: 1|act_loss: 0.00463104248046875|cri_loss: 0.002452850341796875|unsuper_loss: 0.0
+average reward score: 5.73046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.18%) |Training time=0.48s (20.49%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.55
+[2023-04-14 13:11:22,828] [INFO] [logging.py:96:log_dist] [Rank 0] step=7200, skipped=90, lr=[4.6523926867158245e-07, 4.6523926867158245e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:11:22,846] [INFO] [timer.py:199:stop] epoch=0/micro_step=7200/global_step=7200, RunningAvgSamplesPerSec=105.68242647988721, CurrSamplesPerSec=93.17142805575108, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:11:22,939] [INFO] [logging.py:96:log_dist] [Rank 0] step=7200, skipped=122, lr=[2.544231716607015e-07, 2.544231716607015e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7199|ppo_ep: 1|act_loss: 0.0150604248046875|cri_loss: 0.008148193359375|unsuper_loss: 0.0
+average reward score: 5.79296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.32%) |Training time=0.51s (23.00%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.55
+[2023-04-14 13:11:24,992] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 7200|ppo_ep: 1|act_loss: 0.0025997161865234375|cri_loss: 0.0015277862548828125|unsuper_loss: 0.0
+average reward score: 5.96484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.66s (77.08%) |Training time=0.39s (18.09%) |Others=0.10 (4.83%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7201|ppo_ep: 1|act_loss: 0.027587890625|cri_loss: 0.01409149169921875|unsuper_loss: 0.0
+average reward score: 5.02734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.67s (76.44%) |Training time=0.42s (18.98%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7202|ppo_ep: 1|act_loss: 0.0214385986328125|cri_loss: 0.01111602783203125|unsuper_loss: 0.0
+average reward score: 5.1171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.24%) |Training time=0.46s (21.11%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7203|ppo_ep: 1|act_loss: -0.0129547119140625|cri_loss: -0.00603485107421875|unsuper_loss: 0.0
+average reward score: 5.18359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.12%) |Training time=0.46s (21.23%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7204|ppo_ep: 1|act_loss: 0.003009796142578125|cri_loss: 0.0017175674438476562|unsuper_loss: 0.0
+average reward score: 6.03125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.92%) |Training time=0.47s (21.44%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7205|ppo_ep: 1|act_loss: 0.001621246337890625|cri_loss: 0.001251220703125|unsuper_loss: 0.0
+average reward score: 5.05078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.14%) |Training time=0.46s (21.05%) |Others=0.10 (4.81%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7206|ppo_ep: 1|act_loss: 0.001617431640625|cri_loss: 0.0009260177612304688|unsuper_loss: 0.0
+average reward score: 5.40234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.81%) |Training time=0.45s (20.44%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7207|ppo_ep: 1|act_loss: -3.62396240234375e-05|cri_loss: 0.0010194778442382812|unsuper_loss: 0.0
+average reward score: 5.8046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.40%) |Training time=0.46s (20.93%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7208|ppo_ep: 1|act_loss: 0.013427734375|cri_loss: 0.007190704345703125|unsuper_loss: 0.0
+average reward score: 5.234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.77%) |Training time=0.50s (22.64%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.55
+[2023-04-14 13:11:44,585] [INFO] [logging.py:96:log_dist] [Rank 0] step=7210, skipped=91, lr=[4.581027951092893e-07, 4.581027951092893e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:11:44,603] [INFO] [timer.py:199:stop] epoch=0/micro_step=7210/global_step=7210, RunningAvgSamplesPerSec=105.68967135042475, CurrSamplesPerSec=108.45140855085393, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:11:44,687] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+[2023-04-14 13:11:44,687] [INFO] [logging.py:96:log_dist] [Rank 0] step=7210, skipped=123, lr=[2.506292898694468e-07, 2.506292898694468e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7209|ppo_ep: 1|act_loss: 0.027099609375|cri_loss: 0.014129638671875|unsuper_loss: 0.0
+average reward score: 6.0546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.59%) |Training time=0.46s (21.25%) |Others=0.09 (4.16%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55
+[2023-04-14 13:11:46,852] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 7210|ppo_ep: 1|act_loss: 0.01174163818359375|cri_loss: 0.005977630615234375|unsuper_loss: 0.0
+average reward score: 5.55078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.51%) |Training time=0.48s (22.33%) |Others=0.09 (4.16%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7211|ppo_ep: 1|act_loss: -0.00045490264892578125|cri_loss: -0.00011813640594482422|unsuper_loss: 0.0
+average reward score: 5.1953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.12%) |Training time=0.48s (22.27%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7212|ppo_ep: 1|act_loss: 0.012725830078125|cri_loss: 0.00652313232421875|unsuper_loss: 0.0
+average reward score: 5.8203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.02%) |Training time=0.49s (22.25%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7213|ppo_ep: 1|act_loss: -0.0278472900390625|cri_loss: -0.01361083984375|unsuper_loss: 0.0
+average reward score: 5.76953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.78s (75.15%) |Training time=0.49s (20.57%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.51 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7214|ppo_ep: 1|act_loss: 0.004974365234375|cri_loss: 0.0028705596923828125|unsuper_loss: 0.0
+average reward score: 6.58984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.47%) |Training time=0.48s (21.91%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55
+[2023-04-14 13:11:57,807] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384
+epoch: 0|step: 7215|ppo_ep: 1|act_loss: -0.00435638427734375|cri_loss: -0.0017786026000976562|unsuper_loss: 0.0
+average reward score: 4.76171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.43%) |Training time=0.45s (20.89%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7216|ppo_ep: 1|act_loss: -0.0201416015625|cri_loss: -0.00984954833984375|unsuper_loss: 0.0
+average reward score: 4.91796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.40%) |Training time=0.48s (21.79%) |Others=0.11 (4.82%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7217|ppo_ep: 1|act_loss: 0.019775390625|cri_loss: 0.0100860595703125|unsuper_loss: 0.0
+average reward score: 4.90234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.20%) |Training time=0.49s (22.23%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7218|ppo_ep: 1|act_loss: 0.0010919570922851562|cri_loss: 0.0005993843078613281|unsuper_loss: 0.0
+average reward score: 5.8125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.75%) |Training time=0.49s (22.66%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.55
+[2023-04-14 13:12:06,535] [INFO] [logging.py:96:log_dist] [Rank 0] step=7220, skipped=92, lr=[4.5101875119972634e-07, 4.5101875119972634e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:12:06,553] [INFO] [timer.py:199:stop] epoch=0/micro_step=7220/global_step=7220, RunningAvgSamplesPerSec=105.682454829117, CurrSamplesPerSec=98.92933678188963, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:12:06,646] [INFO] [logging.py:96:log_dist] [Rank 0] step=7220, skipped=124, lr=[2.4686241437572036e-07, 2.4686241437572036e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7219|ppo_ep: 1|act_loss: -0.0048065185546875|cri_loss: -0.002262115478515625|unsuper_loss: 0.0
+average reward score: 5.5390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.97%) |Training time=0.49s (22.40%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7220|ppo_ep: 1|act_loss: 0.0068359375|cri_loss: 0.00359344482421875|unsuper_loss: 0.0
+average reward score: 5.58984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.69%) |Training time=0.49s (22.68%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7221|ppo_ep: 1|act_loss: -0.020599365234375|cri_loss: -0.0101776123046875|unsuper_loss: 0.0
+average reward score: 5.59765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.79%) |Training time=0.49s (22.58%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7222|ppo_ep: 1|act_loss: 0.01129150390625|cri_loss: 0.00586700439453125|unsuper_loss: 0.0
+average reward score: 4.8984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.69%) |Training time=0.49s (22.70%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7223|ppo_ep: 1|act_loss: -0.0013971328735351562|cri_loss: -0.0006508827209472656|unsuper_loss: 0.0
+average reward score: 5.69921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.84%) |Training time=0.49s (22.56%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7224|ppo_ep: 1|act_loss: -0.02392578125|cri_loss: -0.01168060302734375|unsuper_loss: 0.0
+average reward score: 5.28125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.81%) |Training time=0.49s (22.58%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7225|ppo_ep: 1|act_loss: 0.01824951171875|cri_loss: 0.0111083984375|unsuper_loss: 0.0
+average reward score: 5.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.67%) |Training time=0.49s (22.73%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7226|ppo_ep: 1|act_loss: -0.014801025390625|cri_loss: -0.007198333740234375|unsuper_loss: 0.0
+average reward score: 5.4375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.80%) |Training time=0.49s (22.57%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7227|ppo_ep: 1|act_loss: 0.03509521484375|cri_loss: 0.018218994140625|unsuper_loss: 0.0
+average reward score: 6.125
+-------------------------------------------------------------------------------------
+|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.06%) |Training time=0.49s (22.03%) |Others=0.15 (6.91%)|CurSamplesPerSec=14.36 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7228|ppo_ep: 1|act_loss: 0.00925445556640625|cri_loss: 0.004772186279296875|unsuper_loss: 0.0
+average reward score: 5.328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.62s (72.92%) |Training time=0.50s (22.49%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.44 |AvgSamplesPerSec=14.55
+[2023-04-14 13:12:28,345] [INFO] [logging.py:96:log_dist] [Rank 0] step=7230, skipped=92, lr=[4.4320918629398245e-07, 4.4320918629398245e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:12:28,364] [INFO] [timer.py:199:stop] epoch=0/micro_step=7230/global_step=7230, RunningAvgSamplesPerSec=105.67111864118128, CurrSamplesPerSec=109.30850571717106, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:12:28,456] [INFO] [logging.py:96:log_dist] [Rank 0] step=7230, skipped=124, lr=[2.427087262091782e-07, 2.427087262091782e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7229|ppo_ep: 1|act_loss: 0.0153045654296875|cri_loss: 0.008270263671875|unsuper_loss: 0.0
+average reward score: 5.546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.14%) |Training time=0.46s (21.19%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7230|ppo_ep: 1|act_loss: -0.0004405975341796875|cri_loss: -7.367134094238281e-05|unsuper_loss: 0.0
+average reward score: 4.96484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.26%) |Training time=0.54s (24.25%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.34 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7231|ppo_ep: 1|act_loss: -0.0009431838989257812|cri_loss: -0.00023365020751953125|unsuper_loss: 0.0
+average reward score: 5.45703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.12%) |Training time=0.49s (22.26%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7232|ppo_ep: 1|act_loss: 0.016845703125|cri_loss: 0.0087432861328125|unsuper_loss: 0.0
+average reward score: 4.96484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.99%) |Training time=0.49s (22.35%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7233|ppo_ep: 1|act_loss: 0.03729248046875|cri_loss: 0.019073486328125|unsuper_loss: 0.0
+average reward score: 5.359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.83%) |Training time=0.49s (22.58%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7234|ppo_ep: 1|act_loss: 0.01044464111328125|cri_loss: 0.005260467529296875|unsuper_loss: 0.0
+average reward score: 5.5234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.05%) |Training time=0.49s (22.35%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7235|ppo_ep: 1|act_loss: -0.01177978515625|cri_loss: -0.00554656982421875|unsuper_loss: 0.0
+average reward score: 5.5703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.01%) |Training time=0.49s (22.40%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7236|ppo_ep: 1|act_loss: 0.01081085205078125|cri_loss: 0.005710601806640625|unsuper_loss: 0.0
+average reward score: 5.66796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.12%) |Training time=0.49s (22.27%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7237|ppo_ep: 1|act_loss: 0.0028858184814453125|cri_loss: 0.0016603469848632812|unsuper_loss: 0.0
+average reward score: 5.41796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.94%) |Training time=0.49s (22.48%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7238|ppo_ep: 1|act_loss: -0.0245361328125|cri_loss: -0.011871337890625|unsuper_loss: 0.0
+average reward score: 4.9375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.90%) |Training time=0.49s (22.52%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.55
+[2023-04-14 13:12:50,207] [INFO] [logging.py:96:log_dist] [Rank 0] step=7240, skipped=92, lr=[4.354645700985926e-07, 4.354645700985926e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:12:50,225] [INFO] [timer.py:199:stop] epoch=0/micro_step=7240/global_step=7240, RunningAvgSamplesPerSec=105.65752762833793, CurrSamplesPerSec=98.45495143921833, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:12:50,318] [INFO] [logging.py:96:log_dist] [Rank 0] step=7240, skipped=124, lr=[2.385884965447316e-07, 2.385884965447316e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7239|ppo_ep: 1|act_loss: -0.0081329345703125|cri_loss: -0.003597259521484375|unsuper_loss: 0.0
+average reward score: 5.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.95%) |Training time=0.49s (22.46%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7240|ppo_ep: 1|act_loss: -0.013763427734375|cri_loss: -0.0066986083984375|unsuper_loss: 0.0
+average reward score: 5.3125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.73%) |Training time=0.50s (22.68%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7241|ppo_ep: 1|act_loss: -0.0121917724609375|cri_loss: -0.00574493408203125|unsuper_loss: 0.0
+average reward score: 5.171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.02%) |Training time=0.49s (22.38%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7242|ppo_ep: 1|act_loss: -0.025238037109375|cri_loss: -0.0124053955078125|unsuper_loss: 0.0
+average reward score: 5.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.59%) |Training time=0.50s (22.81%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7243|ppo_ep: 1|act_loss: 0.00975799560546875|cri_loss: 0.00499725341796875|unsuper_loss: 0.0
+average reward score: 5.484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.77s (74.78%) |Training time=0.49s (20.94%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.54 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7244|ppo_ep: 1|act_loss: -0.0159912109375|cri_loss: -0.007350921630859375|unsuper_loss: 0.0
+average reward score: 5.609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.83%) |Training time=0.49s (22.56%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7245|ppo_ep: 1|act_loss: -0.001018524169921875|cri_loss: -0.00044727325439453125|unsuper_loss: 0.0
+average reward score: 5.296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.11%) |Training time=0.48s (22.30%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7246|ppo_ep: 1|act_loss: 0.001873016357421875|cri_loss: 0.0011262893676757812|unsuper_loss: 0.0
+average reward score: 5.53125
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.24%) |Training time=0.49s (22.23%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7247|ppo_ep: 1|act_loss: -0.004680633544921875|cri_loss: -0.0019683837890625|unsuper_loss: 0.0
+average reward score: 5.6328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.17%) |Training time=0.49s (21.44%) |Others=0.10 (4.39%)|CurSamplesPerSec=14.06 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7248|ppo_ep: 1|act_loss: 0.002910614013671875|cri_loss: 0.0017080307006835938|unsuper_loss: 0.0
+average reward score: 5.36328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.75%) |Training time=0.49s (22.66%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.55
+[2023-04-14 13:13:12,316] [INFO] [logging.py:96:log_dist] [Rank 0] step=7250, skipped=92, lr=[4.2778501740745257e-07, 4.2778501740745257e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:13:12,335] [INFO] [timer.py:199:stop] epoch=0/micro_step=7250/global_step=7250, RunningAvgSamplesPerSec=105.6449480402019, CurrSamplesPerSec=97.82626105957648, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:13:12,427] [INFO] [logging.py:96:log_dist] [Rank 0] step=7250, skipped=124, lr=[2.3450178645412518e-07, 2.3450178645412518e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7249|ppo_ep: 1|act_loss: 0.002838134765625|cri_loss: 0.00189971923828125|unsuper_loss: 0.0
+average reward score: 5.125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.08%) |Training time=0.49s (22.39%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7250|ppo_ep: 1|act_loss: -0.0059967041015625|cri_loss: -0.0029048919677734375|unsuper_loss: 0.0
+average reward score: 5.171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.90%) |Training time=0.49s (22.46%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7251|ppo_ep: 1|act_loss: -0.0037975311279296875|cri_loss: -0.001712799072265625|unsuper_loss: 0.0
+average reward score: 5.0078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.96%) |Training time=0.49s (22.45%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7252|ppo_ep: 1|act_loss: -0.0023326873779296875|cri_loss: -0.0010633468627929688|unsuper_loss: 0.0
+average reward score: 5.73046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.61%) |Training time=0.50s (22.81%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7253|ppo_ep: 1|act_loss: 0.0113525390625|cri_loss: 0.00598907470703125|unsuper_loss: 0.0
+average reward score: 5.07421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.90%) |Training time=0.49s (22.49%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7254|ppo_ep: 1|act_loss: -0.0007228851318359375|cri_loss: 3.24249267578125e-05|unsuper_loss: 0.0
+average reward score: 5.5625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.81%) |Training time=0.49s (22.59%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7255|ppo_ep: 1|act_loss: -0.0020904541015625|cri_loss: -0.0009593963623046875|unsuper_loss: 0.0
+average reward score: 5.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.64%) |Training time=0.50s (22.79%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.55
+[2023-04-14 13:13:27,520] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192
+epoch: 0|step: 7256|ppo_ep: 1|act_loss: 0.0012178421020507812|cri_loss: 0.0009031295776367188|unsuper_loss: 0.0
+average reward score: 5.86328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.91%) |Training time=0.46s (21.43%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7257|ppo_ep: 1|act_loss: -0.0136871337890625|cri_loss: -0.006656646728515625|unsuper_loss: 0.0
+average reward score: 5.4375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.86%) |Training time=0.49s (22.54%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7258|ppo_ep: 1|act_loss: 0.031036376953125|cri_loss: 0.01690673828125|unsuper_loss: 0.0
+average reward score: 5.671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.19%) |Training time=0.48s (20.51%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.63 |AvgSamplesPerSec=14.55
+[2023-04-14 13:13:34,199] [INFO] [logging.py:96:log_dist] [Rank 0] step=7260, skipped=93, lr=[4.209291433815917e-07, 4.209291433815917e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:13:34,218] [INFO] [timer.py:199:stop] epoch=0/micro_step=7260/global_step=7260, RunningAvgSamplesPerSec=105.63492765670183, CurrSamplesPerSec=99.31827821724299, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:13:34,310] [INFO] [logging.py:96:log_dist] [Rank 0] step=7260, skipped=124, lr=[2.3044865651226017e-07, 2.3044865651226017e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7259|ppo_ep: 1|act_loss: -0.0121307373046875|cri_loss: -0.00601959228515625|unsuper_loss: 0.0
+average reward score: 5.58203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.16%) |Training time=0.48s (22.25%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7260|ppo_ep: 1|act_loss: 0.001026153564453125|cri_loss: 0.0007610321044921875|unsuper_loss: 0.0
+average reward score: 5.36328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.69%) |Training time=0.47s (21.71%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7261|ppo_ep: 1|act_loss: -0.027099609375|cri_loss: -0.013214111328125|unsuper_loss: 0.0
+average reward score: 5.8046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.76%) |Training time=0.47s (21.61%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7262|ppo_ep: 1|act_loss: -0.0026416778564453125|cri_loss: -0.00098419189453125|unsuper_loss: 0.0
+average reward score: 5.08203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.63%) |Training time=0.47s (21.76%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7263|ppo_ep: 1|act_loss: -0.000705718994140625|cri_loss: -0.0001647472381591797|unsuper_loss: 0.0
+average reward score: 6.0703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.30%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7264|ppo_ep: 1|act_loss: 0.00021183490753173828|cri_loss: 0.0003452301025390625|unsuper_loss: 0.0
+average reward score: 5.1875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.42%) |Training time=0.48s (21.97%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7265|ppo_ep: 1|act_loss: 0.020904541015625|cri_loss: 0.01153564453125|unsuper_loss: 0.0
+average reward score: 5.17578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.70%) |Training time=0.47s (21.65%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7266|ppo_ep: 1|act_loss: 0.02044677734375|cri_loss: 0.01055145263671875|unsuper_loss: 0.0
+average reward score: 4.921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.75%) |Training time=0.47s (21.63%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7267|ppo_ep: 1|act_loss: -0.02435302734375|cri_loss: -0.011871337890625|unsuper_loss: 0.0
+average reward score: 5.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.86%) |Training time=0.47s (21.46%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7268|ppo_ep: 1|act_loss: -0.0102081298828125|cri_loss: -0.004817962646484375|unsuper_loss: 0.0
+average reward score: 5.4296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.74%) |Training time=0.47s (21.65%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.55
+[2023-04-14 13:13:55,869] [INFO] [logging.py:96:log_dist] [Rank 0] step=7270, skipped=93, lr=[4.133735241503523e-07, 4.133735241503523e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:13:55,887] [INFO] [timer.py:199:stop] epoch=0/micro_step=7270/global_step=7270, RunningAvgSamplesPerSec=105.63331027433297, CurrSamplesPerSec=105.13135218502553, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:13:55,980] [INFO] [logging.py:96:log_dist] [Rank 0] step=7270, skipped=124, lr=[2.2642916679630006e-07, 2.2642916679630006e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7269|ppo_ep: 1|act_loss: 0.02801513671875|cri_loss: 0.01450347900390625|unsuper_loss: 0.0
+average reward score: 5.1640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.81%) |Training time=0.47s (21.57%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55
+[2023-04-14 13:13:58,022] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, reducing to 4096
+epoch: 0|step: 7270|ppo_ep: 1|act_loss: 0.0019197463989257812|cri_loss: 0.0013561248779296875|unsuper_loss: 0.0
+average reward score: 5.03125
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.71%) |Training time=0.44s (20.64%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7271|ppo_ep: 1|act_loss: 0.00724029541015625|cri_loss: 0.0037212371826171875|unsuper_loss: 0.0
+average reward score: 5.23828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.80%) |Training time=0.47s (21.58%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7272|ppo_ep: 1|act_loss: -0.00864410400390625|cri_loss: -0.004055023193359375|unsuper_loss: 0.0
+average reward score: 5.3125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.91%) |Training time=0.46s (21.45%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7273|ppo_ep: 1|act_loss: -0.0122833251953125|cri_loss: -0.00557708740234375|unsuper_loss: 0.0
+average reward score: 5.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.70%) |Training time=0.47s (19.98%) |Others=0.10 (4.33%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7274|ppo_ep: 1|act_loss: -0.02825927734375|cri_loss: -0.013885498046875|unsuper_loss: 0.0
+average reward score: 5.828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.78%) |Training time=0.47s (21.63%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7275|ppo_ep: 1|act_loss: -0.00447845458984375|cri_loss: -0.0021266937255859375|unsuper_loss: 0.0
+average reward score: 5.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.87%) |Training time=0.47s (21.52%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7276|ppo_ep: 1|act_loss: 0.0157470703125|cri_loss: 0.00909423828125|unsuper_loss: 0.0
+average reward score: 5.17578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.72s (75.23%) |Training time=0.47s (20.37%) |Others=0.10 (4.40%)|CurSamplesPerSec=14.00 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7277|ppo_ep: 1|act_loss: 0.117919921875|cri_loss: 0.06317138671875|unsuper_loss: 0.0
+average reward score: 5.16015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.47s (21.51%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7278|ppo_ep: 1|act_loss: 0.01934814453125|cri_loss: 0.00983428955078125|unsuper_loss: 0.0
+average reward score: 5.3984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.69%) |Training time=0.47s (21.71%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55
+[2023-04-14 13:14:17,796] [INFO] [logging.py:96:log_dist] [Rank 0] step=7280, skipped=94, lr=[4.066293729300195e-07, 4.066293729300195e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:14:17,815] [INFO] [timer.py:199:stop] epoch=0/micro_step=7280/global_step=7280, RunningAvgSamplesPerSec=105.63432861140478, CurrSamplesPerSec=106.89784560000638, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:14:17,907] [INFO] [logging.py:96:log_dist] [Rank 0] step=7280, skipped=124, lr=[2.224433768847789e-07, 2.224433768847789e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7279|ppo_ep: 1|act_loss: 0.02032470703125|cri_loss: 0.01061248779296875|unsuper_loss: 0.0
+average reward score: 5.44140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.05%) |Training time=0.46s (21.35%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7280|ppo_ep: 1|act_loss: 0.0042724609375|cri_loss: 0.00223541259765625|unsuper_loss: 0.0
+average reward score: 4.66015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.79%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7281|ppo_ep: 1|act_loss: -0.0147247314453125|cri_loss: -0.00708770751953125|unsuper_loss: 0.0
+average reward score: 5.74609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.85%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7282|ppo_ep: 1|act_loss: -0.009124755859375|cri_loss: -0.00443267822265625|unsuper_loss: 0.0
+average reward score: 4.9921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.76%) |Training time=0.47s (21.61%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7283|ppo_ep: 1|act_loss: 0.004364013671875|cri_loss: 0.0024471282958984375|unsuper_loss: 0.0
+average reward score: 4.83984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.66%) |Training time=0.47s (21.73%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7284|ppo_ep: 1|act_loss: 0.001445770263671875|cri_loss: 0.00115203857421875|unsuper_loss: 0.0
+average reward score: 4.4375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.48s (21.96%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7285|ppo_ep: 1|act_loss: 0.00766754150390625|cri_loss: 0.004055023193359375|unsuper_loss: 0.0
+average reward score: 5.2265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.93%) |Training time=0.46s (21.45%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7286|ppo_ep: 1|act_loss: -0.00945281982421875|cri_loss: -0.004619598388671875|unsuper_loss: 0.0
+average reward score: 4.65234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.86%) |Training time=0.47s (21.51%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7287|ppo_ep: 1|act_loss: -0.0136871337890625|cri_loss: -0.006744384765625|unsuper_loss: 0.0
+average reward score: 4.6875
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.60s (70.62%) |Training time=0.48s (21.03%) |Others=0.19 (8.35%)|CurSamplesPerSec=14.16 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7288|ppo_ep: 1|act_loss: -0.0018205642700195312|cri_loss: -0.0007228851318359375|unsuper_loss: 0.0
+average reward score: 5.4921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.86%) |Training time=0.46s (21.35%) |Others=0.10 (4.78%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.55
+[2023-04-14 13:14:39,554] [INFO] [logging.py:96:log_dist] [Rank 0] step=7290, skipped=94, lr=[3.9919809145031695e-07, 3.9919809145031695e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:14:39,573] [INFO] [timer.py:199:stop] epoch=0/micro_step=7290/global_step=7290, RunningAvgSamplesPerSec=105.63344822690242, CurrSamplesPerSec=110.83196229238267, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:14:39,665] [INFO] [logging.py:96:log_dist] [Rank 0] step=7290, skipped=124, lr=[2.1849134585671922e-07, 2.1849134585671922e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7289|ppo_ep: 1|act_loss: -0.03265380859375|cri_loss: -0.01580810546875|unsuper_loss: 0.0
+average reward score: 5.48046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.50%) |Training time=0.45s (20.88%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7290|ppo_ep: 1|act_loss: -0.0302276611328125|cri_loss: -0.014495849609375|unsuper_loss: 0.0
+average reward score: 4.96484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.07%) |Training time=0.46s (21.32%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7291|ppo_ep: 1|act_loss: 0.008209228515625|cri_loss: 0.004390716552734375|unsuper_loss: 0.0
+average reward score: 5.71484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.82%) |Training time=0.47s (21.54%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7292|ppo_ep: 1|act_loss: 0.00888824462890625|cri_loss: 0.00460052490234375|unsuper_loss: 0.0
+average reward score: 4.99609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.08%) |Training time=0.46s (21.28%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7293|ppo_ep: 1|act_loss: 0.02685546875|cri_loss: 0.01367950439453125|unsuper_loss: 0.0
+average reward score: 5.1015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.86%) |Training time=0.47s (21.52%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7294|ppo_ep: 1|act_loss: 0.0189208984375|cri_loss: 0.00954437255859375|unsuper_loss: 0.0
+average reward score: 5.55078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.86%) |Training time=0.47s (21.52%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7295|ppo_ep: 1|act_loss: 0.01593017578125|cri_loss: 0.00836181640625|unsuper_loss: 0.0
+average reward score: 5.109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.14%) |Training time=0.46s (21.22%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7296|ppo_ep: 1|act_loss: 0.02227783203125|cri_loss: 0.0115814208984375|unsuper_loss: 0.0
+average reward score: 5.1171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.69%) |Training time=0.47s (21.71%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7297|ppo_ep: 1|act_loss: -0.00872802734375|cri_loss: -0.003902435302734375|unsuper_loss: 0.0
+average reward score: 5.80078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.46s (21.39%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7298|ppo_ep: 1|act_loss: 0.0307159423828125|cri_loss: 0.015655517578125|unsuper_loss: 0.0
+average reward score: 4.703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.97%) |Training time=0.49s (22.49%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55
+[2023-04-14 13:15:01,191] [INFO] [logging.py:96:log_dist] [Rank 0] step=7300, skipped=94, lr=[3.918324110315662e-07, 3.918324110315662e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:15:01,209] [INFO] [timer.py:199:stop] epoch=0/micro_step=7300/global_step=7300, RunningAvgSamplesPerSec=105.63232889516885, CurrSamplesPerSec=100.18558607917797, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:15:01,302] [INFO] [logging.py:96:log_dist] [Rank 0] step=7300, skipped=124, lr=[2.1457313229075322e-07, 2.1457313229075322e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7299|ppo_ep: 1|act_loss: -0.0007047653198242188|cri_loss: 0.00031280517578125|unsuper_loss: 0.0
+average reward score: 6.32421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.31%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7300|ppo_ep: 1|act_loss: 0.00514984130859375|cri_loss: 0.00290679931640625|unsuper_loss: 0.0
+average reward score: 4.609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.91%) |Training time=0.49s (22.47%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7301|ppo_ep: 1|act_loss: 0.013763427734375|cri_loss: 0.007228851318359375|unsuper_loss: 0.0
+average reward score: 5.28515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.01%) |Training time=0.49s (22.46%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7302|ppo_ep: 1|act_loss: 0.002460479736328125|cri_loss: 0.0013637542724609375|unsuper_loss: 0.0
+average reward score: 5.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.61s (70.38%) |Training time=0.57s (24.93%) |Others=0.11 (4.69%)|CurSamplesPerSec=14.01 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7303|ppo_ep: 1|act_loss: -0.0006494522094726562|cri_loss: -0.0001862049102783203|unsuper_loss: 0.0
+average reward score: 5.1171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.90%) |Training time=0.49s (22.58%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7304|ppo_ep: 1|act_loss: -0.025909423828125|cri_loss: -0.01270294189453125|unsuper_loss: 0.0
+average reward score: 5.46875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.96%) |Training time=0.47s (21.45%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7305|ppo_ep: 1|act_loss: -0.0158233642578125|cri_loss: -0.00772857666015625|unsuper_loss: 0.0
+average reward score: 6.1640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.72%) |Training time=0.48s (21.80%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7306|ppo_ep: 1|act_loss: 0.0014791488647460938|cri_loss: 0.0011234283447265625|unsuper_loss: 0.0
+average reward score: 5.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.68s (73.96%) |Training time=0.49s (21.66%) |Others=0.10 (4.39%)|CurSamplesPerSec=14.10 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7307|ppo_ep: 1|act_loss: 0.01947021484375|cri_loss: 0.01013946533203125|unsuper_loss: 0.0
+average reward score: 5.53515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.05%) |Training time=0.49s (22.42%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7308|ppo_ep: 1|act_loss: 0.00453948974609375|cri_loss: 0.002384185791015625|unsuper_loss: 0.0
+average reward score: 5.578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.78%) |Training time=0.49s (22.61%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.55
+[2023-04-14 13:15:23,160] [INFO] [logging.py:96:log_dist] [Rank 0] step=7310, skipped=94, lr=[3.8453244085091747e-07, 3.8453244085091747e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:15:23,179] [INFO] [timer.py:199:stop] epoch=0/micro_step=7310/global_step=7310, RunningAvgSamplesPerSec=105.62016882536805, CurrSamplesPerSec=98.96179635438095, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:15:23,271] [INFO] [logging.py:96:log_dist] [Rank 0] step=7310, skipped=124, lr=[2.106887942642588e-07, 2.106887942642588e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7309|ppo_ep: 1|act_loss: 0.0206451416015625|cri_loss: 0.01088714599609375|unsuper_loss: 0.0
+average reward score: 5.9765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.27%) |Training time=0.49s (22.26%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7310|ppo_ep: 1|act_loss: -0.018463134765625|cri_loss: -0.0088043212890625|unsuper_loss: 0.0
+average reward score: 4.9921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.86%) |Training time=0.49s (22.54%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55
+[2023-04-14 13:15:27,598] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 7311|ppo_ep: 1|act_loss: 0.0302734375|cri_loss: 0.0158538818359375|unsuper_loss: 0.0
+average reward score: 5.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.29%) |Training time=0.49s (22.66%) |Others=0.09 (4.05%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.55
+[2023-04-14 13:15:29,755] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 7312|ppo_ep: 1|act_loss: -0.0013437271118164062|cri_loss: -0.0005731582641601562|unsuper_loss: 0.0
+average reward score: 5.6796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.17%) |Training time=0.49s (22.71%) |Others=0.09 (4.12%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7313|ppo_ep: 1|act_loss: -0.0218505859375|cri_loss: -0.01042938232421875|unsuper_loss: 0.0
+average reward score: 5.234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.81%) |Training time=0.49s (22.67%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7314|ppo_ep: 1|act_loss: 0.013671875|cri_loss: 0.007049560546875|unsuper_loss: 0.0
+average reward score: 5.5234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.95%) |Training time=0.49s (22.42%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7315|ppo_ep: 1|act_loss: -0.041412353515625|cri_loss: -0.0198974609375|unsuper_loss: 0.0
+average reward score: 5.6953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.01%) |Training time=0.49s (22.47%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7316|ppo_ep: 1|act_loss: -0.01434326171875|cri_loss: -0.00691986083984375|unsuper_loss: 0.0
+average reward score: 5.4765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.91%) |Training time=0.49s (22.48%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7317|ppo_ep: 1|act_loss: 0.0110321044921875|cri_loss: 0.0056610107421875|unsuper_loss: 0.0
+average reward score: 5.3828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.17%) |Training time=0.48s (20.63%) |Others=0.10 (4.19%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7318|ppo_ep: 1|act_loss: -0.0601806640625|cri_loss: -0.00604248046875|unsuper_loss: 0.0
+average reward score: 5.953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.23%) |Training time=0.48s (22.16%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55
+[2023-04-14 13:15:45,007] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 4096, reducing to 2048
+[2023-04-14 13:15:45,008] [INFO] [logging.py:96:log_dist] [Rank 0] step=7320, skipped=95, lr=[3.78018739391981e-07, 3.78018739391981e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:15:45,008] [INFO] [timer.py:199:stop] epoch=0/micro_step=7320/global_step=7320, RunningAvgSamplesPerSec=105.61084278644113, CurrSamplesPerSec=106.58968233799237, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:15:45,100] [INFO] [logging.py:96:log_dist] [Rank 0] step=7320, skipped=126, lr=[2.0760575293895051e-07, 2.0760575293895051e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7319|ppo_ep: 1|act_loss: 0.024017333984375|cri_loss: 0.01222991943359375|unsuper_loss: 0.0
+average reward score: 5.2578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.01%) |Training time=0.46s (21.44%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7320|ppo_ep: 1|act_loss: -0.010589599609375|cri_loss: -0.005062103271484375|unsuper_loss: 0.0
+average reward score: 5.375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.37%) |Training time=0.48s (22.01%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7321|ppo_ep: 1|act_loss: 0.006988525390625|cri_loss: 0.00362396240234375|unsuper_loss: 0.0
+average reward score: 5.20703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.85%) |Training time=0.49s (22.57%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7322|ppo_ep: 1|act_loss: -6.532669067382812e-05|cri_loss: 0.00026035308837890625|unsuper_loss: 0.0
+average reward score: 5.2578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.01%) |Training time=0.49s (22.38%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7323|ppo_ep: 1|act_loss: 0.02740478515625|cri_loss: 0.01401519775390625|unsuper_loss: 0.0
+average reward score: 5.4921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.98%) |Training time=0.49s (22.51%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7324|ppo_ep: 1|act_loss: 0.03741455078125|cri_loss: 0.019012451171875|unsuper_loss: 0.0
+average reward score: 5.0
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.81%) |Training time=0.49s (22.43%) |Others=0.10 (4.76%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7325|ppo_ep: 1|act_loss: 0.029541015625|cri_loss: 0.0160675048828125|unsuper_loss: 0.0
+average reward score: 5.4296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.09%) |Training time=0.48s (22.36%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7326|ppo_ep: 1|act_loss: -0.01361083984375|cri_loss: -0.00656890869140625|unsuper_loss: 0.0
+average reward score: 5.18359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.12%) |Training time=0.48s (22.25%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7327|ppo_ep: 1|act_loss: 0.006519317626953125|cri_loss: 0.003330230712890625|unsuper_loss: 0.0
+average reward score: 6.0625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.97%) |Training time=0.49s (22.50%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7328|ppo_ep: 1|act_loss: 0.0673828125|cri_loss: 0.03973388671875|unsuper_loss: 0.0
+average reward score: 5.75390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.66%) |Training time=0.49s (22.73%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.55
+[2023-04-14 13:16:06,691] [INFO] [logging.py:96:log_dist] [Rank 0] step=7330, skipped=95, lr=[3.708439159571659e-07, 3.708439159571659e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:16:06,709] [INFO] [timer.py:199:stop] epoch=0/micro_step=7330/global_step=7330, RunningAvgSamplesPerSec=105.60095077536732, CurrSamplesPerSec=99.02173837662687, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:16:06,802] [INFO] [logging.py:96:log_dist] [Rank 0] step=7330, skipped=126, lr=[2.0378253563519247e-07, 2.0378253563519247e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7329|ppo_ep: 1|act_loss: -0.0149078369140625|cri_loss: -0.00720977783203125|unsuper_loss: 0.0
+average reward score: 5.48046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.07%) |Training time=0.49s (22.43%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7330|ppo_ep: 1|act_loss: 0.022674560546875|cri_loss: 0.01198577880859375|unsuper_loss: 0.0
+average reward score: 5.4921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.63%) |Training time=0.49s (22.77%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7331|ppo_ep: 1|act_loss: -0.00815582275390625|cri_loss: -0.00391387939453125|unsuper_loss: 0.0
+average reward score: 5.17578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.94%) |Training time=0.49s (22.54%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7332|ppo_ep: 1|act_loss: -0.05584716796875|cri_loss: -0.021484375|unsuper_loss: 0.0
+average reward score: 5.11328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.61s (70.82%) |Training time=0.48s (21.26%) |Others=0.18 (7.93%)|CurSamplesPerSec=14.11 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7333|ppo_ep: 1|act_loss: -0.01401519775390625|cri_loss: -0.006877899169921875|unsuper_loss: 0.0
+average reward score: 5.46875
+-------------------------------------------------------------------------------------
+|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.64s (73.48%) |Training time=0.48s (21.45%) |Others=0.11 (5.07%)|CurSamplesPerSec=14.37 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7334|ppo_ep: 1|act_loss: 0.008270263671875|cri_loss: 0.004230499267578125|unsuper_loss: 0.0
+average reward score: 5.16015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.10%) |Training time=0.46s (20.64%) |Others=0.12 (5.26%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7335|ppo_ep: 1|act_loss: -0.0372314453125|cri_loss: -0.0173187255859375|unsuper_loss: 0.0
+average reward score: 5.53515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.10%) |Training time=0.47s (20.60%) |Others=0.10 (4.31%)|CurSamplesPerSec=14.02 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7336|ppo_ep: 1|act_loss: 0.006072998046875|cri_loss: 0.003406524658203125|unsuper_loss: 0.0
+average reward score: 5.8984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.71%) |Training time=0.47s (21.68%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7337|ppo_ep: 1|act_loss: -0.01271820068359375|cri_loss: -0.00617218017578125|unsuper_loss: 0.0
+average reward score: 4.96484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.83%) |Training time=0.47s (21.64%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7338|ppo_ep: 1|act_loss: 0.031219482421875|cri_loss: 0.0158233642578125|unsuper_loss: 0.0
+average reward score: 5.08984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.67%) |Training time=0.47s (21.71%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55
+[2023-04-14 13:16:28,680] [INFO] [logging.py:96:log_dist] [Rank 0] step=7340, skipped=95, lr=[3.6373511386058315e-07, 3.6373511386058315e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:16:28,698] [INFO] [timer.py:199:stop] epoch=0/micro_step=7340/global_step=7340, RunningAvgSamplesPerSec=105.59720320971675, CurrSamplesPerSec=105.43584973840909, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:16:28,791] [INFO] [logging.py:96:log_dist] [Rank 0] step=7340, skipped=126, lr=[1.9999335381357953e-07, 1.9999335381357953e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7339|ppo_ep: 1|act_loss: -0.0055084228515625|cri_loss: -0.002353668212890625|unsuper_loss: 0.0
+average reward score: 5.65625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.97%) |Training time=0.47s (21.52%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7340|ppo_ep: 1|act_loss: -0.002819061279296875|cri_loss: -0.0006628036499023438|unsuper_loss: 0.0
+average reward score: 5.1796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.60%) |Training time=0.47s (21.81%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7341|ppo_ep: 1|act_loss: -0.01428985595703125|cri_loss: -0.006931304931640625|unsuper_loss: 0.0
+average reward score: 5.375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.85%) |Training time=0.47s (21.62%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7342|ppo_ep: 1|act_loss: 0.03863525390625|cri_loss: 0.0203704833984375|unsuper_loss: 0.0
+average reward score: 5.71484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.76%) |Training time=0.47s (21.64%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7343|ppo_ep: 1|act_loss: -0.00783538818359375|cri_loss: -0.003650665283203125|unsuper_loss: 0.0
+average reward score: 5.40625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.59%) |Training time=0.48s (21.90%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7344|ppo_ep: 1|act_loss: 0.00574493408203125|cri_loss: 0.0035114288330078125|unsuper_loss: 0.0
+average reward score: 5.34375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.85%) |Training time=0.47s (21.54%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7345|ppo_ep: 1|act_loss: -0.00555419921875|cri_loss: 0.001644134521484375|unsuper_loss: 0.0
+average reward score: 5.43359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.14%) |Training time=0.46s (21.34%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7346|ppo_ep: 1|act_loss: -0.0234527587890625|cri_loss: -0.011444091796875|unsuper_loss: 0.0
+average reward score: 5.39453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.35%) |Training time=0.45s (21.01%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7347|ppo_ep: 1|act_loss: 0.0014638900756835938|cri_loss: 0.0008153915405273438|unsuper_loss: 0.0
+average reward score: 6.046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.93%) |Training time=0.47s (21.52%) |Others=0.12 (5.55%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7348|ppo_ep: 1|act_loss: -0.0240478515625|cri_loss: -0.01140594482421875|unsuper_loss: 0.0
+average reward score: 5.78125
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.72s (75.18%) |Training time=0.46s (20.09%) |Others=0.11 (4.72%)|CurSamplesPerSec=14.02 |AvgSamplesPerSec=14.55
+[2023-04-14 13:16:50,529] [INFO] [logging.py:96:log_dist] [Rank 0] step=7350, skipped=95, lr=[3.5669243847182864e-07, 3.5669243847182864e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:16:50,547] [INFO] [timer.py:199:stop] epoch=0/micro_step=7350/global_step=7350, RunningAvgSamplesPerSec=105.59639078906564, CurrSamplesPerSec=104.471301040761, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:16:50,640] [INFO] [logging.py:96:log_dist] [Rank 0] step=7350, skipped=126, lr=[1.962382636389268e-07, 1.962382636389268e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7349|ppo_ep: 1|act_loss: -0.00226593017578125|cri_loss: -0.0006923675537109375|unsuper_loss: 0.0
+average reward score: 5.5703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.95%) |Training time=0.47s (21.54%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7350|ppo_ep: 1|act_loss: -0.0267333984375|cri_loss: -0.01296234130859375|unsuper_loss: 0.0
+average reward score: 5.69140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.38%) |Training time=0.48s (22.07%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7351|ppo_ep: 1|act_loss: -0.008514404296875|cri_loss: -0.0030345916748046875|unsuper_loss: 0.0
+average reward score: 5.5703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.89%) |Training time=0.47s (21.58%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7352|ppo_ep: 1|act_loss: -0.0154571533203125|cri_loss: -0.007434844970703125|unsuper_loss: 0.0
+average reward score: 5.89453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.02%) |Training time=0.46s (21.35%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7353|ppo_ep: 1|act_loss: -0.00461578369140625|cri_loss: -0.002185821533203125|unsuper_loss: 0.0
+average reward score: 5.3359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.62%) |Training time=0.48s (21.86%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7354|ppo_ep: 1|act_loss: -0.0004100799560546875|cri_loss: 8.821487426757812e-06|unsuper_loss: 0.0
+average reward score: 5.7578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.74%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7355|ppo_ep: 1|act_loss: 0.015960693359375|cri_loss: 0.00814056396484375|unsuper_loss: 0.0
+average reward score: 6.1015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.31%) |Training time=0.46s (21.15%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7356|ppo_ep: 1|act_loss: 0.0009984970092773438|cri_loss: 0.0006694793701171875|unsuper_loss: 0.0
+average reward score: 5.80859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.09%) |Training time=0.48s (21.66%) |Others=0.14 (6.25%)|CurSamplesPerSec=14.45 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7357|ppo_ep: 1|act_loss: 0.05804443359375|cri_loss: 0.0310516357421875|unsuper_loss: 0.0
+average reward score: 4.7578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.80%) |Training time=0.47s (21.66%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7358|ppo_ep: 1|act_loss: 0.003452301025390625|cri_loss: 0.0018177032470703125|unsuper_loss: 0.0
+average reward score: 5.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.06%) |Training time=0.48s (22.31%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.55
+[2023-04-14 13:17:12,274] [INFO] [logging.py:96:log_dist] [Rank 0] step=7360, skipped=95, lr=[3.4971599418034037e-07, 3.4971599418034037e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:17:12,293] [INFO] [timer.py:199:stop] epoch=0/micro_step=7360/global_step=7360, RunningAvgSamplesPerSec=105.59306648686497, CurrSamplesPerSec=97.36653804664417, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:17:12,386] [INFO] [logging.py:96:log_dist] [Rank 0] step=7360, skipped=126, lr=[1.9251732077072925e-07, 1.9251732077072925e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7359|ppo_ep: 1|act_loss: 0.00797271728515625|cri_loss: 0.00403594970703125|unsuper_loss: 0.0
+average reward score: 5.0078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.98%) |Training time=0.49s (22.55%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7360|ppo_ep: 1|act_loss: 0.01273345947265625|cri_loss: 0.0066986083984375|unsuper_loss: 0.0
+average reward score: 5.984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.30%) |Training time=0.47s (22.04%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7361|ppo_ep: 1|act_loss: -0.01342010498046875|cri_loss: -0.00612640380859375|unsuper_loss: 0.0
+average reward score: 5.4453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.55%) |Training time=0.47s (21.80%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7362|ppo_ep: 1|act_loss: 0.014251708984375|cri_loss: 0.0075225830078125|unsuper_loss: 0.0
+average reward score: 5.5546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.74s (74.70%) |Training time=0.49s (21.01%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7363|ppo_ep: 1|act_loss: -7.724761962890625e-05|cri_loss: 0.0008220672607421875|unsuper_loss: 0.0
+average reward score: 4.68359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.92%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7364|ppo_ep: 1|act_loss: -0.0110931396484375|cri_loss: -0.005306243896484375|unsuper_loss: 0.0
+average reward score: 4.98046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.59%) |Training time=0.48s (21.81%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7365|ppo_ep: 1|act_loss: -0.002201080322265625|cri_loss: -0.0009555816650390625|unsuper_loss: 0.0
+average reward score: 5.24609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.67s (74.23%) |Training time=0.47s (20.97%) |Others=0.11 (4.79%)|CurSamplesPerSec=14.20 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7366|ppo_ep: 1|act_loss: -0.0089569091796875|cri_loss: -0.004367828369140625|unsuper_loss: 0.0
+average reward score: 5.59375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.78%) |Training time=0.46s (21.56%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7367|ppo_ep: 1|act_loss: -0.0165863037109375|cri_loss: -0.00789642333984375|unsuper_loss: 0.0
+average reward score: 5.66796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.33%) |Training time=0.48s (22.11%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7368|ppo_ep: 1|act_loss: -0.010040283203125|cri_loss: -0.004924774169921875|unsuper_loss: 0.0
+average reward score: 5.265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.06%) |Training time=0.48s (22.31%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.55
+[2023-04-14 13:17:34,126] [INFO] [logging.py:96:log_dist] [Rank 0] step=7370, skipped=95, lr=[3.428058843938532e-07, 3.428058843938532e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:17:34,144] [INFO] [timer.py:199:stop] epoch=0/micro_step=7370/global_step=7370, RunningAvgSamplesPerSec=105.58833846823492, CurrSamplesPerSec=101.61320161196909, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:17:34,237] [INFO] [logging.py:96:log_dist] [Rank 0] step=7370, skipped=126, lr=[1.888305803623372e-07, 1.888305803623372e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7369|ppo_ep: 1|act_loss: -0.023406982421875|cri_loss: -0.0115966796875|unsuper_loss: 0.0
+average reward score: 5.31640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.23%) |Training time=0.48s (22.24%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7370|ppo_ep: 1|act_loss: 0.0177001953125|cri_loss: 0.009124755859375|unsuper_loss: 0.0
+average reward score: 4.90625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.10%) |Training time=0.48s (22.28%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7371|ppo_ep: 1|act_loss: 0.00457763671875|cri_loss: 0.0029449462890625|unsuper_loss: 0.0
+average reward score: 5.84375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.33%) |Training time=0.47s (22.02%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7372|ppo_ep: 1|act_loss: -0.011962890625|cri_loss: -0.00588226318359375|unsuper_loss: 0.0
+average reward score: 5.16015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.99%) |Training time=0.48s (22.41%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7373|ppo_ep: 1|act_loss: -0.016937255859375|cri_loss: -0.00826263427734375|unsuper_loss: 0.0
+average reward score: 5.61328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.30%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7374|ppo_ep: 1|act_loss: -0.0300140380859375|cri_loss: -0.01454925537109375|unsuper_loss: 0.0
+average reward score: 5.08203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.49%) |Training time=0.47s (21.84%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7375|ppo_ep: 1|act_loss: 0.0208740234375|cri_loss: 0.01078033447265625|unsuper_loss: 0.0
+average reward score: 4.859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.35%) |Training time=0.47s (22.02%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7376|ppo_ep: 1|act_loss: 0.0167388916015625|cri_loss: 0.00910186767578125|unsuper_loss: 0.0
+average reward score: 5.23046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.46%) |Training time=0.47s (21.87%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7377|ppo_ep: 1|act_loss: -0.005702972412109375|cri_loss: -0.0024814605712890625|unsuper_loss: 0.0
+average reward score: 5.5
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.47%) |Training time=0.47s (20.22%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.77 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7378|ppo_ep: 1|act_loss: -0.01103973388671875|cri_loss: -0.00537872314453125|unsuper_loss: 0.0
+average reward score: 5.4296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.61%) |Training time=0.47s (21.72%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.55
+[2023-04-14 13:17:55,852] [INFO] [logging.py:96:log_dist] [Rank 0] step=7380, skipped=95, lr=[3.359622115368647e-07, 3.359622115368647e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:17:55,870] [INFO] [timer.py:199:stop] epoch=0/micro_step=7380/global_step=7380, RunningAvgSamplesPerSec=105.58390759317957, CurrSamplesPerSec=101.95661566977104, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:17:55,967] [INFO] [logging.py:96:log_dist] [Rank 0] step=7380, skipped=126, lr=[1.851780970601366e-07, 1.851780970601366e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7379|ppo_ep: 1|act_loss: 0.0010738372802734375|cri_loss: 0.0008387565612792969|unsuper_loss: 0.0
+average reward score: 5.51171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.22%) |Training time=0.48s (22.08%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7380|ppo_ep: 1|act_loss: -0.0142974853515625|cri_loss: -0.006511688232421875|unsuper_loss: 0.0
+average reward score: 5.37890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.11%) |Training time=0.46s (21.22%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7381|ppo_ep: 1|act_loss: 0.0143585205078125|cri_loss: 0.0078277587890625|unsuper_loss: 0.0
+average reward score: 5.484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.38%) |Training time=0.45s (20.95%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7382|ppo_ep: 1|act_loss: -0.0185394287109375|cri_loss: -0.0091094970703125|unsuper_loss: 0.0
+average reward score: 5.1796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.07%) |Training time=0.46s (21.28%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7383|ppo_ep: 1|act_loss: 0.018524169921875|cri_loss: 0.011260986328125|unsuper_loss: 0.0
+average reward score: 5.375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.26%) |Training time=0.45s (21.17%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7384|ppo_ep: 1|act_loss: -0.0389404296875|cri_loss: -0.0170745849609375|unsuper_loss: 0.0
+average reward score: 5.3984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.08%) |Training time=0.46s (21.29%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7385|ppo_ep: 1|act_loss: -0.00312042236328125|cri_loss: -0.0014448165893554688|unsuper_loss: 0.0
+average reward score: 5.80078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.15%) |Training time=0.46s (21.20%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7386|ppo_ep: 1|act_loss: -0.028533935546875|cri_loss: -0.0136566162109375|unsuper_loss: 0.0
+average reward score: 5.24609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.92%) |Training time=0.46s (21.41%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7387|ppo_ep: 1|act_loss: 0.0108184814453125|cri_loss: 0.005504608154296875|unsuper_loss: 0.0
+average reward score: 5.8125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.17%) |Training time=0.46s (21.21%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7388|ppo_ep: 1|act_loss: -0.003536224365234375|cri_loss: -0.001667022705078125|unsuper_loss: 0.0
+average reward score: 5.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.02%) |Training time=0.46s (21.33%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55
+[2023-04-14 13:18:17,398] [INFO] [logging.py:96:log_dist] [Rank 0] step=7390, skipped=95, lr=[3.2918507704911455e-07, 3.2918507704911455e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:18:17,416] [INFO] [timer.py:199:stop] epoch=0/micro_step=7390/global_step=7390, RunningAvgSamplesPerSec=105.5878590176888, CurrSamplesPerSec=107.56306313887185, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:18:17,509] [INFO] [logging.py:96:log_dist] [Rank 0] step=7390, skipped=126, lr=[1.8155992500274112e-07, 1.8155992500274112e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7389|ppo_ep: 1|act_loss: 0.023468017578125|cri_loss: 0.01312255859375|unsuper_loss: 0.0
+average reward score: 5.6953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.11%) |Training time=0.46s (21.34%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7390|ppo_ep: 1|act_loss: 0.00725555419921875|cri_loss: 0.00370025634765625|unsuper_loss: 0.0
+average reward score: 5.94921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.17%) |Training time=0.46s (21.18%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7391|ppo_ep: 1|act_loss: -0.00531768798828125|cri_loss: -0.0025043487548828125|unsuper_loss: 0.0
+average reward score: 5.39453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.46s (21.38%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7392|ppo_ep: 1|act_loss: 0.00354766845703125|cri_loss: 0.00273895263671875|unsuper_loss: 0.0
+average reward score: 5.171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.68%) |Training time=0.46s (19.76%) |Others=0.11 (4.56%)|CurSamplesPerSec=13.77 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7393|ppo_ep: 1|act_loss: -0.00673675537109375|cri_loss: -0.0030460357666015625|unsuper_loss: 0.0
+average reward score: 5.19921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.65%) |Training time=0.45s (20.69%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7394|ppo_ep: 1|act_loss: 0.0222015380859375|cri_loss: 0.0114288330078125|unsuper_loss: 0.0
+average reward score: 6.484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.73%) |Training time=0.48s (20.87%) |Others=0.10 (4.40%)|CurSamplesPerSec=14.06 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7395|ppo_ep: 1|act_loss: -0.0157470703125|cri_loss: -0.00746917724609375|unsuper_loss: 0.0
+average reward score: 5.55078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.94%) |Training time=0.46s (21.39%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7396|ppo_ep: 1|act_loss: -0.01318359375|cri_loss: -0.005802154541015625|unsuper_loss: 0.0
+average reward score: 5.64453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.34%) |Training time=0.45s (20.99%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7397|ppo_ep: 1|act_loss: 0.004085540771484375|cri_loss: 0.00223541259765625|unsuper_loss: 0.0
+average reward score: 4.953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.21%) |Training time=0.45s (21.14%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7398|ppo_ep: 1|act_loss: 0.0054931640625|cri_loss: 0.0029010772705078125|unsuper_loss: 0.0
+average reward score: 5.41015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.14%) |Training time=0.46s (21.21%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55
+[2023-04-14 13:18:39,217] [INFO] [logging.py:96:log_dist] [Rank 0] step=7400, skipped=95, lr=[3.2247458138408513e-07, 3.2247458138408513e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:18:39,235] [INFO] [timer.py:199:stop] epoch=0/micro_step=7400/global_step=7400, RunningAvgSamplesPerSec=105.59212222294153, CurrSamplesPerSec=110.47226664581524, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:18:39,328] [INFO] [logging.py:96:log_dist] [Rank 0] step=7400, skipped=126, lr=[1.7797611782018942e-07, 1.7797611782018942e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7399|ppo_ep: 1|act_loss: 0.00040078163146972656|cri_loss: 0.0002589225769042969|unsuper_loss: 0.0
+average reward score: 5.84765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.30%) |Training time=0.45s (21.05%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7400|ppo_ep: 1|act_loss: 0.05859375|cri_loss: 0.03192138671875|unsuper_loss: 0.0
+average reward score: 5.3359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.40%) |Training time=0.45s (20.92%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7401|ppo_ep: 1|act_loss: 0.004222869873046875|cri_loss: 0.0022983551025390625|unsuper_loss: 0.0
+average reward score: 4.2421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.05%) |Training time=0.46s (21.31%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7402|ppo_ep: 1|act_loss: -0.022674560546875|cri_loss: -0.01120758056640625|unsuper_loss: 0.0
+average reward score: 5.0546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.16%) |Training time=0.45s (21.10%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7403|ppo_ep: 1|act_loss: 0.02545166015625|cri_loss: 0.01396942138671875|unsuper_loss: 0.0
+average reward score: 5.0546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.46%) |Training time=0.45s (20.98%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7404|ppo_ep: 1|act_loss: 0.02581787109375|cri_loss: 0.01312255859375|unsuper_loss: 0.0
+average reward score: 5.3984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.09%) |Training time=0.46s (21.27%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7405|ppo_ep: 1|act_loss: 0.0230865478515625|cri_loss: 0.01181793212890625|unsuper_loss: 0.0
+average reward score: 5.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.49%) |Training time=0.47s (21.85%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7406|ppo_ep: 1|act_loss: 0.04376220703125|cri_loss: 0.0252227783203125|unsuper_loss: 0.0
+average reward score: 5.625
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.58s (69.98%) |Training time=0.49s (21.57%) |Others=0.19 (8.45%)|CurSamplesPerSec=14.14 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7407|ppo_ep: 1|act_loss: 0.018585205078125|cri_loss: 0.009429931640625|unsuper_loss: 0.0
+average reward score: 5.03125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.05%) |Training time=0.47s (21.26%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7408|ppo_ep: 1|act_loss: 0.018463134765625|cri_loss: 0.00962066650390625|unsuper_loss: 0.0
+average reward score: 5.6875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.64%) |Training time=0.47s (21.71%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.55
+[2023-04-14 13:19:00,873] [INFO] [logging.py:96:log_dist] [Rank 0] step=7410, skipped=95, lr=[3.158308240075113e-07, 3.158308240075113e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:19:00,892] [INFO] [timer.py:199:stop] epoch=0/micro_step=7410/global_step=7410, RunningAvgSamplesPerSec=105.59323097908113, CurrSamplesPerSec=103.80286557061606, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:19:00,985] [INFO] [logging.py:96:log_dist] [Rank 0] step=7410, skipped=126, lr=[1.744267286331497e-07, 1.744267286331497e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7409|ppo_ep: 1|act_loss: 0.0030670166015625|cri_loss: 0.0015916824340820312|unsuper_loss: 0.0
+average reward score: 5.86328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.45%) |Training time=0.47s (21.90%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7410|ppo_ep: 1|act_loss: -0.0164031982421875|cri_loss: -0.008026123046875|unsuper_loss: 0.0
+average reward score: 5.06640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.48s (21.95%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7411|ppo_ep: 1|act_loss: -0.005390167236328125|cri_loss: -0.002544403076171875|unsuper_loss: 0.0
+average reward score: 5.328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.45%) |Training time=0.45s (20.98%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7412|ppo_ep: 1|act_loss: 0.008453369140625|cri_loss: 0.00438690185546875|unsuper_loss: 0.0
+average reward score: 5.4296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.88%) |Training time=0.46s (21.42%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.55
+[2023-04-14 13:19:09,581] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 7413|ppo_ep: 1|act_loss: -0.0165252685546875|cri_loss: -0.00797271728515625|unsuper_loss: 0.0
+average reward score: 5.2109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.54%) |Training time=0.45s (21.23%) |Others=0.09 (4.23%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.55
+[2023-04-14 13:19:11,712] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 7414|ppo_ep: 1|act_loss: -0.00782012939453125|cri_loss: -0.0037174224853515625|unsuper_loss: 0.0
+average reward score: 5.5703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.31%) |Training time=0.46s (21.49%) |Others=0.09 (4.20%)|CurSamplesPerSec=15.02 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7415|ppo_ep: 1|act_loss: -0.019073486328125|cri_loss: -0.009246826171875|unsuper_loss: 0.0
+average reward score: 6.4375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.44%) |Training time=0.45s (20.91%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7416|ppo_ep: 1|act_loss: -0.026031494140625|cri_loss: -0.01288604736328125|unsuper_loss: 0.0
+average reward score: 5.0234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.88%) |Training time=0.46s (21.45%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7417|ppo_ep: 1|act_loss: -0.0194549560546875|cri_loss: -0.0088043212890625|unsuper_loss: 0.0
+average reward score: 5.34765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.92%) |Training time=0.46s (21.40%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7418|ppo_ep: 1|act_loss: -0.02740478515625|cri_loss: -0.01326751708984375|unsuper_loss: 0.0
+average reward score: 6.375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.78%) |Training time=0.46s (21.56%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.55
+[2023-04-14 13:19:22,331] [INFO] [logging.py:96:log_dist] [Rank 0] step=7420, skipped=95, lr=[3.0925390339590433e-07, 3.0925390339590433e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:19:22,349] [INFO] [timer.py:199:stop] epoch=0/micro_step=7420/global_step=7420, RunningAvgSamplesPerSec=105.59626399643966, CurrSamplesPerSec=107.24917276422602, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:19:22,442] [INFO] [logging.py:96:log_dist] [Rank 0] step=7420, skipped=128, lr=[1.7161203361170996e-07, 1.7161203361170996e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7419|ppo_ep: 1|act_loss: -0.01311492919921875|cri_loss: -0.006343841552734375|unsuper_loss: 0.0
+average reward score: 5.234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.64%) |Training time=0.46s (21.47%) |Others=0.11 (4.89%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7420|ppo_ep: 1|act_loss: -0.0055694580078125|cri_loss: -0.0026264190673828125|unsuper_loss: 0.0
+average reward score: 5.65625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.85%) |Training time=0.46s (21.50%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7421|ppo_ep: 1|act_loss: -0.005157470703125|cri_loss: -0.002201080322265625|unsuper_loss: 0.0
+average reward score: 5.5546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.88%) |Training time=0.46s (21.54%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7422|ppo_ep: 1|act_loss: -0.018798828125|cri_loss: -0.00916290283203125|unsuper_loss: 0.0
+average reward score: 5.54296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.88%) |Training time=0.46s (19.78%) |Others=0.10 (4.33%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7423|ppo_ep: 1|act_loss: 0.0019893646240234375|cri_loss: 0.0019683837890625|unsuper_loss: 0.0
+average reward score: 5.0234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.46s (21.36%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7424|ppo_ep: 1|act_loss: -0.0025959014892578125|cri_loss: -0.0012378692626953125|unsuper_loss: 0.0
+average reward score: 6.0703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.68s (75.04%) |Training time=0.46s (20.45%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.29 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7425|ppo_ep: 1|act_loss: 0.00028014183044433594|cri_loss: 0.0005426406860351562|unsuper_loss: 0.0
+average reward score: 5.34375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.23%) |Training time=0.45s (21.18%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7426|ppo_ep: 1|act_loss: 0.028900146484375|cri_loss: 0.01474761962890625|unsuper_loss: 0.0
+average reward score: 5.2578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.83%) |Training time=0.46s (21.53%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7427|ppo_ep: 1|act_loss: 0.017486572265625|cri_loss: 0.0092010498046875|unsuper_loss: 0.0
+average reward score: 5.984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.82%) |Training time=0.46s (21.60%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7428|ppo_ep: 1|act_loss: 0.0107269287109375|cri_loss: 0.0064697265625|unsuper_loss: 0.0
+average reward score: 5.140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.72%) |Training time=0.46s (21.61%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.55
+[2023-04-14 13:19:44,065] [INFO] [logging.py:96:log_dist] [Rank 0] step=7430, skipped=95, lr=[3.0274391703509267e-07, 3.0274391703509267e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:19:44,083] [INFO] [timer.py:199:stop] epoch=0/micro_step=7430/global_step=7430, RunningAvgSamplesPerSec=105.59870905382144, CurrSamplesPerSec=105.69964632309106, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:19:44,176] [INFO] [logging.py:96:log_dist] [Rank 0] step=7430, skipped=128, lr=[1.681247290517518e-07, 1.681247290517518e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7429|ppo_ep: 1|act_loss: 0.041107177734375|cri_loss: 0.02197265625|unsuper_loss: 0.0
+average reward score: 5.44140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.72%) |Training time=0.47s (21.71%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7430|ppo_ep: 1|act_loss: 0.0006413459777832031|cri_loss: 0.00042319297790527344|unsuper_loss: 0.0
+average reward score: 5.08203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.05%) |Training time=0.51s (23.39%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7431|ppo_ep: 1|act_loss: 0.0380859375|cri_loss: 0.0194244384765625|unsuper_loss: 0.0
+average reward score: 5.5546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.00%) |Training time=0.42s (19.45%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7432|ppo_ep: 1|act_loss: 0.008270263671875|cri_loss: 0.004383087158203125|unsuper_loss: 0.0
+average reward score: 5.53125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.81%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7433|ppo_ep: 1|act_loss: -0.0036334991455078125|cri_loss: -0.0017004013061523438|unsuper_loss: 0.0
+average reward score: 4.9921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.98%) |Training time=0.47s (21.38%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7434|ppo_ep: 1|act_loss: -0.002941131591796875|cri_loss: -0.0012197494506835938|unsuper_loss: 0.0
+average reward score: 5.09375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.92%) |Training time=0.46s (21.42%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7435|ppo_ep: 1|act_loss: -0.00490570068359375|cri_loss: -0.0023326873779296875|unsuper_loss: 0.0
+average reward score: 5.29296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.76%) |Training time=0.47s (21.61%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7436|ppo_ep: 1|act_loss: 0.0081329345703125|cri_loss: 0.00434112548828125|unsuper_loss: 0.0
+average reward score: 5.39453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.78s (75.42%) |Training time=0.48s (20.28%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.55 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7437|ppo_ep: 1|act_loss: 0.00555419921875|cri_loss: 0.0030307769775390625|unsuper_loss: 0.0
+average reward score: 4.88671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.49%) |Training time=0.45s (20.86%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7438|ppo_ep: 1|act_loss: -0.00777435302734375|cri_loss: -0.0036983489990234375|unsuper_loss: 0.0
+average reward score: 4.9609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.65%) |Training time=0.47s (21.70%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55
+[2023-04-14 13:20:05,976] [INFO] [logging.py:96:log_dist] [Rank 0] step=7440, skipped=95, lr=[2.9630096141877935e-07, 2.9630096141877935e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:20:05,994] [INFO] [timer.py:199:stop] epoch=0/micro_step=7440/global_step=7440, RunningAvgSamplesPerSec=105.59770834207129, CurrSamplesPerSec=103.76915880769367, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:20:06,087] [INFO] [logging.py:96:log_dist] [Rank 0] step=7440, skipped=128, lr=[1.646719885086523e-07, 1.646719885086523e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7439|ppo_ep: 1|act_loss: -0.0347900390625|cri_loss: -0.017120361328125|unsuper_loss: 0.0
+average reward score: 5.25
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.71%) |Training time=0.47s (21.76%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7440|ppo_ep: 1|act_loss: -0.00661468505859375|cri_loss: -0.0029449462890625|unsuper_loss: 0.0
+average reward score: 6.734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.58%) |Training time=0.47s (21.77%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7441|ppo_ep: 1|act_loss: -0.016204833984375|cri_loss: -0.00795745849609375|unsuper_loss: 0.0
+average reward score: 5.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.05%) |Training time=0.46s (21.41%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7442|ppo_ep: 1|act_loss: 0.031158447265625|cri_loss: 0.016265869140625|unsuper_loss: 0.0
+average reward score: 5.578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.99%) |Training time=0.46s (21.36%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55
+epoch: 0|step: 7443|ppo_ep: 1|act_loss: -0.027191162109375|cri_loss: -0.0133209228515625|unsuper_loss: 0.0
+average reward score: 5.6484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.33%) |Training time=0.45s (21.10%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7444|ppo_ep: 1|act_loss: 0.01097869873046875|cri_loss: 0.0056304931640625|unsuper_loss: 0.0
+average reward score: 5.0859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.20%) |Training time=0.45s (21.11%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7445|ppo_ep: 1|act_loss: 0.030792236328125|cri_loss: 0.016082763671875|unsuper_loss: 0.0
+average reward score: 5.91796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.78%) |Training time=0.44s (20.65%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7446|ppo_ep: 1|act_loss: -0.001796722412109375|cri_loss: -0.0007958412170410156|unsuper_loss: 0.0
+average reward score: 5.43359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.45%) |Training time=0.45s (20.85%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7447|ppo_ep: 1|act_loss: -0.0030612945556640625|cri_loss: -0.0014820098876953125|unsuper_loss: 0.0
+average reward score: 5.9140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.52%) |Training time=0.46s (20.97%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7448|ppo_ep: 1|act_loss: 0.0092315673828125|cri_loss: 0.00507354736328125|unsuper_loss: 0.0
+average reward score: 5.92578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.51%) |Training time=0.45s (20.81%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56
+[2023-04-14 13:20:27,533] [INFO] [logging.py:96:log_dist] [Rank 0] step=7450, skipped=95, lr=[2.89925132047109e-07, 2.89925132047109e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:20:27,552] [INFO] [timer.py:199:stop] epoch=0/micro_step=7450/global_step=7450, RunningAvgSamplesPerSec=105.60254928538421, CurrSamplesPerSec=110.05864481747192, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:20:27,644] [INFO] [logging.py:96:log_dist] [Rank 0] step=7450, skipped=128, lr=[1.6125386316035496e-07, 1.6125386316035496e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7449|ppo_ep: 1|act_loss: -0.0038928985595703125|cri_loss: -0.0017147064208984375|unsuper_loss: 0.0
+average reward score: 5.84375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.27%) |Training time=0.45s (21.17%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7450|ppo_ep: 1|act_loss: 0.0303497314453125|cri_loss: 0.016021728515625|unsuper_loss: 0.0
+average reward score: 5.95703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.80%) |Training time=0.46s (21.54%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7451|ppo_ep: 1|act_loss: 0.006317138671875|cri_loss: 0.0035724639892578125|unsuper_loss: 0.0
+average reward score: 5.42578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.06%) |Training time=0.46s (21.36%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7452|ppo_ep: 1|act_loss: 0.04852294921875|cri_loss: 0.026397705078125|unsuper_loss: 0.0
+average reward score: 5.05078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.20%) |Training time=0.45s (19.47%) |Others=0.10 (4.33%)|CurSamplesPerSec=13.80 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7453|ppo_ep: 1|act_loss: -0.019744873046875|cri_loss: -0.0097503662109375|unsuper_loss: 0.0
+average reward score: 5.6171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.46%) |Training time=0.46s (20.60%) |Others=0.13 (5.95%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7454|ppo_ep: 1|act_loss: 0.013275146484375|cri_loss: 0.0069122314453125|unsuper_loss: 0.0
+average reward score: 5.0
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.65s (74.51%) |Training time=0.46s (20.95%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7455|ppo_ep: 1|act_loss: 0.0143890380859375|cri_loss: 0.007434844970703125|unsuper_loss: 0.0
+average reward score: 5.35546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.11%) |Training time=0.46s (21.30%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7456|ppo_ep: 1|act_loss: 0.00958251953125|cri_loss: 0.005680084228515625|unsuper_loss: 0.0
+average reward score: 5.5
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.46s (21.45%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7457|ppo_ep: 1|act_loss: 0.0296630859375|cri_loss: 0.0150909423828125|unsuper_loss: 0.0
+average reward score: 4.953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.95%) |Training time=0.49s (22.54%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7458|ppo_ep: 1|act_loss: 0.00717926025390625|cri_loss: 0.0038280487060546875|unsuper_loss: 0.0
+average reward score: 5.3515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.42%) |Training time=0.50s (23.00%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.56
+[2023-04-14 13:20:49,459] [INFO] [logging.py:96:log_dist] [Rank 0] step=7460, skipped=95, lr=[2.8361652342525515e-07, 2.8361652342525515e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:20:49,477] [INFO] [timer.py:199:stop] epoch=0/micro_step=7460/global_step=7460, RunningAvgSamplesPerSec=105.5989674076688, CurrSamplesPerSec=88.6655405472359, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:20:49,570] [INFO] [logging.py:96:log_dist] [Rank 0] step=7460, skipped=128, lr=[1.5787040367172379e-07, 1.5787040367172379e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7459|ppo_ep: 1|act_loss: 0.006500244140625|cri_loss: 0.0033111572265625|unsuper_loss: 0.0
+average reward score: 5.4765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.89%) |Training time=0.52s (23.68%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7460|ppo_ep: 1|act_loss: -0.029205322265625|cri_loss: -0.013946533203125|unsuper_loss: 0.0
+average reward score: 5.76171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.13%) |Training time=0.48s (22.25%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7461|ppo_ep: 1|act_loss: 0.010284423828125|cri_loss: 0.005336761474609375|unsuper_loss: 0.0
+average reward score: 5.64453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.39%) |Training time=0.48s (22.07%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7462|ppo_ep: 1|act_loss: 0.016448974609375|cri_loss: 0.008453369140625|unsuper_loss: 0.0
+average reward score: 5.2734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.14%) |Training time=0.48s (22.25%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7463|ppo_ep: 1|act_loss: 0.0008373260498046875|cri_loss: 0.0004830360412597656|unsuper_loss: 0.0
+average reward score: 5.63671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.46%) |Training time=0.48s (22.00%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7464|ppo_ep: 1|act_loss: 0.019256591796875|cri_loss: 0.01026153564453125|unsuper_loss: 0.0
+average reward score: 5.9453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.11%) |Training time=0.48s (22.28%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7465|ppo_ep: 1|act_loss: 0.00318145751953125|cri_loss: 0.0030651092529296875|unsuper_loss: 0.0
+average reward score: 5.6171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.48%) |Training time=0.48s (21.98%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7466|ppo_ep: 1|act_loss: 0.0126190185546875|cri_loss: 0.006465911865234375|unsuper_loss: 0.0
+average reward score: 5.3671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.06%) |Training time=0.51s (21.69%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.50 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7467|ppo_ep: 1|act_loss: 0.009735107421875|cri_loss: 0.004962921142578125|unsuper_loss: 0.0
+average reward score: 5.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.29%) |Training time=0.48s (22.20%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7468|ppo_ep: 1|act_loss: -0.019805908203125|cri_loss: -0.00965118408203125|unsuper_loss: 0.0
+average reward score: 5.40625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.22%) |Training time=0.48s (22.19%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.56
+[2023-04-14 13:21:11,390] [INFO] [logging.py:96:log_dist] [Rank 0] step=7470, skipped=95, lr=[2.7737522906201384e-07, 2.7737522906201384e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:21:11,408] [INFO] [timer.py:199:stop] epoch=0/micro_step=7470/global_step=7470, RunningAvgSamplesPerSec=105.58988139804403, CurrSamplesPerSec=98.6508434562043, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:21:11,501] [INFO] [logging.py:96:log_dist] [Rank 0] step=7470, skipped=128, lr=[1.5452166019378989e-07, 1.5452166019378989e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7469|ppo_ep: 1|act_loss: -0.00325775146484375|cri_loss: -0.001453399658203125|unsuper_loss: 0.0
+average reward score: 5.12890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.17%) |Training time=0.49s (22.33%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7470|ppo_ep: 1|act_loss: 0.00650787353515625|cri_loss: 0.0034236907958984375|unsuper_loss: 0.0
+average reward score: 5.0078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.17%) |Training time=0.49s (22.28%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.49 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7471|ppo_ep: 1|act_loss: 0.00421905517578125|cri_loss: 0.003101348876953125|unsuper_loss: 0.0
+average reward score: 5.09375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.45%) |Training time=0.48s (22.00%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7472|ppo_ep: 1|act_loss: 0.02142333984375|cri_loss: 0.011138916015625|unsuper_loss: 0.0
+average reward score: 5.0390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.25%) |Training time=0.48s (22.04%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7473|ppo_ep: 1|act_loss: 0.015045166015625|cri_loss: 0.00791168212890625|unsuper_loss: 0.0
+average reward score: 5.03125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.34%) |Training time=0.48s (22.14%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7474|ppo_ep: 1|act_loss: 6.431341171264648e-05|cri_loss: 0.00018262863159179688|unsuper_loss: 0.0
+average reward score: 5.609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.90%) |Training time=0.49s (22.51%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7475|ppo_ep: 1|act_loss: 0.01222991943359375|cri_loss: 0.006435394287109375|unsuper_loss: 0.0
+average reward score: 5.23828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.21%) |Training time=0.48s (22.27%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7476|ppo_ep: 1|act_loss: 0.0091552734375|cri_loss: 0.00495147705078125|unsuper_loss: 0.0
+average reward score: 5.109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.78%) |Training time=0.49s (22.64%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7477|ppo_ep: 1|act_loss: -0.018341064453125|cri_loss: -0.009063720703125|unsuper_loss: 0.0
+average reward score: 5.07421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.05%) |Training time=0.49s (22.43%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7478|ppo_ep: 1|act_loss: 0.0106048583984375|cri_loss: 0.005496978759765625|unsuper_loss: 0.0
+average reward score: 5.2421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.79%) |Training time=0.49s (22.60%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.56
+[2023-04-14 13:21:33,187] [INFO] [logging.py:96:log_dist] [Rank 0] step=7480, skipped=95, lr=[2.71201341468426e-07, 2.71201341468426e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:21:33,206] [INFO] [timer.py:199:stop] epoch=0/micro_step=7480/global_step=7480, RunningAvgSamplesPerSec=105.5795860028403, CurrSamplesPerSec=96.38076143670229, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:21:33,298] [INFO] [logging.py:96:log_dist] [Rank 0] step=7480, skipped=128, lr=[1.5120768236301042e-07, 1.5120768236301042e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7479|ppo_ep: 1|act_loss: -0.0081634521484375|cri_loss: -0.0033721923828125|unsuper_loss: 0.0
+average reward score: 5.27734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.75%) |Training time=0.50s (22.75%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7480|ppo_ep: 1|act_loss: 0.0211944580078125|cri_loss: 0.01141357421875|unsuper_loss: 0.0
+average reward score: 5.00390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.88%) |Training time=0.49s (22.53%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7481|ppo_ep: 1|act_loss: 0.0014867782592773438|cri_loss: 0.0010585784912109375|unsuper_loss: 0.0
+average reward score: 6.0859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.59%) |Training time=0.49s (22.41%) |Others=0.11 (5.00%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7482|ppo_ep: 1|act_loss: 0.006160736083984375|cri_loss: 0.003284454345703125|unsuper_loss: 0.0
+average reward score: 6.0078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.88%) |Training time=0.49s (20.82%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.70 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7483|ppo_ep: 1|act_loss: -0.007537841796875|cri_loss: -0.00336456298828125|unsuper_loss: 0.0
+average reward score: 5.2578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.69s (75.17%) |Training time=0.46s (20.45%) |Others=0.10 (4.37%)|CurSamplesPerSec=14.27 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7484|ppo_ep: 1|act_loss: 0.002811431884765625|cri_loss: 0.0015897750854492188|unsuper_loss: 0.0
+average reward score: 5.72265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.91%) |Training time=0.46s (21.42%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7485|ppo_ep: 1|act_loss: -0.0172119140625|cri_loss: -0.00836944580078125|unsuper_loss: 0.0
+average reward score: 5.4453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.40%) |Training time=0.45s (20.98%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7486|ppo_ep: 1|act_loss: 0.004638671875|cri_loss: 0.0026397705078125|unsuper_loss: 0.0
+average reward score: 5.3359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.90%) |Training time=0.46s (21.42%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7487|ppo_ep: 1|act_loss: -0.00982666015625|cri_loss: -0.004459381103515625|unsuper_loss: 0.0
+average reward score: 4.921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.49%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7488|ppo_ep: 1|act_loss: -0.0114898681640625|cri_loss: -0.00560760498046875|unsuper_loss: 0.0
+average reward score: 5.58203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.96%) |Training time=0.49s (22.41%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.56
+[2023-04-14 13:21:55,054] [INFO] [logging.py:96:log_dist] [Rank 0] step=7490, skipped=95, lr=[2.6509495215639946e-07, 2.6509495215639946e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:21:55,073] [INFO] [timer.py:199:stop] epoch=0/micro_step=7490/global_step=7490, RunningAvgSamplesPerSec=105.57695172591919, CurrSamplesPerSec=101.3811761080209, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:21:55,165] [INFO] [logging.py:96:log_dist] [Rank 0] step=7490, skipped=128, lr=[1.4792851930053116e-07, 1.4792851930053116e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7489|ppo_ep: 1|act_loss: -0.01031494140625|cri_loss: -0.0049896240234375|unsuper_loss: 0.0
+average reward score: 4.97265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.48s (22.13%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7490|ppo_ep: 1|act_loss: 0.003208160400390625|cri_loss: 0.0019092559814453125|unsuper_loss: 0.0
+average reward score: 5.1875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.96%) |Training time=0.49s (22.40%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7491|ppo_ep: 1|act_loss: -0.0019426345825195312|cri_loss: -0.000942230224609375|unsuper_loss: 0.0
+average reward score: 5.7734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.96%) |Training time=0.47s (21.43%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7492|ppo_ep: 1|act_loss: -0.01708984375|cri_loss: -0.00830078125|unsuper_loss: 0.0
+average reward score: 6.19921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.84%) |Training time=0.45s (20.47%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7493|ppo_ep: 1|act_loss: -0.003551483154296875|cri_loss: -0.0016069412231445312|unsuper_loss: 0.0
+average reward score: 5.31640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.16%) |Training time=0.46s (21.29%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7494|ppo_ep: 1|act_loss: -0.00821685791015625|cri_loss: -0.003978729248046875|unsuper_loss: 0.0
+average reward score: 5.8515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.98%) |Training time=0.46s (21.32%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7495|ppo_ep: 1|act_loss: 0.030670166015625|cri_loss: 0.0157470703125|unsuper_loss: 0.0
+average reward score: 5.19140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.86%) |Training time=0.47s (21.49%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7496|ppo_ep: 1|act_loss: 0.013885498046875|cri_loss: 0.007472991943359375|unsuper_loss: 0.0
+average reward score: 5.64453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.63s (71.27%) |Training time=0.45s (19.92%) |Others=0.20 (8.81%)|CurSamplesPerSec=14.01 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7497|ppo_ep: 1|act_loss: -0.0068511962890625|cri_loss: -0.0031795501708984375|unsuper_loss: 0.0
+average reward score: 5.50390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.92%) |Training time=0.45s (20.53%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7498|ppo_ep: 1|act_loss: -0.0098114013671875|cri_loss: -0.0047607421875|unsuper_loss: 0.0
+average reward score: 5.3671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.71%) |Training time=0.47s (21.60%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.56
+[2023-04-14 13:22:16,928] [INFO] [logging.py:96:log_dist] [Rank 0] step=7500, skipped=95, lr=[2.5905615163735286e-07, 2.5905615163735286e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:22:16,946] [INFO] [timer.py:199:stop] epoch=0/micro_step=7500/global_step=7500, RunningAvgSamplesPerSec=105.57782550562153, CurrSamplesPerSec=104.88119826961066, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:22:17,039] [INFO] [logging.py:96:log_dist] [Rank 0] step=7500, skipped=128, lr=[1.4468421961145924e-07, 1.4468421961145924e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7499|ppo_ep: 1|act_loss: 0.00513458251953125|cri_loss: 0.0026531219482421875|unsuper_loss: 0.0
+average reward score: 5.171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.80%) |Training time=0.47s (21.64%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7500|ppo_ep: 1|act_loss: -0.0026226043701171875|cri_loss: -0.0012683868408203125|unsuper_loss: 0.0
+average reward score: 5.91796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.44%) |Training time=0.45s (20.88%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7501|ppo_ep: 1|act_loss: -0.0023021697998046875|cri_loss: 8.96453857421875e-05|unsuper_loss: 0.0
+average reward score: 5.4296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.97%) |Training time=0.46s (21.29%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7502|ppo_ep: 1|act_loss: 0.0137939453125|cri_loss: 0.007843017578125|unsuper_loss: 0.0
+average reward score: 4.91015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.03%) |Training time=0.44s (20.38%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7503|ppo_ep: 1|act_loss: -0.0096435546875|cri_loss: -0.00400543212890625|unsuper_loss: 0.0
+average reward score: 5.3671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.85%) |Training time=0.44s (20.57%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7504|ppo_ep: 1|act_loss: -0.02020263671875|cri_loss: -0.00994873046875|unsuper_loss: 0.0
+average reward score: 5.62109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.19%) |Training time=0.43s (20.13%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7505|ppo_ep: 1|act_loss: -0.0022716522216796875|cri_loss: -0.0006518363952636719|unsuper_loss: 0.0
+average reward score: 5.23046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.55%) |Training time=0.43s (19.86%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7506|ppo_ep: 1|act_loss: 0.011077880859375|cri_loss: 0.00572967529296875|unsuper_loss: 0.0
+average reward score: 5.21484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.83%) |Training time=0.42s (19.46%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7507|ppo_ep: 1|act_loss: 0.001827239990234375|cri_loss: 0.0010480880737304688|unsuper_loss: 0.0
+average reward score: 5.3359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.81%) |Training time=0.42s (19.60%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7508|ppo_ep: 1|act_loss: 0.00127410888671875|cri_loss: 0.0009026527404785156|unsuper_loss: 0.0
+average reward score: 5.0625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.28%) |Training time=0.43s (20.06%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.56
+[2023-04-14 13:22:38,417] [INFO] [logging.py:96:log_dist] [Rank 0] step=7510, skipped=95, lr=[2.5308502942087883e-07, 2.5308502942087883e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:22:38,436] [INFO] [timer.py:199:stop] epoch=0/micro_step=7510/global_step=7510, RunningAvgSamplesPerSec=105.59227493403863, CurrSamplesPerSec=120.76162267067473, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:22:38,528] [INFO] [logging.py:96:log_dist] [Rank 0] step=7510, skipped=128, lr=[1.4147483138414269e-07, 1.4147483138414269e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7509|ppo_ep: 1|act_loss: 0.0016269683837890625|cri_loss: 0.0010166168212890625|unsuper_loss: 0.0
+average reward score: 5.0546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.44%) |Training time=0.43s (19.97%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7510|ppo_ep: 1|act_loss: -0.020721435546875|cri_loss: -0.01023101806640625|unsuper_loss: 0.0
+average reward score: 5.625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.98%) |Training time=0.44s (20.35%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7511|ppo_ep: 1|act_loss: -0.04547119140625|cri_loss: -0.01995849609375|unsuper_loss: 0.0
+average reward score: 5.30078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.43%) |Training time=0.43s (20.00%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7512|ppo_ep: 1|act_loss: -0.01806640625|cri_loss: -0.00852203369140625|unsuper_loss: 0.0
+average reward score: 5.9765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.03%) |Training time=0.44s (18.67%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.70 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7513|ppo_ep: 1|act_loss: 0.0228271484375|cri_loss: 0.01172637939453125|unsuper_loss: 0.0
+average reward score: 5.421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.71s (76.25%) |Training time=0.44s (19.37%) |Others=0.10 (4.38%)|CurSamplesPerSec=14.23 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7514|ppo_ep: 1|act_loss: -0.031982421875|cri_loss: -0.01556396484375|unsuper_loss: 0.0
+average reward score: 5.7265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.88%) |Training time=0.44s (20.46%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
+[2023-04-14 13:22:51,702] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 7515|ppo_ep: 1|act_loss: 0.0291748046875|cri_loss: 0.0157318115234375|unsuper_loss: 0.0
+average reward score: 5.1015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.37%) |Training time=0.44s (20.51%) |Others=0.09 (4.11%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.56
+[2023-04-14 13:22:53,844] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 7516|ppo_ep: 1|act_loss: -0.0148773193359375|cri_loss: -0.006542205810546875|unsuper_loss: 0.0
+average reward score: 5.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.22%) |Training time=0.44s (20.61%) |Others=0.09 (4.17%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7517|ppo_ep: 1|act_loss: 0.0228118896484375|cri_loss: 0.01151275634765625|unsuper_loss: 0.0
+average reward score: 5.05859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.59%) |Training time=0.45s (20.87%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7518|ppo_ep: 1|act_loss: 0.0023403167724609375|cri_loss: 0.0014944076538085938|unsuper_loss: 0.0
+average reward score: 5.0859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.83%) |Training time=0.44s (20.50%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56
+[2023-04-14 13:23:00,186] [INFO] [logging.py:96:log_dist] [Rank 0] step=7520, skipped=95, lr=[2.471816740134132e-07, 2.471816740134132e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:23:00,204] [INFO] [timer.py:199:stop] epoch=0/micro_step=7520/global_step=7520, RunningAvgSamplesPerSec=105.6047174355037, CurrSamplesPerSec=117.03275525160899, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:23:00,297] [INFO] [logging.py:96:log_dist] [Rank 0] step=7520, skipped=130, lr=[1.3893248903978695e-07, 1.3893248903978695e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7519|ppo_ep: 1|act_loss: -0.02703857421875|cri_loss: -0.01279449462890625|unsuper_loss: 0.0
+average reward score: 5.1484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.10%) |Training time=0.44s (20.33%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7520|ppo_ep: 1|act_loss: 0.0146331787109375|cri_loss: 0.00946044921875|unsuper_loss: 0.0
+average reward score: 5.61328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.85%) |Training time=0.44s (20.49%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7521|ppo_ep: 1|act_loss: 0.033538818359375|cri_loss: 0.0172271728515625|unsuper_loss: 0.0
+average reward score: 6.171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.28%) |Training time=0.43s (20.13%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7522|ppo_ep: 1|act_loss: 0.00530242919921875|cri_loss: 0.0028667449951171875|unsuper_loss: 0.0
+average reward score: 5.40234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.99%) |Training time=0.44s (20.35%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7523|ppo_ep: 1|act_loss: 0.003238677978515625|cri_loss: 0.0018138885498046875|unsuper_loss: 0.0
+average reward score: 5.19921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.26%) |Training time=0.43s (20.07%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7524|ppo_ep: 1|act_loss: -0.0134124755859375|cri_loss: -0.006565093994140625|unsuper_loss: 0.0
+average reward score: 5.84375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.26%) |Training time=0.43s (20.06%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7525|ppo_ep: 1|act_loss: 0.0013933181762695312|cri_loss: 0.0007691383361816406|unsuper_loss: 0.0
+average reward score: 4.94140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.72%) |Training time=0.45s (20.73%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7526|ppo_ep: 1|act_loss: 0.00327301025390625|cri_loss: 0.001712799072265625|unsuper_loss: 0.0
+average reward score: 5.6171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.20%) |Training time=0.43s (20.13%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7527|ppo_ep: 1|act_loss: -0.00595855712890625|cri_loss: -0.0021209716796875|unsuper_loss: 0.0
+average reward score: 5.5625
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.78s (77.04%) |Training time=0.43s (18.67%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.88 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7528|ppo_ep: 1|act_loss: 0.03192138671875|cri_loss: 0.0162353515625|unsuper_loss: 0.0
+average reward score: 5.6953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.19%) |Training time=0.43s (20.11%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56
+[2023-04-14 13:23:21,842] [INFO] [logging.py:96:log_dist] [Rank 0] step=7530, skipped=95, lr=[2.413461729169248e-07, 2.413461729169248e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:23:21,861] [INFO] [timer.py:199:stop] epoch=0/micro_step=7530/global_step=7530, RunningAvgSamplesPerSec=105.61827921516286, CurrSamplesPerSec=111.98603613450567, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:23:21,953] [INFO] [logging.py:96:log_dist] [Rank 0] step=7530, skipped=130, lr=[1.357860609740455e-07, 1.357860609740455e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7529|ppo_ep: 1|act_loss: -0.0020847320556640625|cri_loss: 0.0007724761962890625|unsuper_loss: 0.0
+average reward score: 5.8984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.60%) |Training time=0.45s (20.83%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7530|ppo_ep: 1|act_loss: -0.008270263671875|cri_loss: -0.00394439697265625|unsuper_loss: 0.0
+average reward score: 4.8828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.35%) |Training time=0.45s (21.00%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7531|ppo_ep: 1|act_loss: 0.037750244140625|cri_loss: 0.0194091796875|unsuper_loss: 0.0
+average reward score: 5.27734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.88%) |Training time=0.44s (20.54%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7532|ppo_ep: 1|act_loss: 0.01190185546875|cri_loss: 0.00634765625|unsuper_loss: 0.0
+average reward score: 5.734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.73%) |Training time=0.44s (20.61%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7533|ppo_ep: 1|act_loss: 0.0234375|cri_loss: 0.0126495361328125|unsuper_loss: 0.0
+average reward score: 5.609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.95%) |Training time=0.44s (20.45%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7534|ppo_ep: 1|act_loss: -0.00909423828125|cri_loss: -0.004352569580078125|unsuper_loss: 0.0
+average reward score: 5.9609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.94%) |Training time=0.44s (20.39%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7535|ppo_ep: 1|act_loss: 0.018157958984375|cri_loss: 0.0093536376953125|unsuper_loss: 0.0
+average reward score: 4.9921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.31%) |Training time=0.43s (20.10%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7536|ppo_ep: 1|act_loss: 0.01336669921875|cri_loss: 0.007061004638671875|unsuper_loss: 0.0
+average reward score: 5.1484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.58%) |Training time=0.45s (20.77%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7537|ppo_ep: 1|act_loss: -0.016876220703125|cri_loss: -0.00801849365234375|unsuper_loss: 0.0
+average reward score: 4.66796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.74%) |Training time=0.44s (20.69%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7538|ppo_ep: 1|act_loss: -0.024139404296875|cri_loss: -0.0118560791015625|unsuper_loss: 0.0
+average reward score: 4.9765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.63%) |Training time=0.44s (20.71%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
+[2023-04-14 13:23:43,304] [INFO] [logging.py:96:log_dist] [Rank 0] step=7540, skipped=95, lr=[2.355786126276159e-07, 2.355786126276159e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:23:43,322] [INFO] [timer.py:199:stop] epoch=0/micro_step=7540/global_step=7540, RunningAvgSamplesPerSec=105.62894102683586, CurrSamplesPerSec=112.07852142696694, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:23:43,414] [INFO] [logging.py:96:log_dist] [Rank 0] step=7540, skipped=130, lr=[1.3267467626223606e-07, 1.3267467626223606e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7539|ppo_ep: 1|act_loss: 0.0186767578125|cri_loss: 0.00942230224609375|unsuper_loss: 0.0
+average reward score: 5.96484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.60%) |Training time=0.45s (20.85%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7540|ppo_ep: 1|act_loss: -0.011962890625|cri_loss: -0.005840301513671875|unsuper_loss: 0.0
+average reward score: 5.5546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.59%) |Training time=0.44s (20.74%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7541|ppo_ep: 1|act_loss: -0.002826690673828125|cri_loss: -0.001079559326171875|unsuper_loss: 0.0
+average reward score: 5.16796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.35%) |Training time=0.43s (20.07%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7542|ppo_ep: 1|act_loss: 0.0143280029296875|cri_loss: 0.00728607177734375|unsuper_loss: 0.0
+average reward score: 5.67578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.40s |Gather latency=0.00s (0.00%) |Generate time=1.81s (75.39%) |Training time=0.44s (18.29%) |Others=0.15 (6.33%)|CurSamplesPerSec=13.36 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7543|ppo_ep: 1|act_loss: -0.0010290145874023438|cri_loss: 0.00040149688720703125|unsuper_loss: 0.0
+average reward score: 5.13671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.83%) |Training time=0.44s (20.59%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7544|ppo_ep: 1|act_loss: -0.026947021484375|cri_loss: -0.01317596435546875|unsuper_loss: 0.0
+average reward score: 4.73828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.70%) |Training time=0.44s (20.63%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7545|ppo_ep: 1|act_loss: -0.00775146484375|cri_loss: -0.0036716461181640625|unsuper_loss: 0.0
+average reward score: 5.8046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.00%) |Training time=0.44s (20.32%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7546|ppo_ep: 1|act_loss: -0.00421142578125|cri_loss: -0.0018768310546875|unsuper_loss: 0.0
+average reward score: 5.3515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.58%) |Training time=0.45s (20.85%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7547|ppo_ep: 1|act_loss: 0.026153564453125|cri_loss: 0.01334381103515625|unsuper_loss: 0.0
+average reward score: 5.39453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.13%) |Training time=0.43s (20.23%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7548|ppo_ep: 1|act_loss: -0.0076904296875|cri_loss: -0.003749847412109375|unsuper_loss: 0.0
+average reward score: 5.359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.10%) |Training time=0.43s (20.20%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.56
+[2023-04-14 13:24:05,009] [INFO] [logging.py:96:log_dist] [Rank 0] step=7550, skipped=95, lr=[2.298790786346439e-07, 2.298790786346439e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:24:05,027] [INFO] [timer.py:199:stop] epoch=0/micro_step=7550/global_step=7550, RunningAvgSamplesPerSec=105.64110098341945, CurrSamplesPerSec=114.46428324718376, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:24:05,120] [INFO] [logging.py:96:log_dist] [Rank 0] step=7550, skipped=130, lr=[1.2959838102258537e-07, 1.2959838102258537e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7549|ppo_ep: 1|act_loss: -0.001949310302734375|cri_loss: -0.0006542205810546875|unsuper_loss: 0.0
+average reward score: 5.19921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.79%) |Training time=0.44s (20.64%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7550|ppo_ep: 1|act_loss: -0.004299163818359375|cri_loss: -0.0019626617431640625|unsuper_loss: 0.0
+average reward score: 4.98828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.95%) |Training time=0.44s (20.37%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7551|ppo_ep: 1|act_loss: 0.004322052001953125|cri_loss: 0.002300262451171875|unsuper_loss: 0.0
+average reward score: 5.3359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.15%) |Training time=0.43s (20.26%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7552|ppo_ep: 1|act_loss: -0.00215911865234375|cri_loss: -0.0009260177612304688|unsuper_loss: 0.0
+average reward score: 5.7734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.82%) |Training time=0.44s (20.52%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7553|ppo_ep: 1|act_loss: 0.02783203125|cri_loss: 0.01480865478515625|unsuper_loss: 0.0
+average reward score: 5.58984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.74%) |Training time=0.44s (20.69%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7554|ppo_ep: 1|act_loss: 0.00394439697265625|cri_loss: 0.002223968505859375|unsuper_loss: 0.0
+average reward score: 5.671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.09%) |Training time=0.43s (20.23%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7555|ppo_ep: 1|act_loss: -0.0018434524536132812|cri_loss: -0.0006570816040039062|unsuper_loss: 0.0
+average reward score: 5.90625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.36%) |Training time=0.43s (20.06%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7556|ppo_ep: 1|act_loss: 0.0039520263671875|cri_loss: 0.002269744873046875|unsuper_loss: 0.0
+average reward score: 5.03125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.48%) |Training time=0.45s (20.88%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7557|ppo_ep: 1|act_loss: 0.0239105224609375|cri_loss: 0.012115478515625|unsuper_loss: 0.0
+average reward score: 5.1484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.00%) |Training time=0.45s (20.26%) |Others=0.17 (7.75%)|CurSamplesPerSec=14.38 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7558|ppo_ep: 1|act_loss: -0.02239990234375|cri_loss: -0.01085662841796875|unsuper_loss: 0.0
+average reward score: 5.34375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.22%) |Training time=0.43s (20.09%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.56
+[2023-04-14 13:24:26,536] [INFO] [logging.py:96:log_dist] [Rank 0] step=7560, skipped=95, lr=[2.242476554188525e-07, 2.242476554188525e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:24:26,554] [INFO] [timer.py:199:stop] epoch=0/micro_step=7560/global_step=7560, RunningAvgSamplesPerSec=105.65329125870058, CurrSamplesPerSec=115.2066690872322, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:24:26,647] [INFO] [logging.py:96:log_dist] [Rank 0] step=7560, skipped=130, lr=[1.2655722085321064e-07, 1.2655722085321064e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7559|ppo_ep: 1|act_loss: -0.02569580078125|cri_loss: -0.012420654296875|unsuper_loss: 0.0
+average reward score: 6.05859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.85%) |Training time=0.44s (20.57%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7560|ppo_ep: 1|act_loss: 0.004360198974609375|cri_loss: 0.0026397705078125|unsuper_loss: 0.0
+average reward score: 5.2578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.61%) |Training time=0.44s (20.72%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7561|ppo_ep: 1|act_loss: -0.00481414794921875|cri_loss: -0.002254486083984375|unsuper_loss: 0.0
+average reward score: 5.64453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.81%) |Training time=0.44s (20.51%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7562|ppo_ep: 1|act_loss: -0.0236053466796875|cri_loss: -0.01155853271484375|unsuper_loss: 0.0
+average reward score: 5.35546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.64%) |Training time=0.45s (20.81%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7563|ppo_ep: 1|act_loss: 0.015167236328125|cri_loss: 0.007965087890625|unsuper_loss: 0.0
+average reward score: 5.40625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.81%) |Training time=0.44s (20.62%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7564|ppo_ep: 1|act_loss: -0.0111083984375|cri_loss: -0.00547027587890625|unsuper_loss: 0.0
+average reward score: 4.9609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.76%) |Training time=0.44s (20.58%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7565|ppo_ep: 1|act_loss: 0.00646209716796875|cri_loss: 0.003376007080078125|unsuper_loss: 0.0
+average reward score: 5.390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.82%) |Training time=0.44s (20.52%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7566|ppo_ep: 1|act_loss: -0.0036830902099609375|cri_loss: -0.001728057861328125|unsuper_loss: 0.0
+average reward score: 4.80859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.72%) |Training time=0.44s (20.60%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7567|ppo_ep: 1|act_loss: -0.0219268798828125|cri_loss: -0.0107879638671875|unsuper_loss: 0.0
+average reward score: 5.24609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.78%) |Training time=0.44s (20.56%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7568|ppo_ep: 1|act_loss: -0.01464080810546875|cri_loss: -0.006877899169921875|unsuper_loss: 0.0
+average reward score: 5.51171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.68%) |Training time=0.44s (20.67%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.56
+[2023-04-14 13:24:47,993] [INFO] [logging.py:96:log_dist] [Rank 0] step=7570, skipped=95, lr=[2.186844264515187e-07, 2.186844264515187e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:24:48,012] [INFO] [timer.py:199:stop] epoch=0/micro_step=7570/global_step=7570, RunningAvgSamplesPerSec=105.66395711527322, CurrSamplesPerSec=114.59886133490893, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:24:48,104] [INFO] [logging.py:96:log_dist] [Rank 0] step=7570, skipped=130, lr=[1.235512408314418e-07, 1.235512408314418e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7569|ppo_ep: 1|act_loss: 0.05194091796875|cri_loss: 0.0263824462890625|unsuper_loss: 0.0
+average reward score: 5.3359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.76%) |Training time=0.44s (20.58%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7570|ppo_ep: 1|act_loss: -0.0175018310546875|cri_loss: -0.00848388671875|unsuper_loss: 0.0
+average reward score: 5.18359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.63%) |Training time=0.44s (20.71%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7571|ppo_ep: 1|act_loss: 0.0053863525390625|cri_loss: 0.0031070709228515625|unsuper_loss: 0.0
+average reward score: 5.046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.95%) |Training time=0.44s (20.23%) |Others=0.10 (4.81%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7572|ppo_ep: 1|act_loss: -0.00743865966796875|cri_loss: -0.0034351348876953125|unsuper_loss: 0.0
+average reward score: 5.4453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.69s (72.01%) |Training time=0.56s (23.73%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.63 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7573|ppo_ep: 1|act_loss: 0.01708984375|cri_loss: 0.0089111328125|unsuper_loss: 0.0
+average reward score: 5.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.65%) |Training time=0.44s (20.68%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7574|ppo_ep: 1|act_loss: 0.00736236572265625|cri_loss: 0.00399017333984375|unsuper_loss: 0.0
+average reward score: 5.34375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.56%) |Training time=0.45s (20.78%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7575|ppo_ep: 1|act_loss: -0.009674072265625|cri_loss: -0.004669189453125|unsuper_loss: 0.0
+average reward score: 4.7734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.71%) |Training time=0.44s (20.63%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7576|ppo_ep: 1|act_loss: 0.016326904296875|cri_loss: 0.00838470458984375|unsuper_loss: 0.0
+average reward score: 6.1953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.91%) |Training time=0.44s (20.37%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7577|ppo_ep: 1|act_loss: 0.002529144287109375|cri_loss: 0.0013599395751953125|unsuper_loss: 0.0
+average reward score: 5.19140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.07%) |Training time=0.44s (20.33%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7578|ppo_ep: 1|act_loss: 0.126953125|cri_loss: 0.07867431640625|unsuper_loss: 0.0
+average reward score: 5.875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.40%) |Training time=0.45s (20.97%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.56
+[2023-04-14 13:25:09,680] [INFO] [logging.py:96:log_dist] [Rank 0] step=7580, skipped=95, lr=[2.131894741931159e-07, 2.131894741931159e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:25:09,699] [INFO] [timer.py:199:stop] epoch=0/micro_step=7580/global_step=7580, RunningAvgSamplesPerSec=105.6733901747893, CurrSamplesPerSec=118.37709679268664, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:25:09,791] [INFO] [logging.py:96:log_dist] [Rank 0] step=7580, skipped=130, lr=[1.2058048551315455e-07, 1.2058048551315455e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7579|ppo_ep: 1|act_loss: -0.00325775146484375|cri_loss: -0.0013904571533203125|unsuper_loss: 0.0
+average reward score: 5.60546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.10%) |Training time=0.43s (20.23%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7580|ppo_ep: 1|act_loss: -0.01525115966796875|cri_loss: -0.0068359375|unsuper_loss: 0.0
+average reward score: 5.8984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.08%) |Training time=0.43s (20.24%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7581|ppo_ep: 1|act_loss: 0.06732177734375|cri_loss: 0.038665771484375|unsuper_loss: 0.0
+average reward score: 5.6640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.27%) |Training time=0.43s (20.10%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7582|ppo_ep: 1|act_loss: 0.00707244873046875|cri_loss: 0.0036716461181640625|unsuper_loss: 0.0
+average reward score: 5.8515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.95%) |Training time=0.44s (20.38%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7583|ppo_ep: 1|act_loss: 0.040618896484375|cri_loss: 0.0207366943359375|unsuper_loss: 0.0
+average reward score: 5.34375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.15%) |Training time=0.43s (20.17%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7584|ppo_ep: 1|act_loss: -0.0211181640625|cri_loss: -0.01018524169921875|unsuper_loss: 0.0
+average reward score: 4.8203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.80%) |Training time=0.44s (20.52%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7585|ppo_ep: 1|act_loss: -0.0008382797241210938|cri_loss: -0.0003597736358642578|unsuper_loss: 0.0
+average reward score: 5.27734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.13%) |Training time=0.43s (20.19%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7586|ppo_ep: 1|act_loss: 0.00284576416015625|cri_loss: 0.0018253326416015625|unsuper_loss: 0.0
+average reward score: 5.23046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.99%) |Training time=0.44s (20.34%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7587|ppo_ep: 1|act_loss: -0.0142364501953125|cri_loss: -0.00701904296875|unsuper_loss: 0.0
+average reward score: 5.42578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.19%) |Training time=0.43s (20.12%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7588|ppo_ep: 1|act_loss: -0.00010442733764648438|cri_loss: 0.0002875328063964844|unsuper_loss: 0.0
+average reward score: 5.3671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.76%) |Training time=0.44s (18.90%) |Others=0.10 (4.33%)|CurSamplesPerSec=13.86 |AvgSamplesPerSec=14.56
+[2023-04-14 13:25:31,281] [INFO] [logging.py:96:log_dist] [Rank 0] step=7590, skipped=95, lr=[2.0776288009209243e-07, 2.0776288009209243e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:25:31,300] [INFO] [timer.py:199:stop] epoch=0/micro_step=7590/global_step=7590, RunningAvgSamplesPerSec=105.68754108352529, CurrSamplesPerSec=118.10116994467049, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:25:31,392] [INFO] [logging.py:96:log_dist] [Rank 0] step=7590, skipped=130, lr=[1.1764499893210879e-07, 1.1764499893210879e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7589|ppo_ep: 1|act_loss: -0.02447509765625|cri_loss: -0.01074981689453125|unsuper_loss: 0.0
+average reward score: 4.8828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.08%) |Training time=0.43s (20.25%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7590|ppo_ep: 1|act_loss: -0.0019626617431640625|cri_loss: -0.0008349418640136719|unsuper_loss: 0.0
+average reward score: 5.88671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.66%) |Training time=0.44s (20.70%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7591|ppo_ep: 1|act_loss: -0.0298004150390625|cri_loss: -0.01467132568359375|unsuper_loss: 0.0
+average reward score: 4.7890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.93%) |Training time=0.44s (20.40%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7592|ppo_ep: 1|act_loss: -0.0089111328125|cri_loss: -0.0044097900390625|unsuper_loss: 0.0
+average reward score: 6.21875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.89%) |Training time=0.44s (20.42%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7593|ppo_ep: 1|act_loss: -0.00467681884765625|cri_loss: -0.001857757568359375|unsuper_loss: 0.0
+average reward score: 5.453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.54%) |Training time=0.45s (20.79%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7594|ppo_ep: 1|act_loss: 0.01416015625|cri_loss: 0.00722503662109375|unsuper_loss: 0.0
+average reward score: 5.8359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.56%) |Training time=0.45s (20.78%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7595|ppo_ep: 1|act_loss: -0.00719451904296875|cri_loss: -0.0034542083740234375|unsuper_loss: 0.0
+average reward score: 5.82421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.22%) |Training time=0.43s (20.10%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7596|ppo_ep: 1|act_loss: -0.0211029052734375|cri_loss: -0.0099334716796875|unsuper_loss: 0.0
+average reward score: 4.90234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.08%) |Training time=0.43s (20.23%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7597|ppo_ep: 1|act_loss: 0.003650665283203125|cri_loss: 0.002460479736328125|unsuper_loss: 0.0
+average reward score: 5.1875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.91%) |Training time=0.44s (20.41%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7598|ppo_ep: 1|act_loss: 0.00899505615234375|cri_loss: 0.0050506591796875|unsuper_loss: 0.0
+average reward score: 5.9453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.03%) |Training time=0.43s (20.29%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.56
+[2023-04-14 13:25:52,727] [INFO] [logging.py:96:log_dist] [Rank 0] step=7600, skipped=95, lr=[2.024047245836643e-07, 2.024047245836643e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:25:52,746] [INFO] [timer.py:199:stop] epoch=0/micro_step=7600/global_step=7600, RunningAvgSamplesPerSec=105.69993310975197, CurrSamplesPerSec=117.69169550112328, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:25:52,838] [INFO] [logging.py:96:log_dist] [Rank 0] step=7600, skipped=130, lr=[1.1474482459929714e-07, 1.1474482459929714e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7599|ppo_ep: 1|act_loss: -0.0093536376953125|cri_loss: -0.0044097900390625|unsuper_loss: 0.0
+average reward score: 5.42578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.02%) |Training time=0.43s (20.30%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7600|ppo_ep: 1|act_loss: -0.025604248046875|cri_loss: -0.0126190185546875|unsuper_loss: 0.0
+average reward score: 5.3828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.76%) |Training time=0.44s (20.58%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7601|ppo_ep: 1|act_loss: 0.00301361083984375|cri_loss: 0.0016078948974609375|unsuper_loss: 0.0
+average reward score: 5.46875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.00%) |Training time=0.44s (20.33%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7602|ppo_ep: 1|act_loss: -0.020599365234375|cri_loss: -0.00994873046875|unsuper_loss: 0.0
+average reward score: 6.44140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.72s (73.39%) |Training time=0.45s (19.01%) |Others=0.18 (7.60%)|CurSamplesPerSec=13.65 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7603|ppo_ep: 1|act_loss: -0.0031585693359375|cri_loss: -0.001377105712890625|unsuper_loss: 0.0
+average reward score: 5.58984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.18%) |Training time=0.44s (19.92%) |Others=0.11 (4.89%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7604|ppo_ep: 1|act_loss: 0.00724029541015625|cri_loss: 0.0037631988525390625|unsuper_loss: 0.0
+average reward score: 5.109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.55%) |Training time=0.45s (20.80%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7605|ppo_ep: 1|act_loss: -0.00554656982421875|cri_loss: -0.0021820068359375|unsuper_loss: 0.0
+average reward score: 5.44921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.96%) |Training time=0.44s (20.36%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7606|ppo_ep: 1|act_loss: -0.0665283203125|cri_loss: -0.032135009765625|unsuper_loss: 0.0
+average reward score: 5.16015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.99%) |Training time=0.44s (20.33%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7607|ppo_ep: 1|act_loss: -0.0015411376953125|cri_loss: -0.0003371238708496094|unsuper_loss: 0.0
+average reward score: 5.3515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.79%) |Training time=0.44s (20.62%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7608|ppo_ep: 1|act_loss: -0.01561737060546875|cri_loss: -0.00743865966796875|unsuper_loss: 0.0
+average reward score: 5.3671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.21%) |Training time=0.43s (19.99%) |Others=0.10 (4.80%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.56
+[2023-04-14 13:26:14,473] [INFO] [logging.py:96:log_dist] [Rank 0] step=7610, skipped=95, lr=[1.9711508708862154e-07, 1.9711508708862154e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:26:14,491] [INFO] [timer.py:199:stop] epoch=0/micro_step=7610/global_step=7610, RunningAvgSamplesPerSec=105.71107588458801, CurrSamplesPerSec=105.46318940792834, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:26:14,583] [INFO] [logging.py:96:log_dist] [Rank 0] step=7610, skipped=130, lr=[1.1188000550230005e-07, 1.1188000550230005e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7609|ppo_ep: 1|act_loss: -0.0077056884765625|cri_loss: -0.0035419464111328125|unsuper_loss: 0.0
+average reward score: 5.48046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.22%) |Training time=0.47s (21.22%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7610|ppo_ep: 1|act_loss: 0.0023365020751953125|cri_loss: 0.0012693405151367188|unsuper_loss: 0.0
+average reward score: 5.2265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.99%) |Training time=0.46s (21.39%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7611|ppo_ep: 1|act_loss: 0.020172119140625|cri_loss: 0.010772705078125|unsuper_loss: 0.0
+average reward score: 5.0078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.22%) |Training time=0.46s (21.15%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7612|ppo_ep: 1|act_loss: -0.016632080078125|cri_loss: -0.0080718994140625|unsuper_loss: 0.0
+average reward score: 5.25390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.29%) |Training time=0.46s (21.08%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56
+[2023-04-14 13:26:23,123] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 7613|ppo_ep: 1|act_loss: -0.006011962890625|cri_loss: -0.0027866363525390625|unsuper_loss: 0.0
+average reward score: 5.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.06%) |Training time=0.43s (20.27%) |Others=0.10 (4.67%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7614|ppo_ep: 1|act_loss: -0.00437164306640625|cri_loss: -0.001934051513671875|unsuper_loss: 0.0
+average reward score: 5.13671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.94%) |Training time=0.46s (21.45%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7615|ppo_ep: 1|act_loss: -0.00489044189453125|cri_loss: -0.0023822784423828125|unsuper_loss: 0.0
+average reward score: 4.67578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.19%) |Training time=0.46s (21.26%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7616|ppo_ep: 1|act_loss: -0.01690673828125|cri_loss: -0.00826263427734375|unsuper_loss: 0.0
+average reward score: 5.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.33%) |Training time=0.46s (21.02%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.56
+[2023-04-14 13:26:32,062] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 7617|ppo_ep: 1|act_loss: 0.01526641845703125|cri_loss: 0.0093231201171875|unsuper_loss: 0.0
+average reward score: 5.3515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.44%) |Training time=0.49s (20.76%) |Others=0.09 (3.81%)|CurSamplesPerSec=13.61 |AvgSamplesPerSec=14.56
+[2023-04-14 13:26:34,217] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 7618|ppo_ep: 1|act_loss: 0.0286865234375|cri_loss: 0.0150604248046875|unsuper_loss: 0.0
+average reward score: 5.578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.31%) |Training time=0.46s (21.55%) |Others=0.09 (4.15%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.56
+[2023-04-14 13:26:36,272] [INFO] [logging.py:96:log_dist] [Rank 0] step=7620, skipped=96, lr=[1.9241306106741956e-07, 1.9241306106741956e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:26:36,290] [INFO] [timer.py:199:stop] epoch=0/micro_step=7620/global_step=7620, RunningAvgSamplesPerSec=105.71308353833173, CurrSamplesPerSec=105.99807301686585, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:26:36,383] [INFO] [logging.py:96:log_dist] [Rank 0] step=7620, skipped=132, lr=[1.0961363454760482e-07, 1.0961363454760482e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7619|ppo_ep: 1|act_loss: 0.0058441162109375|cri_loss: 0.00321197509765625|unsuper_loss: 0.0
+average reward score: 4.8046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.96%) |Training time=0.46s (21.42%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7620|ppo_ep: 1|act_loss: -0.006824493408203125|cri_loss: -0.00334930419921875|unsuper_loss: 0.0
+average reward score: 5.609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.89%) |Training time=0.47s (21.49%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7621|ppo_ep: 1|act_loss: -0.006504058837890625|cri_loss: -0.0030193328857421875|unsuper_loss: 0.0
+average reward score: 5.19921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.91%) |Training time=0.47s (21.47%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7622|ppo_ep: 1|act_loss: 0.010223388671875|cri_loss: 0.005458831787109375|unsuper_loss: 0.0
+average reward score: 5.01953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.04%) |Training time=0.46s (21.33%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7623|ppo_ep: 1|act_loss: 0.007648468017578125|cri_loss: 0.003936767578125|unsuper_loss: 0.0
+average reward score: 5.265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.46%) |Training time=0.48s (21.92%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7624|ppo_ep: 1|act_loss: 0.0036411285400390625|cri_loss: 0.002719879150390625|unsuper_loss: 0.0
+average reward score: 5.54296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.55%) |Training time=0.48s (21.86%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7625|ppo_ep: 1|act_loss: -0.0020275115966796875|cri_loss: -0.0009050369262695312|unsuper_loss: 0.0
+average reward score: 5.45703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.42%) |Training time=0.45s (20.93%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7626|ppo_ep: 1|act_loss: 0.0220794677734375|cri_loss: 0.01171112060546875|unsuper_loss: 0.0
+average reward score: 5.48828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.99%) |Training time=0.46s (21.39%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7627|ppo_ep: 1|act_loss: 0.004680633544921875|cri_loss: 0.0029010772705078125|unsuper_loss: 0.0
+average reward score: 5.28515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.02%) |Training time=0.46s (21.36%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7628|ppo_ep: 1|act_loss: 0.018707275390625|cri_loss: 0.00984954833984375|unsuper_loss: 0.0
+average reward score: 4.9140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.93%) |Training time=0.46s (21.45%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56
+[2023-04-14 13:26:57,936] [INFO] [logging.py:96:log_dist] [Rank 0] step=7630, skipped=96, lr=[1.8725382296377066e-07, 1.8725382296377066e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:26:57,955] [INFO] [timer.py:199:stop] epoch=0/micro_step=7630/global_step=7630, RunningAvgSamplesPerSec=105.71314051584926, CurrSamplesPerSec=106.23861904871212, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:26:58,047] [INFO] [logging.py:96:log_dist] [Rank 0] step=7630, skipped=132, lr=[1.0681256153060565e-07, 1.0681256153060565e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7629|ppo_ep: 1|act_loss: -0.0301513671875|cri_loss: -0.0148773193359375|unsuper_loss: 0.0
+average reward score: 5.94140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.95%) |Training time=0.46s (21.45%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7630|ppo_ep: 1|act_loss: -0.017547607421875|cri_loss: -0.008636474609375|unsuper_loss: 0.0
+average reward score: 5.5234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.27%) |Training time=0.46s (21.12%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7631|ppo_ep: 1|act_loss: 0.0010089874267578125|cri_loss: 0.0006146430969238281|unsuper_loss: 0.0
+average reward score: 5.78125
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.46%) |Training time=0.46s (20.99%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7632|ppo_ep: 1|act_loss: -0.004638671875|cri_loss: -0.002239227294921875|unsuper_loss: 0.0
+average reward score: 5.6640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.24%) |Training time=0.46s (20.33%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.14 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7633|ppo_ep: 1|act_loss: 0.0055999755859375|cri_loss: 0.0029544830322265625|unsuper_loss: 0.0
+average reward score: 5.4921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.68%) |Training time=0.47s (20.03%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7634|ppo_ep: 1|act_loss: 0.028350830078125|cri_loss: 0.014739990234375|unsuper_loss: 0.0
+average reward score: 5.52734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.74%) |Training time=0.47s (21.65%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7635|ppo_ep: 1|act_loss: 0.00738525390625|cri_loss: 0.00392913818359375|unsuper_loss: 0.0
+average reward score: 4.8828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.13%) |Training time=0.46s (21.11%) |Others=0.10 (4.76%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7636|ppo_ep: 1|act_loss: 0.0267791748046875|cri_loss: 0.01430511474609375|unsuper_loss: 0.0
+average reward score: 5.1015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.11%) |Training time=0.46s (21.25%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7637|ppo_ep: 1|act_loss: 0.01326751708984375|cri_loss: 0.00701904296875|unsuper_loss: 0.0
+average reward score: 5.26171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.25%) |Training time=0.46s (21.15%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7638|ppo_ep: 1|act_loss: 0.0040130615234375|cri_loss: 0.0021495819091796875|unsuper_loss: 0.0
+average reward score: 5.578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.44%) |Training time=0.46s (20.94%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.56
+[2023-04-14 13:27:19,954] [INFO] [logging.py:96:log_dist] [Rank 0] step=7640, skipped=96, lr=[1.8216332744641935e-07, 1.8216332744641935e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:27:19,973] [INFO] [timer.py:199:stop] epoch=0/micro_step=7640/global_step=7640, RunningAvgSamplesPerSec=105.71413966238659, CurrSamplesPerSec=101.97505208246275, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:27:20,065] [INFO] [logging.py:96:log_dist] [Rank 0] step=7640, skipped=132, lr=[1.0404696132469445e-07, 1.0404696132469445e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7639|ppo_ep: 1|act_loss: 0.00672149658203125|cri_loss: 0.00396728515625|unsuper_loss: 0.0
+average reward score: 6.265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.54%) |Training time=0.48s (21.90%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7640|ppo_ep: 1|act_loss: 0.00762939453125|cri_loss: 0.00408935546875|unsuper_loss: 0.0
+average reward score: 5.82421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.76%) |Training time=0.46s (21.58%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7641|ppo_ep: 1|act_loss: -0.010955810546875|cri_loss: -0.00516510009765625|unsuper_loss: 0.0
+average reward score: 5.5234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.48s (22.01%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7642|ppo_ep: 1|act_loss: -0.00540924072265625|cri_loss: -0.002593994140625|unsuper_loss: 0.0
+average reward score: 5.421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.46s (21.47%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7643|ppo_ep: 1|act_loss: 0.006183624267578125|cri_loss: 0.0033664703369140625|unsuper_loss: 0.0
+average reward score: 5.4453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.33%) |Training time=0.48s (22.05%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7644|ppo_ep: 1|act_loss: 0.01395416259765625|cri_loss: 0.00766754150390625|unsuper_loss: 0.0
+average reward score: 5.77734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.98%) |Training time=0.46s (21.36%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7645|ppo_ep: 1|act_loss: -0.030029296875|cri_loss: -0.01447296142578125|unsuper_loss: 0.0
+average reward score: 5.91015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.77%) |Training time=0.47s (21.59%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7646|ppo_ep: 1|act_loss: -0.01416015625|cri_loss: -0.0068206787109375|unsuper_loss: 0.0
+average reward score: 5.796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.88%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7647|ppo_ep: 1|act_loss: 0.0216522216796875|cri_loss: 0.01099395751953125|unsuper_loss: 0.0
+average reward score: 5.23828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.48s (21.94%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7648|ppo_ep: 1|act_loss: 0.013641357421875|cri_loss: 0.0073089599609375|unsuper_loss: 0.0
+average reward score: 5.34375
+-------------------------------------------------------------------------------------
+|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.50%) |Training time=0.50s (21.24%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.57 |AvgSamplesPerSec=14.56
+[2023-04-14 13:27:41,752] [INFO] [logging.py:96:log_dist] [Rank 0] step=7650, skipped=96, lr=[1.7714164996878907e-07, 1.7714164996878907e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:27:41,770] [INFO] [timer.py:199:stop] epoch=0/micro_step=7650/global_step=7650, RunningAvgSamplesPerSec=105.71081829530702, CurrSamplesPerSec=105.03040784290403, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:27:41,863] [INFO] [logging.py:96:log_dist] [Rank 0] step=7650, skipped=132, lr=[1.0131687492273862e-07, 1.0131687492273862e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7649|ppo_ep: 1|act_loss: 0.0198211669921875|cri_loss: 0.01026153564453125|unsuper_loss: 0.0
+average reward score: 5.4375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.72%) |Training time=0.47s (21.67%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7650|ppo_ep: 1|act_loss: -0.01372528076171875|cri_loss: -0.006732940673828125|unsuper_loss: 0.0
+average reward score: 5.77734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.83%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
+[2023-04-14 13:27:46,070] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, reducing to 4096
+epoch: 0|step: 7651|ppo_ep: 1|act_loss: 0.006175994873046875|cri_loss: 0.0032939910888671875|unsuper_loss: 0.0
+average reward score: 5.40625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.55%) |Training time=0.44s (20.78%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7652|ppo_ep: 1|act_loss: -0.00801849365234375|cri_loss: -0.003887176513671875|unsuper_loss: 0.0
+average reward score: 5.17578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.72%) |Training time=0.47s (21.65%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7653|ppo_ep: 1|act_loss: -0.01218414306640625|cri_loss: -0.0047760009765625|unsuper_loss: 0.0
+average reward score: 5.953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.18%) |Training time=0.48s (22.15%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7654|ppo_ep: 1|act_loss: -0.01444244384765625|cri_loss: -0.007080078125|unsuper_loss: 0.0
+average reward score: 5.1875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.07%) |Training time=0.46s (21.27%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7655|ppo_ep: 1|act_loss: 0.0143585205078125|cri_loss: 0.007495880126953125|unsuper_loss: 0.0
+average reward score: 5.27734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.07%) |Training time=0.49s (22.41%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7656|ppo_ep: 1|act_loss: -0.0041656494140625|cri_loss: -0.001190185546875|unsuper_loss: 0.0
+average reward score: 5.484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.80%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7657|ppo_ep: 1|act_loss: 0.039520263671875|cri_loss: 0.022735595703125|unsuper_loss: 0.0
+average reward score: 6.01171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.70%) |Training time=0.47s (21.74%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7658|ppo_ep: 1|act_loss: -0.0272674560546875|cri_loss: -0.01178741455078125|unsuper_loss: 0.0
+average reward score: 5.625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.47s (21.68%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56
+[2023-04-14 13:28:03,367] [INFO] [logging.py:96:log_dist] [Rank 0] step=7660, skipped=97, lr=[1.7268104120316125e-07, 1.7268104120316125e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:28:03,385] [INFO] [timer.py:199:stop] epoch=0/micro_step=7660/global_step=7660, RunningAvgSamplesPerSec=105.70880371115669, CurrSamplesPerSec=103.66849002303269, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:28:03,480] [INFO] [logging.py:96:log_dist] [Rank 0] step=7660, skipped=132, lr=[9.862234279120419e-08, 9.862234279120419e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7659|ppo_ep: 1|act_loss: 0.022857666015625|cri_loss: 0.0119781494140625|unsuper_loss: 0.0
+average reward score: 4.9453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.81%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7660|ppo_ep: 1|act_loss: -0.0127716064453125|cri_loss: -0.00572967529296875|unsuper_loss: 0.0
+average reward score: 5.04296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.72%) |Training time=0.46s (21.39%) |Others=0.11 (4.89%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7661|ppo_ep: 1|act_loss: -0.017333984375|cri_loss: -0.00823974609375|unsuper_loss: 0.0
+average reward score: 4.890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.66s (72.76%) |Training time=0.51s (22.55%) |Others=0.11 (4.69%)|CurSamplesPerSec=14.05 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7662|ppo_ep: 1|act_loss: 0.043792724609375|cri_loss: 0.0233612060546875|unsuper_loss: 0.0
+average reward score: 5.37109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.59%) |Training time=0.47s (21.85%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7663|ppo_ep: 1|act_loss: -0.024261474609375|cri_loss: -0.0117645263671875|unsuper_loss: 0.0
+average reward score: 5.40625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.87%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7664|ppo_ep: 1|act_loss: 0.0703125|cri_loss: 0.03778076171875|unsuper_loss: 0.0
+average reward score: 5.359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.49%) |Training time=0.53s (24.00%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.40 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7665|ppo_ep: 1|act_loss: 0.0843505859375|cri_loss: 0.0477294921875|unsuper_loss: 0.0
+average reward score: 4.77734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.83%) |Training time=0.47s (21.62%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7666|ppo_ep: 1|act_loss: -0.00772857666015625|cri_loss: -0.0037326812744140625|unsuper_loss: 0.0
+average reward score: 5.58203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.30%) |Training time=0.48s (22.03%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7667|ppo_ep: 1|act_loss: 0.020172119140625|cri_loss: 0.0110626220703125|unsuper_loss: 0.0
+average reward score: 5.43359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.70%) |Training time=0.47s (21.76%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7668|ppo_ep: 1|act_loss: 0.0711669921875|cri_loss: 0.037109375|unsuper_loss: 0.0
+average reward score: 5.53515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.49%) |Training time=0.48s (21.88%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.56
+[2023-04-14 13:28:25,190] [INFO] [logging.py:96:log_dist] [Rank 0] step=7670, skipped=97, lr=[1.677903222209867e-07, 1.677903222209867e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:28:25,208] [INFO] [timer.py:199:stop] epoch=0/micro_step=7670/global_step=7670, RunningAvgSamplesPerSec=105.70321438555533, CurrSamplesPerSec=105.26079819809787, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:28:25,301] [INFO] [logging.py:96:log_dist] [Rank 0] step=7670, skipped=132, lr=[9.596340486955818e-08, 9.596340486955818e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7669|ppo_ep: 1|act_loss: -0.00418853759765625|cri_loss: -0.001926422119140625|unsuper_loss: 0.0
+average reward score: 5.2421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.97%) |Training time=0.47s (21.51%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7670|ppo_ep: 1|act_loss: -0.0084381103515625|cri_loss: -0.003810882568359375|unsuper_loss: 0.0
+average reward score: 5.91796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.55%) |Training time=0.47s (21.84%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7671|ppo_ep: 1|act_loss: -0.00292205810546875|cri_loss: -0.001384735107421875|unsuper_loss: 0.0
+average reward score: 5.5390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.74%) |Training time=0.47s (21.71%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7672|ppo_ep: 1|act_loss: -0.005619049072265625|cri_loss: -0.0026702880859375|unsuper_loss: 0.0
+average reward score: 5.44140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.71%) |Training time=0.47s (21.66%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7673|ppo_ep: 1|act_loss: -0.0075225830078125|cri_loss: -0.003704071044921875|unsuper_loss: 0.0
+average reward score: 5.58203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.55%) |Training time=0.47s (21.88%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7674|ppo_ep: 1|act_loss: -0.0134735107421875|cri_loss: -0.006656646728515625|unsuper_loss: 0.0
+average reward score: 4.83984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.75%) |Training time=0.47s (21.62%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7675|ppo_ep: 1|act_loss: 0.0252227783203125|cri_loss: 0.01290130615234375|unsuper_loss: 0.0
+average reward score: 5.35546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.26%) |Training time=0.46s (21.20%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7676|ppo_ep: 1|act_loss: -0.00504302978515625|cri_loss: -0.0024242401123046875|unsuper_loss: 0.0
+average reward score: 6.1875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.47%) |Training time=0.45s (20.89%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7677|ppo_ep: 1|act_loss: -0.0011739730834960938|cri_loss: -0.0005507469177246094|unsuper_loss: 0.0
+average reward score: 5.24609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.46s (21.18%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7678|ppo_ep: 1|act_loss: -0.03155517578125|cri_loss: -0.0152740478515625|unsuper_loss: 0.0
+average reward score: 5.546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.47%) |Training time=0.45s (20.87%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.56
+[2023-04-14 13:28:46,830] [INFO] [logging.py:96:log_dist] [Rank 0] step=7680, skipped=97, lr=[1.629686343211429e-07, 1.629686343211429e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:28:46,848] [INFO] [timer.py:199:stop] epoch=0/micro_step=7680/global_step=7680, RunningAvgSamplesPerSec=105.70482233763329, CurrSamplesPerSec=112.06785304851931, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:28:46,941] [INFO] [logging.py:96:log_dist] [Rank 0] step=7680, skipped=132, lr=[9.334010056967408e-08, 9.334010056967408e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7679|ppo_ep: 1|act_loss: -0.0015592575073242188|cri_loss: -0.0006017684936523438|unsuper_loss: 0.0
+average reward score: 5.12890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.34%) |Training time=0.49s (22.17%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7680|ppo_ep: 1|act_loss: 0.0016498565673828125|cri_loss: 0.0011415481567382812|unsuper_loss: 0.0
+average reward score: 5.609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.42%) |Training time=0.45s (20.93%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7681|ppo_ep: 1|act_loss: 0.012451171875|cri_loss: 0.006500244140625|unsuper_loss: 0.0
+average reward score: 6.28125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.25%) |Training time=0.46s (21.21%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7682|ppo_ep: 1|act_loss: 0.0462646484375|cri_loss: 0.0240325927734375|unsuper_loss: 0.0
+average reward score: 6.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.40%) |Training time=0.45s (20.95%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7683|ppo_ep: 1|act_loss: -0.0667724609375|cri_loss: -0.01611328125|unsuper_loss: 0.0
+average reward score: 5.12890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.46%) |Training time=0.45s (20.97%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7684|ppo_ep: 1|act_loss: -0.0005736351013183594|cri_loss: -0.00011801719665527344|unsuper_loss: 0.0
+average reward score: 5.16796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.85%) |Training time=0.47s (21.55%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7685|ppo_ep: 1|act_loss: 0.015594482421875|cri_loss: 0.0088348388671875|unsuper_loss: 0.0
+average reward score: 6.34765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.08%) |Training time=0.46s (21.39%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7686|ppo_ep: 1|act_loss: -0.006443023681640625|cri_loss: -0.0030460357666015625|unsuper_loss: 0.0
+average reward score: 5.4921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.61%) |Training time=0.47s (21.66%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7687|ppo_ep: 1|act_loss: 0.0017538070678710938|cri_loss: 0.0012903213500976562|unsuper_loss: 0.0
+average reward score: 5.6875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.76%) |Training time=0.47s (21.69%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7688|ppo_ep: 1|act_loss: -0.0147247314453125|cri_loss: -0.0071258544921875|unsuper_loss: 0.0
+average reward score: 5.1796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.82%) |Training time=0.47s (21.55%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56
+[2023-04-14 13:29:08,495] [INFO] [logging.py:96:log_dist] [Rank 0] step=7690, skipped=97, lr=[1.5821604897267761e-07, 1.5821604897267761e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:29:08,514] [INFO] [timer.py:199:stop] epoch=0/micro_step=7690/global_step=7690, RunningAvgSamplesPerSec=105.70594568464799, CurrSamplesPerSec=104.29189793176543, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:29:08,609] [INFO] [logging.py:96:log_dist] [Rank 0] step=7690, skipped=132, lr=[9.075246877525034e-08, 9.075246877525034e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7689|ppo_ep: 1|act_loss: -0.00147247314453125|cri_loss: 0.00119781494140625|unsuper_loss: 0.0
+average reward score: 5.33984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.80%) |Training time=0.47s (21.60%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7690|ppo_ep: 1|act_loss: 0.002208709716796875|cri_loss: 0.0012598037719726562|unsuper_loss: 0.0
+average reward score: 5.5625
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.63%) |Training time=0.47s (21.52%) |Others=0.11 (4.85%)|CurSamplesPerSec=14.54 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7691|ppo_ep: 1|act_loss: 0.0294189453125|cri_loss: 0.014984130859375|unsuper_loss: 0.0
+average reward score: 5.1640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.71s (73.93%) |Training time=0.50s (21.81%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.83 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7692|ppo_ep: 1|act_loss: 0.0054931640625|cri_loss: 0.0031280517578125|unsuper_loss: 0.0
+average reward score: 5.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.76%) |Training time=0.48s (21.69%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.54 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7693|ppo_ep: 1|act_loss: 0.051361083984375|cri_loss: 0.02728271484375|unsuper_loss: 0.0
+average reward score: 5.859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.00%) |Training time=0.47s (21.47%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7694|ppo_ep: 1|act_loss: 0.024505615234375|cri_loss: 0.0128173828125|unsuper_loss: 0.0
+average reward score: 5.4296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.84%) |Training time=0.47s (21.53%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7695|ppo_ep: 1|act_loss: -0.010711669921875|cri_loss: -0.0051727294921875|unsuper_loss: 0.0
+average reward score: 5.23046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.10%) |Training time=0.49s (20.74%) |Others=0.10 (4.16%)|CurSamplesPerSec=13.55 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7696|ppo_ep: 1|act_loss: -0.0024051666259765625|cri_loss: -0.0011453628540039062|unsuper_loss: 0.0
+average reward score: 5.12890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.94%) |Training time=0.47s (21.44%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7697|ppo_ep: 1|act_loss: 0.009765625|cri_loss: 0.004962921142578125|unsuper_loss: 0.0
+average reward score: 4.58203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.14%) |Training time=0.46s (21.29%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7698|ppo_ep: 1|act_loss: 0.045196533203125|cri_loss: 0.02325439453125|unsuper_loss: 0.0
+average reward score: 5.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.31%) |Training time=0.46s (21.06%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.56
+[2023-04-14 13:29:30,655] [INFO] [logging.py:96:log_dist] [Rank 0] step=7700, skipped=97, lr=[1.5353263662036897e-07, 1.5353263662036897e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:29:30,673] [INFO] [timer.py:199:stop] epoch=0/micro_step=7700/global_step=7700, RunningAvgSamplesPerSec=105.7013104552143, CurrSamplesPerSec=98.56926587346759, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:29:30,766] [INFO] [logging.py:96:log_dist] [Rank 0] step=7700, skipped=132, lr=[8.820054784123288e-08, 8.820054784123288e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7699|ppo_ep: 1|act_loss: 0.003551483154296875|cri_loss: 0.0018930435180664062|unsuper_loss: 0.0
+average reward score: 5.65234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.31%) |Training time=0.49s (22.24%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7700|ppo_ep: 1|act_loss: -0.022552490234375|cri_loss: -0.009979248046875|unsuper_loss: 0.0
+average reward score: 5.22265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.00%) |Training time=0.46s (21.38%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7701|ppo_ep: 1|act_loss: 0.0167694091796875|cri_loss: 0.00882720947265625|unsuper_loss: 0.0
+average reward score: 5.36328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.75%) |Training time=0.47s (21.64%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7702|ppo_ep: 1|act_loss: -0.024078369140625|cri_loss: -0.01169586181640625|unsuper_loss: 0.0
+average reward score: 5.09375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.40%) |Training time=0.48s (21.99%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7703|ppo_ep: 1|act_loss: -0.004787445068359375|cri_loss: -0.00232696533203125|unsuper_loss: 0.0
+average reward score: 5.28125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.13%) |Training time=0.49s (22.36%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7704|ppo_ep: 1|act_loss: -0.046112060546875|cri_loss: -0.019561767578125|unsuper_loss: 0.0
+average reward score: 6.22265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.56%) |Training time=0.47s (21.80%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7705|ppo_ep: 1|act_loss: -0.0125885009765625|cri_loss: -0.00618743896484375|unsuper_loss: 0.0
+average reward score: 5.00390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.47%) |Training time=0.48s (22.00%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7706|ppo_ep: 1|act_loss: -0.0083465576171875|cri_loss: -0.0037059783935546875|unsuper_loss: 0.0
+average reward score: 5.76953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.12%) |Training time=0.48s (22.27%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7707|ppo_ep: 1|act_loss: -0.0010509490966796875|cri_loss: -0.0004482269287109375|unsuper_loss: 0.0
+average reward score: 5.61328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.52%) |Training time=0.48s (21.95%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7708|ppo_ep: 1|act_loss: -0.00623321533203125|cri_loss: -0.002918243408203125|unsuper_loss: 0.0
+average reward score: 5.53125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.03%) |Training time=0.49s (22.34%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.56
+[2023-04-14 13:29:52,369] [INFO] [logging.py:96:log_dist] [Rank 0] step=7710, skipped=97, lr=[1.489184666836874e-07, 1.489184666836874e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:29:52,387] [INFO] [timer.py:199:stop] epoch=0/micro_step=7710/global_step=7710, RunningAvgSamplesPerSec=105.69619158975686, CurrSamplesPerSec=102.90253227929671, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:29:52,479] [INFO] [logging.py:96:log_dist] [Rank 0] step=7710, skipped=132, lr=[8.568437559324572e-08, 8.568437559324572e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7709|ppo_ep: 1|act_loss: 0.001888275146484375|cri_loss: 0.00121307373046875|unsuper_loss: 0.0
+average reward score: 5.328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.66%) |Training time=0.47s (21.82%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7710|ppo_ep: 1|act_loss: 0.0034389495849609375|cri_loss: 0.0018596649169921875|unsuper_loss: 0.0
+average reward score: 5.4765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.81%) |Training time=0.49s (20.76%) |Others=0.10 (4.43%)|CurSamplesPerSec=13.57 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7711|ppo_ep: 1|act_loss: 0.0123748779296875|cri_loss: 0.006412506103515625|unsuper_loss: 0.0
+average reward score: 4.93359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.85%) |Training time=0.47s (21.61%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7712|ppo_ep: 1|act_loss: 0.006988525390625|cri_loss: 0.00377655029296875|unsuper_loss: 0.0
+average reward score: 5.87890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.47s (21.90%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7713|ppo_ep: 1|act_loss: -0.0089569091796875|cri_loss: -0.00347900390625|unsuper_loss: 0.0
+average reward score: 5.6484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.80%) |Training time=0.47s (21.64%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7714|ppo_ep: 1|act_loss: 0.007404327392578125|cri_loss: 0.00394439697265625|unsuper_loss: 0.0
+average reward score: 5.22265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.35%) |Training time=0.48s (22.01%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7715|ppo_ep: 1|act_loss: 0.020233154296875|cri_loss: 0.0104522705078125|unsuper_loss: 0.0
+average reward score: 5.3515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.74%) |Training time=0.47s (21.72%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7716|ppo_ep: 1|act_loss: 0.0013427734375|cri_loss: 0.000820159912109375|unsuper_loss: 0.0
+average reward score: 5.015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.87%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7717|ppo_ep: 1|act_loss: -0.0102996826171875|cri_loss: -0.0048675537109375|unsuper_loss: 0.0
+average reward score: 5.0078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.96%) |Training time=0.49s (22.51%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7718|ppo_ep: 1|act_loss: 0.0143280029296875|cri_loss: 0.00757598876953125|unsuper_loss: 0.0
+average reward score: 5.59375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.98%) |Training time=0.48s (22.38%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56
+[2023-04-14 13:30:14,266] [INFO] [logging.py:96:log_dist] [Rank 0] step=7720, skipped=97, lr=[1.4437360755576166e-07, 1.4437360755576166e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:30:14,285] [INFO] [timer.py:199:stop] epoch=0/micro_step=7720/global_step=7720, RunningAvgSamplesPerSec=105.68932583761074, CurrSamplesPerSec=93.3791178439091, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:30:14,369] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+[2023-04-14 13:30:14,369] [INFO] [logging.py:96:log_dist] [Rank 0] step=7720, skipped=133, lr=[8.345041653250202e-08, 8.345041653250202e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7719|ppo_ep: 1|act_loss: 0.0103302001953125|cri_loss: 0.005672454833984375|unsuper_loss: 0.0
+average reward score: 5.03515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.82%) |Training time=0.50s (23.17%) |Others=0.09 (4.02%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.56
+[2023-04-14 13:30:16,708] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 7720|ppo_ep: 1|act_loss: 0.002162933349609375|cri_loss: 0.00196075439453125|unsuper_loss: 0.0
+average reward score: 5.2109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.69s (72.27%) |Training time=0.56s (23.90%) |Others=0.09 (3.83%)|CurSamplesPerSec=13.68 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7721|ppo_ep: 1|act_loss: -0.01486968994140625|cri_loss: -0.0072174072265625|unsuper_loss: 0.0
+average reward score: 5.69140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.93%) |Training time=0.49s (22.55%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7722|ppo_ep: 1|act_loss: -0.00432586669921875|cri_loss: -0.0019664764404296875|unsuper_loss: 0.0
+average reward score: 5.30859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.66%) |Training time=0.49s (22.74%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7723|ppo_ep: 1|act_loss: 0.0094451904296875|cri_loss: 0.005096435546875|unsuper_loss: 0.0
+average reward score: 5.671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.84%) |Training time=0.47s (21.67%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7724|ppo_ep: 1|act_loss: -2.2649765014648438e-05|cri_loss: 0.0001900196075439453|unsuper_loss: 0.0
+average reward score: 4.9609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.48s (22.01%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7725|ppo_ep: 1|act_loss: 0.022003173828125|cri_loss: 0.011322021484375|unsuper_loss: 0.0
+average reward score: 5.52734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.77s (74.95%) |Training time=0.49s (20.88%) |Others=0.10 (4.16%)|CurSamplesPerSec=13.57 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7726|ppo_ep: 1|act_loss: -0.0091705322265625|cri_loss: -0.0045166015625|unsuper_loss: 0.0
+average reward score: 5.671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.31%) |Training time=0.46s (21.06%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7727|ppo_ep: 1|act_loss: 0.0120849609375|cri_loss: 0.0061798095703125|unsuper_loss: 0.0
+average reward score: 5.22265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.73%) |Training time=0.45s (20.72%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7728|ppo_ep: 1|act_loss: -0.005695343017578125|cri_loss: -0.0025634765625|unsuper_loss: 0.0
+average reward score: 5.38671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.17%) |Training time=0.46s (21.19%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.56
+[2023-04-14 13:30:36,328] [INFO] [logging.py:96:log_dist] [Rank 0] step=7730, skipped=97, lr=[1.398981266023686e-07, 1.398981266023686e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:30:36,346] [INFO] [timer.py:199:stop] epoch=0/micro_step=7730/global_step=7730, RunningAvgSamplesPerSec=105.68184628578439, CurrSamplesPerSec=101.17140408416168, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:30:36,439] [INFO] [logging.py:96:log_dist] [Rank 0] step=7730, skipped=134, lr=[8.124547094506025e-08, 8.124547094506025e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7729|ppo_ep: 1|act_loss: 0.00047397613525390625|cri_loss: 0.00033283233642578125|unsuper_loss: 0.0
+average reward score: 5.609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.67%) |Training time=0.48s (21.85%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7730|ppo_ep: 1|act_loss: 0.013671875|cri_loss: 0.006999969482421875|unsuper_loss: 0.0
+average reward score: 5.87109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.29%) |Training time=0.45s (20.99%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7731|ppo_ep: 1|act_loss: -0.02764892578125|cri_loss: -0.01317596435546875|unsuper_loss: 0.0
+average reward score: 5.47265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.19%) |Training time=0.46s (21.26%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7732|ppo_ep: 1|act_loss: -0.032623291015625|cri_loss: -0.0156707763671875|unsuper_loss: 0.0
+average reward score: 5.41796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.18%) |Training time=0.46s (21.20%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7733|ppo_ep: 1|act_loss: -0.005161285400390625|cri_loss: -0.002452850341796875|unsuper_loss: 0.0
+average reward score: 5.03515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.67%) |Training time=0.45s (20.77%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7734|ppo_ep: 1|act_loss: 0.048309326171875|cri_loss: 0.0247039794921875|unsuper_loss: 0.0
+average reward score: 5.578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.44%) |Training time=0.45s (20.94%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7735|ppo_ep: 1|act_loss: -0.01873779296875|cri_loss: -0.0092926025390625|unsuper_loss: 0.0
+average reward score: 5.265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.31%) |Training time=0.46s (21.14%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7736|ppo_ep: 1|act_loss: -0.00949859619140625|cri_loss: -0.0045318603515625|unsuper_loss: 0.0
+average reward score: 6.01171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.20%) |Training time=0.46s (21.18%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7737|ppo_ep: 1|act_loss: -0.021820068359375|cri_loss: -0.004364013671875|unsuper_loss: 0.0
+average reward score: 5.234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.50%) |Training time=0.45s (20.94%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7738|ppo_ep: 1|act_loss: -0.026641845703125|cri_loss: -0.013214111328125|unsuper_loss: 0.0
+average reward score: 5.26953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.18%) |Training time=0.46s (21.21%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
+[2023-04-14 13:30:57,937] [INFO] [logging.py:96:log_dist] [Rank 0] step=7740, skipped=97, lr=[1.354920901609319e-07, 1.354920901609319e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:30:57,955] [INFO] [timer.py:199:stop] epoch=0/micro_step=7740/global_step=7740, RunningAvgSamplesPerSec=105.68622715622114, CurrSamplesPerSec=109.06858467666899, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:30:58,048] [INFO] [logging.py:96:log_dist] [Rank 0] step=7740, skipped=134, lr=[7.882959173781374e-08, 7.882959173781374e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7739|ppo_ep: 1|act_loss: -0.001194000244140625|cri_loss: -0.0004506111145019531|unsuper_loss: 0.0
+average reward score: 5.8203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.33%) |Training time=0.46s (21.12%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7740|ppo_ep: 1|act_loss: 0.00063323974609375|cri_loss: 0.0004687309265136719|unsuper_loss: 0.0
+average reward score: 6.19140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.78s (75.61%) |Training time=0.47s (20.11%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.62 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7741|ppo_ep: 1|act_loss: 0.02520751953125|cri_loss: 0.012786865234375|unsuper_loss: 0.0
+average reward score: 5.8515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.28%) |Training time=0.46s (21.19%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7742|ppo_ep: 1|act_loss: -0.0069122314453125|cri_loss: -0.0032215118408203125|unsuper_loss: 0.0
+average reward score: 5.71875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.31%) |Training time=0.46s (21.07%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7743|ppo_ep: 1|act_loss: -0.00384521484375|cri_loss: -0.0016021728515625|unsuper_loss: 0.0
+average reward score: 4.95703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.24%) |Training time=0.46s (21.21%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7744|ppo_ep: 1|act_loss: 0.006366729736328125|cri_loss: 0.0032444000244140625|unsuper_loss: 0.0
+average reward score: 5.375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.45%) |Training time=0.45s (20.83%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7745|ppo_ep: 1|act_loss: 0.0296478271484375|cri_loss: 0.0156097412109375|unsuper_loss: 0.0
+average reward score: 5.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.11%) |Training time=0.46s (21.34%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7746|ppo_ep: 1|act_loss: -0.00768280029296875|cri_loss: -0.0029754638671875|unsuper_loss: 0.0
+average reward score: 5.71875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.07%) |Training time=0.46s (21.31%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7747|ppo_ep: 1|act_loss: 0.0009546279907226562|cri_loss: 0.002101898193359375|unsuper_loss: 0.0
+average reward score: 4.83203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.46%) |Training time=0.46s (21.03%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7748|ppo_ep: 1|act_loss: 0.00020587444305419922|cri_loss: 0.00020742416381835938|unsuper_loss: 0.0
+average reward score: 5.21875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.90%) |Training time=0.47s (21.52%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.56
+[2023-04-14 13:31:19,832] [INFO] [logging.py:96:log_dist] [Rank 0] step=7750, skipped=97, lr=[1.3115556353954087e-07, 1.3115556353954087e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:31:19,851] [INFO] [timer.py:199:stop] epoch=0/micro_step=7750/global_step=7750, RunningAvgSamplesPerSec=105.68781927307126, CurrSamplesPerSec=104.16773291257819, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:31:19,943] [INFO] [logging.py:96:log_dist] [Rank 0] step=7750, skipped=134, lr=[7.644960011677305e-08, 7.644960011677305e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7749|ppo_ep: 1|act_loss: 0.00521087646484375|cri_loss: 0.0029754638671875|unsuper_loss: 0.0
+average reward score: 5.125
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.25%) |Training time=0.47s (21.31%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7750|ppo_ep: 1|act_loss: 0.004547119140625|cri_loss: 0.0027065277099609375|unsuper_loss: 0.0
+average reward score: 5.1015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.28%) |Training time=0.46s (20.31%) |Others=0.10 (4.41%)|CurSamplesPerSec=14.11 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7751|ppo_ep: 1|act_loss: -0.00215911865234375|cri_loss: -0.00099945068359375|unsuper_loss: 0.0
+average reward score: 5.48828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.16%) |Training time=0.46s (21.32%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7752|ppo_ep: 1|act_loss: 0.0113067626953125|cri_loss: 0.0062255859375|unsuper_loss: 0.0
+average reward score: 5.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.09%) |Training time=0.46s (21.28%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7753|ppo_ep: 1|act_loss: -0.0093536376953125|cri_loss: -0.004302978515625|unsuper_loss: 0.0
+average reward score: 5.34375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.37%) |Training time=0.46s (21.10%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7754|ppo_ep: 1|act_loss: 0.0090179443359375|cri_loss: 0.004871368408203125|unsuper_loss: 0.0
+average reward score: 6.0390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.28%) |Training time=0.46s (21.09%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7755|ppo_ep: 1|act_loss: 0.05841064453125|cri_loss: 0.031768798828125|unsuper_loss: 0.0
+average reward score: 5.1953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.78s (75.84%) |Training time=0.47s (19.97%) |Others=0.10 (4.19%)|CurSamplesPerSec=13.65 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7756|ppo_ep: 1|act_loss: -0.0202178955078125|cri_loss: -0.0099334716796875|unsuper_loss: 0.0
+average reward score: 5.22265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.11%) |Training time=0.46s (21.27%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7757|ppo_ep: 1|act_loss: 0.00994873046875|cri_loss: 0.00514984130859375|unsuper_loss: 0.0
+average reward score: 5.3125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.32%) |Training time=0.46s (21.14%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7758|ppo_ep: 1|act_loss: 0.0001895427703857422|cri_loss: 0.00017499923706054688|unsuper_loss: 0.0
+average reward score: 5.12890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.77%) |Training time=0.47s (21.61%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.56
+[2023-04-14 13:31:41,798] [INFO] [logging.py:96:log_dist] [Rank 0] step=7760, skipped=97, lr=[1.2688861101598217e-07, 1.2688861101598217e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:31:41,816] [INFO] [timer.py:199:stop] epoch=0/micro_step=7760/global_step=7760, RunningAvgSamplesPerSec=105.68886827324046, CurrSamplesPerSec=102.76134876744779, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:31:41,909] [INFO] [logging.py:96:log_dist] [Rank 0] step=7760, skipped=134, lr=[7.410553135915572e-08, 7.410553135915572e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7759|ppo_ep: 1|act_loss: -0.013427734375|cri_loss: -0.006565093994140625|unsuper_loss: 0.0
+average reward score: 5.68359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.77%) |Training time=0.47s (21.71%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7760|ppo_ep: 1|act_loss: 0.020599365234375|cri_loss: 0.0105438232421875|unsuper_loss: 0.0
+average reward score: 5.6796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.02%) |Training time=0.46s (21.36%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7761|ppo_ep: 1|act_loss: -0.002109527587890625|cri_loss: -0.0009775161743164062|unsuper_loss: 0.0
+average reward score: 5.94921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.09%) |Training time=0.46s (21.20%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7762|ppo_ep: 1|act_loss: -0.01495361328125|cri_loss: -0.007282257080078125|unsuper_loss: 0.0
+average reward score: 6.00390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.19%) |Training time=0.46s (21.16%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7763|ppo_ep: 1|act_loss: 0.0152587890625|cri_loss: 0.00789642333984375|unsuper_loss: 0.0
+average reward score: 5.45703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.22%) |Training time=0.46s (21.24%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7764|ppo_ep: 1|act_loss: 0.0494384765625|cri_loss: 0.02557373046875|unsuper_loss: 0.0
+average reward score: 5.34375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.22%) |Training time=0.46s (21.14%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7765|ppo_ep: 1|act_loss: -0.00846099853515625|cri_loss: -0.0038127899169921875|unsuper_loss: 0.0
+average reward score: 5.59375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.26%) |Training time=0.46s (21.19%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7766|ppo_ep: 1|act_loss: -0.014495849609375|cri_loss: -0.00702667236328125|unsuper_loss: 0.0
+average reward score: 4.88671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.32%) |Training time=0.45s (21.04%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7767|ppo_ep: 1|act_loss: 0.02105712890625|cri_loss: 0.01104736328125|unsuper_loss: 0.0
+average reward score: 5.6484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.15%) |Training time=0.46s (21.30%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7768|ppo_ep: 1|act_loss: -0.0157470703125|cri_loss: -0.0063934326171875|unsuper_loss: 0.0
+average reward score: 5.47265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.12%) |Training time=0.46s (21.25%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56
+[2023-04-14 13:32:03,410] [INFO] [logging.py:96:log_dist] [Rank 0] step=7770, skipped=97, lr=[1.2269129583678402e-07, 1.2269129583678402e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:32:03,429] [INFO] [timer.py:199:stop] epoch=0/micro_step=7770/global_step=7770, RunningAvgSamplesPerSec=105.69178947512732, CurrSamplesPerSec=107.38045341847963, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:32:03,521] [INFO] [logging.py:96:log_dist] [Rank 0] step=7770, skipped=134, lr=[7.17974202097152e-08, 7.17974202097152e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7769|ppo_ep: 1|act_loss: -0.012664794921875|cri_loss: -0.006145477294921875|unsuper_loss: 0.0
+average reward score: 5.06640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.07%) |Training time=0.46s (21.31%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7770|ppo_ep: 1|act_loss: 0.01064300537109375|cri_loss: 0.0056610107421875|unsuper_loss: 0.0
+average reward score: 5.171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.60s (70.19%) |Training time=0.58s (25.50%) |Others=0.10 (4.31%)|CurSamplesPerSec=14.04 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7771|ppo_ep: 1|act_loss: -0.0009503364562988281|cri_loss: -0.00011730194091796875|unsuper_loss: 0.0
+average reward score: 5.25
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.07%) |Training time=0.46s (21.39%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7772|ppo_ep: 1|act_loss: -0.03314208984375|cri_loss: -0.0156707763671875|unsuper_loss: 0.0
+average reward score: 5.0546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.10%) |Training time=0.46s (21.27%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7773|ppo_ep: 1|act_loss: -0.012664794921875|cri_loss: -0.006103515625|unsuper_loss: 0.0
+average reward score: 4.87890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.13%) |Training time=0.46s (21.31%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7774|ppo_ep: 1|act_loss: 0.0161590576171875|cri_loss: 0.00826263427734375|unsuper_loss: 0.0
+average reward score: 5.40234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.94%) |Training time=0.47s (21.42%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7775|ppo_ep: 1|act_loss: -0.0257110595703125|cri_loss: -0.0124664306640625|unsuper_loss: 0.0
+average reward score: 4.8984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.43%) |Training time=0.46s (21.06%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7776|ppo_ep: 1|act_loss: -0.032073974609375|cri_loss: -0.014984130859375|unsuper_loss: 0.0
+average reward score: 5.5703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.03%) |Training time=0.46s (21.29%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7777|ppo_ep: 1|act_loss: -0.016937255859375|cri_loss: -0.007793426513671875|unsuper_loss: 0.0
+average reward score: 5.93359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.11%) |Training time=0.46s (21.33%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7778|ppo_ep: 1|act_loss: -0.0005383491516113281|cri_loss: -0.00024020671844482422|unsuper_loss: 0.0
+average reward score: 5.74609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.08%) |Training time=0.46s (21.32%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.56
+[2023-04-14 13:32:25,299] [INFO] [logging.py:96:log_dist] [Rank 0] step=7780, skipped=97, lr=[1.1856368021628366e-07, 1.1856368021628366e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:32:25,318] [INFO] [timer.py:199:stop] epoch=0/micro_step=7780/global_step=7780, RunningAvgSamplesPerSec=105.68915657133162, CurrSamplesPerSec=104.81370925924769, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:32:25,410] [INFO] [logging.py:96:log_dist] [Rank 0] step=7780, skipped=134, lr=[6.952530088022713e-08, 6.952530088022713e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7779|ppo_ep: 1|act_loss: 0.00714874267578125|cri_loss: 0.0036907196044921875|unsuper_loss: 0.0
+average reward score: 5.87109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.72s (75.23%) |Training time=0.47s (20.48%) |Others=0.10 (4.29%)|CurSamplesPerSec=14.01 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7780|ppo_ep: 1|act_loss: -0.004230499267578125|cri_loss: -0.0018758773803710938|unsuper_loss: 0.0
+average reward score: 5.7734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.24%) |Training time=0.46s (21.13%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7781|ppo_ep: 1|act_loss: 0.04168701171875|cri_loss: 0.0229034423828125|unsuper_loss: 0.0
+average reward score: 5.515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.52%) |Training time=0.45s (20.92%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7782|ppo_ep: 1|act_loss: -0.016357421875|cri_loss: -0.00785064697265625|unsuper_loss: 0.0
+average reward score: 6.0078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.38%) |Training time=0.45s (20.97%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7783|ppo_ep: 1|act_loss: 0.010711669921875|cri_loss: 0.005565643310546875|unsuper_loss: 0.0
+average reward score: 5.5625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.44%) |Training time=0.45s (20.91%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7784|ppo_ep: 1|act_loss: -0.024505615234375|cri_loss: -0.0116729736328125|unsuper_loss: 0.0
+average reward score: 5.15625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.36%) |Training time=0.45s (21.00%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7785|ppo_ep: 1|act_loss: 0.01143646240234375|cri_loss: 0.00632476806640625|unsuper_loss: 0.0
+average reward score: 5.28125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.44%) |Training time=0.45s (20.91%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7786|ppo_ep: 1|act_loss: -0.0153656005859375|cri_loss: -0.007503509521484375|unsuper_loss: 0.0
+average reward score: 5.53515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.46%) |Training time=0.44s (19.20%) |Others=0.10 (4.34%)|CurSamplesPerSec=13.82 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7787|ppo_ep: 1|act_loss: -0.03143310546875|cri_loss: -0.014678955078125|unsuper_loss: 0.0
+average reward score: 5.1328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.37%) |Training time=0.45s (21.07%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7788|ppo_ep: 1|act_loss: -0.008697509765625|cri_loss: -0.0040130615234375|unsuper_loss: 0.0
+average reward score: 5.13671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.45%) |Training time=0.45s (20.91%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.56
+[2023-04-14 13:32:47,019] [INFO] [logging.py:96:log_dist] [Rank 0] step=7790, skipped=97, lr=[1.1450582533570157e-07, 1.1450582533570157e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:32:47,038] [INFO] [timer.py:199:stop] epoch=0/micro_step=7790/global_step=7790, RunningAvgSamplesPerSec=105.69526710763964, CurrSamplesPerSec=110.04501902986237, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:32:47,131] [INFO] [logging.py:96:log_dist] [Rank 0] step=7790, skipped=134, lr=[6.72892070489814e-08, 6.72892070489814e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7789|ppo_ep: 1|act_loss: 0.032135009765625|cri_loss: 0.016845703125|unsuper_loss: 0.0
+average reward score: 5.25390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.39%) |Training time=0.45s (20.95%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7790|ppo_ep: 1|act_loss: 0.01026153564453125|cri_loss: 0.005901336669921875|unsuper_loss: 0.0
+average reward score: 5.9765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.20%) |Training time=0.46s (21.17%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.56
+[2023-04-14 13:32:51,323] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 7791|ppo_ep: 1|act_loss: 0.0299835205078125|cri_loss: 0.01529693603515625|unsuper_loss: 0.0
+average reward score: 6.15625
+-------------------------------------------------------------------------------------
+|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.65%) |Training time=0.42s (19.73%) |Others=0.10 (4.62%)|CurSamplesPerSec=15.07 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7792|ppo_ep: 1|act_loss: 0.0181121826171875|cri_loss: 0.00939178466796875|unsuper_loss: 0.0
+average reward score: 5.65625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.91%) |Training time=0.44s (20.41%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7793|ppo_ep: 1|act_loss: 0.05157470703125|cri_loss: 0.028289794921875|unsuper_loss: 0.0
+average reward score: 5.875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.64%) |Training time=0.45s (20.79%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7794|ppo_ep: 1|act_loss: 0.0053863525390625|cri_loss: 0.00293731689453125|unsuper_loss: 0.0
+average reward score: 5.12109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.55%) |Training time=0.45s (20.79%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7795|ppo_ep: 1|act_loss: 0.0040130615234375|cri_loss: 0.0021514892578125|unsuper_loss: 0.0
+average reward score: 5.46484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.76%) |Training time=0.44s (20.67%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7796|ppo_ep: 1|act_loss: -0.007541656494140625|cri_loss: -0.0037384033203125|unsuper_loss: 0.0
+average reward score: 5.26171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.53%) |Training time=0.45s (20.82%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7797|ppo_ep: 1|act_loss: 0.0087738037109375|cri_loss: 0.00452423095703125|unsuper_loss: 0.0
+average reward score: 6.40625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.63%) |Training time=0.45s (20.80%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7798|ppo_ep: 1|act_loss: -0.00363922119140625|cri_loss: -0.0017547607421875|unsuper_loss: 0.0
+average reward score: 5.765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.44%) |Training time=0.45s (20.90%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.56
+[2023-04-14 13:33:08,492] [INFO] [logging.py:96:log_dist] [Rank 0] step=7800, skipped=98, lr=[1.1091345110884795e-07, 1.1091345110884795e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:33:08,510] [INFO] [timer.py:199:stop] epoch=0/micro_step=7800/global_step=7800, RunningAvgSamplesPerSec=105.7045714232828, CurrSamplesPerSec=112.16469150844927, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:33:08,603] [INFO] [logging.py:96:log_dist] [Rank 0] step=7800, skipped=134, lr=[6.508917186028196e-08, 6.508917186028196e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7799|ppo_ep: 1|act_loss: 0.03814697265625|cri_loss: 0.020233154296875|unsuper_loss: 0.0
+average reward score: 5.65625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.50%) |Training time=0.45s (20.85%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7800|ppo_ep: 1|act_loss: -0.0081634521484375|cri_loss: -0.003772735595703125|unsuper_loss: 0.0
+average reward score: 4.70703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.48%) |Training time=0.45s (20.82%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7801|ppo_ep: 1|act_loss: -0.009796142578125|cri_loss: -0.0047454833984375|unsuper_loss: 0.0
+average reward score: 5.51171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.42%) |Training time=0.45s (19.35%) |Others=0.10 (4.22%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.56
+[2023-04-14 13:33:15,111] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, reducing to 4096
+epoch: 0|step: 7802|ppo_ep: 1|act_loss: -0.0110321044921875|cri_loss: -0.005035400390625|unsuper_loss: 0.0
+average reward score: 5.4609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.41%) |Training time=0.42s (19.85%) |Others=0.10 (4.74%)|CurSamplesPerSec=15.08 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7803|ppo_ep: 1|act_loss: 0.02301025390625|cri_loss: 0.01174163818359375|unsuper_loss: 0.0
+average reward score: 5.55078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.80%) |Training time=0.45s (20.66%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7804|ppo_ep: 1|act_loss: 0.0006136894226074219|cri_loss: 0.00036263465881347656|unsuper_loss: 0.0
+average reward score: 5.7890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.15%) |Training time=0.46s (21.19%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7805|ppo_ep: 1|act_loss: -0.0016498565673828125|cri_loss: -0.0007781982421875|unsuper_loss: 0.0
+average reward score: 6.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.71%) |Training time=0.44s (20.64%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7806|ppo_ep: 1|act_loss: 0.015472412109375|cri_loss: 0.00795745849609375|unsuper_loss: 0.0
+average reward score: 5.4609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.51%) |Training time=0.45s (20.83%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7807|ppo_ep: 1|act_loss: 0.0377197265625|cri_loss: 0.021759033203125|unsuper_loss: 0.0
+average reward score: 5.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.23%) |Training time=0.46s (21.22%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7808|ppo_ep: 1|act_loss: -0.02886962890625|cri_loss: -0.014007568359375|unsuper_loss: 0.0
+average reward score: 5.26171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.04%) |Training time=0.47s (21.37%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.56
+[2023-04-14 13:33:30,325] [INFO] [logging.py:96:log_dist] [Rank 0] step=7810, skipped=99, lr=[1.073776749444405e-07, 1.073776749444405e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:33:30,344] [INFO] [timer.py:199:stop] epoch=0/micro_step=7810/global_step=7810, RunningAvgSamplesPerSec=105.71154895925527, CurrSamplesPerSec=110.02490234735241, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:33:30,436] [INFO] [logging.py:96:log_dist] [Rank 0] step=7810, skipped=134, lr=[6.292522792395812e-08, 6.292522792395812e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7809|ppo_ep: 1|act_loss: -0.0090484619140625|cri_loss: -0.004367828369140625|unsuper_loss: 0.0
+average reward score: 5.5625
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.65%) |Training time=0.45s (20.02%) |Others=0.10 (4.33%)|CurSamplesPerSec=14.13 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7810|ppo_ep: 1|act_loss: 0.044677734375|cri_loss: 0.022857666015625|unsuper_loss: 0.0
+average reward score: 5.76953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.45%) |Training time=0.45s (20.90%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7811|ppo_ep: 1|act_loss: -0.0088348388671875|cri_loss: -0.00435638427734375|unsuper_loss: 0.0
+average reward score: 5.28515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.79%) |Training time=0.44s (20.62%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7812|ppo_ep: 1|act_loss: -0.028411865234375|cri_loss: -0.01348114013671875|unsuper_loss: 0.0
+average reward score: 5.10546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.80%) |Training time=0.44s (20.54%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7813|ppo_ep: 1|act_loss: -0.03485107421875|cri_loss: -0.0166015625|unsuper_loss: 0.0
+average reward score: 5.78125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.51%) |Training time=0.45s (20.84%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7814|ppo_ep: 1|act_loss: 0.0007061958312988281|cri_loss: 0.0004711151123046875|unsuper_loss: 0.0
+average reward score: 5.078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.52%) |Training time=0.45s (20.83%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7815|ppo_ep: 1|act_loss: -0.08453369140625|cri_loss: -0.03948974609375|unsuper_loss: 0.0
+average reward score: 5.54296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.61s (72.42%) |Training time=0.46s (20.95%) |Others=0.15 (6.64%)|CurSamplesPerSec=14.43 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7816|ppo_ep: 1|act_loss: 0.001010894775390625|cri_loss: 0.0006361007690429688|unsuper_loss: 0.0
+average reward score: 4.92578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.67s (75.08%) |Training time=0.45s (20.05%) |Others=0.11 (4.87%)|CurSamplesPerSec=14.36 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7817|ppo_ep: 1|act_loss: -0.04296875|cri_loss: -0.021209716796875|unsuper_loss: 0.0
+average reward score: 4.84375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.58%) |Training time=0.45s (20.80%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7818|ppo_ep: 1|act_loss: -0.0019664764404296875|cri_loss: -0.0007276535034179688|unsuper_loss: 0.0
+average reward score: 5.46875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.47%) |Training time=0.45s (20.88%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.56
+[2023-04-14 13:33:51,976] [INFO] [logging.py:96:log_dist] [Rank 0] step=7820, skipped=99, lr=[1.0351546681472116e-07, 1.0351546681472116e-07], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:33:51,994] [INFO] [timer.py:199:stop] epoch=0/micro_step=7820/global_step=7820, RunningAvgSamplesPerSec=105.7191999963779, CurrSamplesPerSec=114.77339353437402, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:33:52,087] [INFO] [logging.py:96:log_dist] [Rank 0] step=7820, skipped=134, lr=[6.079740731487898e-08, 6.079740731487898e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7819|ppo_ep: 1|act_loss: 0.0115509033203125|cri_loss: 0.00637054443359375|unsuper_loss: 0.0
+average reward score: 5.8359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.89%) |Training time=0.44s (20.54%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7820|ppo_ep: 1|act_loss: -0.0177001953125|cri_loss: -0.00807952880859375|unsuper_loss: 0.0
+average reward score: 5.40234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.54%) |Training time=0.42s (19.75%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56
+[2023-04-14 13:33:56,375] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 7821|ppo_ep: 1|act_loss: 0.01320648193359375|cri_loss: 0.0069122314453125|unsuper_loss: 0.0
+average reward score: 6.01953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.07%) |Training time=0.44s (20.82%) |Others=0.09 (4.10%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.56
+[2023-04-14 13:33:58,514] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 7822|ppo_ep: 1|act_loss: -0.004489898681640625|cri_loss: -0.001972198486328125|unsuper_loss: 0.0
+average reward score: 6.29296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.81%) |Training time=0.45s (20.98%) |Others=0.09 (4.21%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7823|ppo_ep: 1|act_loss: -0.0143280029296875|cri_loss: -0.00704193115234375|unsuper_loss: 0.0
+average reward score: 5.59375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.34%) |Training time=0.45s (21.02%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7824|ppo_ep: 1|act_loss: -0.003658294677734375|cri_loss: -0.0014791488647460938|unsuper_loss: 0.0
+average reward score: 5.6796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.35%) |Training time=0.45s (20.99%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7825|ppo_ep: 1|act_loss: 0.0084686279296875|cri_loss: 0.0049591064453125|unsuper_loss: 0.0
+average reward score: 5.63671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.50%) |Training time=0.45s (20.83%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7826|ppo_ep: 1|act_loss: -0.00666046142578125|cri_loss: -0.003215789794921875|unsuper_loss: 0.0
+average reward score: 5.00390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.30%) |Training time=0.45s (21.07%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7827|ppo_ep: 1|act_loss: 0.016510009765625|cri_loss: 0.0093536376953125|unsuper_loss: 0.0
+average reward score: 4.7734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.37%) |Training time=0.45s (20.96%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7828|ppo_ep: 1|act_loss: 0.022125244140625|cri_loss: 0.01151275634765625|unsuper_loss: 0.0
+average reward score: 5.15625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.44%) |Training time=0.45s (20.89%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56
+[2023-04-14 13:34:13,448] [INFO] [logging.py:96:log_dist] [Rank 0] step=7830, skipped=99, lr=[9.972324247574424e-08, 9.972324247574424e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:34:13,466] [INFO] [timer.py:199:stop] epoch=0/micro_step=7830/global_step=7830, RunningAvgSamplesPerSec=105.7271991751751, CurrSamplesPerSec=111.44625006331337, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:34:13,559] [INFO] [logging.py:96:log_dist] [Rank 0] step=7830, skipped=136, lr=[5.912118083573476e-08, 5.912118083573476e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7829|ppo_ep: 1|act_loss: 0.01406097412109375|cri_loss: 0.007244110107421875|unsuper_loss: 0.0
+average reward score: 5.25390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.46%) |Training time=0.45s (20.98%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7830|ppo_ep: 1|act_loss: 0.0462646484375|cri_loss: 0.0237579345703125|unsuper_loss: 0.0
+average reward score: 6.25
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.41%) |Training time=0.45s (20.94%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7831|ppo_ep: 1|act_loss: 0.0146026611328125|cri_loss: 0.00739288330078125|unsuper_loss: 0.0
+average reward score: 5.12109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.66%) |Training time=0.44s (20.66%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7832|ppo_ep: 1|act_loss: 0.0281982421875|cri_loss: 0.01439666748046875|unsuper_loss: 0.0
+average reward score: 5.42578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.82%) |Training time=0.44s (20.51%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7833|ppo_ep: 1|act_loss: 0.0078125|cri_loss: 0.00453948974609375|unsuper_loss: 0.0
+average reward score: 5.5390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.76s (76.40%) |Training time=0.45s (19.34%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.86 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7834|ppo_ep: 1|act_loss: 0.00756072998046875|cri_loss: 0.003940582275390625|unsuper_loss: 0.0
+average reward score: 5.359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.83%) |Training time=0.44s (20.39%) |Others=0.10 (4.79%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7835|ppo_ep: 1|act_loss: 0.00428009033203125|cri_loss: 0.002353668212890625|unsuper_loss: 0.0
+average reward score: 5.17578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.33%) |Training time=0.43s (20.07%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7836|ppo_ep: 1|act_loss: 0.00107574462890625|cri_loss: 0.0007843971252441406|unsuper_loss: 0.0
+average reward score: 4.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.18%) |Training time=0.46s (21.15%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7837|ppo_ep: 1|act_loss: 0.01763916015625|cri_loss: 0.009185791015625|unsuper_loss: 0.0
+average reward score: 6.05859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.71%) |Training time=0.48s (21.78%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7838|ppo_ep: 1|act_loss: -0.01302337646484375|cri_loss: -0.006412506103515625|unsuper_loss: 0.0
+average reward score: 5.3125
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.09%) |Training time=0.47s (21.35%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.56
+[2023-04-14 13:34:35,280] [INFO] [logging.py:96:log_dist] [Rank 0] step=7840, skipped=99, lr=[9.60010581374241e-08, 9.60010581374241e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:34:35,298] [INFO] [timer.py:199:stop] epoch=0/micro_step=7840/global_step=7840, RunningAvgSamplesPerSec=105.7335546704453, CurrSamplesPerSec=107.28166269409262, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:34:35,392] [INFO] [logging.py:96:log_dist] [Rank 0] step=7840, skipped=136, lr=[5.705846133496118e-08, 5.705846133496118e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7839|ppo_ep: 1|act_loss: -0.00060272216796875|cri_loss: 0.000186920166015625|unsuper_loss: 0.0
+average reward score: 5.5859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.21%) |Training time=0.46s (20.33%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.12 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7840|ppo_ep: 1|act_loss: 0.01113128662109375|cri_loss: 0.00600433349609375|unsuper_loss: 0.0
+average reward score: 5.60546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.38%) |Training time=0.48s (22.03%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.56
+[2023-04-14 13:34:39,625] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 4096, reducing to 2048
+epoch: 0|step: 7841|ppo_ep: 1|act_loss: 0.04718017578125|cri_loss: 0.026702880859375|unsuper_loss: 0.0
+average reward score: 5.7109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.88%) |Training time=0.44s (20.44%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7842|ppo_ep: 1|act_loss: -0.0233612060546875|cri_loss: -0.0114898681640625|unsuper_loss: 0.0
+average reward score: 5.92578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.98%) |Training time=0.46s (21.38%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7843|ppo_ep: 1|act_loss: -0.00299072265625|cri_loss: -0.001346588134765625|unsuper_loss: 0.0
+average reward score: 5.484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.72%) |Training time=0.47s (21.67%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7844|ppo_ep: 1|act_loss: -0.0024204254150390625|cri_loss: -0.0009531974792480469|unsuper_loss: 0.0
+average reward score: 5.0078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.67%) |Training time=0.47s (21.72%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7845|ppo_ep: 1|act_loss: 0.016998291015625|cri_loss: 0.0087432861328125|unsuper_loss: 0.0
+average reward score: 5.81640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.86%) |Training time=0.47s (21.62%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7846|ppo_ep: 1|act_loss: 0.0045928955078125|cri_loss: 0.0025310516357421875|unsuper_loss: 0.0
+average reward score: 5.671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.59%) |Training time=0.47s (21.45%) |Others=0.11 (4.96%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7847|ppo_ep: 1|act_loss: 0.007110595703125|cri_loss: 0.003620147705078125|unsuper_loss: 0.0
+average reward score: 5.28125
+-------------------------------------------------------------------------------------
+|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.53%) |Training time=0.50s (21.23%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.52 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7848|ppo_ep: 1|act_loss: -0.0013885498046875|cri_loss: -0.0004911422729492188|unsuper_loss: 0.0
+average reward score: 4.671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.12%) |Training time=0.46s (21.34%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.56
+[2023-04-14 13:34:57,189] [INFO] [logging.py:96:log_dist] [Rank 0] step=7850, skipped=100, lr=[9.271102205441317e-08, 9.271102205441317e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:34:57,208] [INFO] [timer.py:199:stop] epoch=0/micro_step=7850/global_step=7850, RunningAvgSamplesPerSec=105.73152737795587, CurrSamplesPerSec=106.00033328041914, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:34:57,301] [INFO] [logging.py:96:log_dist] [Rank 0] step=7850, skipped=136, lr=[5.5031952121065715e-08, 5.5031952121065715e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7849|ppo_ep: 1|act_loss: 0.003459930419921875|cri_loss: 0.001934051513671875|unsuper_loss: 0.0
+average reward score: 5.44921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.02%) |Training time=0.47s (21.43%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7850|ppo_ep: 1|act_loss: 0.007312774658203125|cri_loss: 0.003902435302734375|unsuper_loss: 0.0
+average reward score: 5.87890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.53%) |Training time=0.48s (21.84%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7851|ppo_ep: 1|act_loss: 0.00341033935546875|cri_loss: 0.002071380615234375|unsuper_loss: 0.0
+average reward score: 5.3671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.10%) |Training time=0.46s (21.35%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7852|ppo_ep: 1|act_loss: -0.00308990478515625|cri_loss: 0.0007801055908203125|unsuper_loss: 0.0
+average reward score: 5.4765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.55%) |Training time=0.48s (21.83%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7853|ppo_ep: 1|act_loss: 0.020660400390625|cri_loss: 0.010772705078125|unsuper_loss: 0.0
+average reward score: 5.33203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.72%) |Training time=0.47s (21.67%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7854|ppo_ep: 1|act_loss: 0.030853271484375|cri_loss: 0.0167236328125|unsuper_loss: 0.0
+average reward score: 5.0703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.80%) |Training time=0.47s (21.59%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7855|ppo_ep: 1|act_loss: 0.0005464553833007812|cri_loss: 0.0004324913024902344|unsuper_loss: 0.0
+average reward score: 5.0390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.88%) |Training time=0.47s (21.60%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7856|ppo_ep: 1|act_loss: -0.03692626953125|cri_loss: -0.0179595947265625|unsuper_loss: 0.0
+average reward score: 4.7734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.73%) |Training time=0.47s (21.67%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7857|ppo_ep: 1|act_loss: -0.0096435546875|cri_loss: -0.0046844482421875|unsuper_loss: 0.0
+average reward score: 5.5859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.92%) |Training time=0.47s (21.55%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7858|ppo_ep: 1|act_loss: -0.0118255615234375|cri_loss: -0.005863189697265625|unsuper_loss: 0.0
+average reward score: 5.328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.86%) |Training time=0.47s (21.57%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.56
+[2023-04-14 13:35:18,956] [INFO] [logging.py:96:log_dist] [Rank 0] step=7860, skipped=100, lr=[8.912206485681388e-08, 8.912206485681388e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:35:18,975] [INFO] [timer.py:199:stop] epoch=0/micro_step=7860/global_step=7860, RunningAvgSamplesPerSec=105.72896164280641, CurrSamplesPerSec=104.51759121109141, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:35:19,067] [INFO] [logging.py:96:log_dist] [Rank 0] step=7860, skipped=136, lr=[5.304168323180425e-08, 5.304168323180425e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7859|ppo_ep: 1|act_loss: 0.0003643035888671875|cri_loss: 0.00044035911560058594|unsuper_loss: 0.0
+average reward score: 5.453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.89%) |Training time=0.47s (21.60%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7860|ppo_ep: 1|act_loss: -0.00658416748046875|cri_loss: -0.0031833648681640625|unsuper_loss: 0.0
+average reward score: 5.69140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.90%) |Training time=0.47s (21.48%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7861|ppo_ep: 1|act_loss: 0.002376556396484375|cri_loss: 0.001617431640625|unsuper_loss: 0.0
+average reward score: 5.74609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.01%) |Training time=0.47s (21.51%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7862|ppo_ep: 1|act_loss: 0.0129241943359375|cri_loss: 0.006740570068359375|unsuper_loss: 0.0
+average reward score: 5.5859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.42%) |Training time=0.47s (21.65%) |Others=0.11 (4.93%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7863|ppo_ep: 1|act_loss: 0.0234375|cri_loss: 0.01204681396484375|unsuper_loss: 0.0
+average reward score: 5.44140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.61%) |Training time=0.48s (20.60%) |Others=0.11 (4.79%)|CurSamplesPerSec=13.66 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7864|ppo_ep: 1|act_loss: -0.00978851318359375|cri_loss: -0.0033740997314453125|unsuper_loss: 0.0
+average reward score: 5.36328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.05%) |Training time=0.46s (21.33%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7865|ppo_ep: 1|act_loss: 0.0022487640380859375|cri_loss: 0.0019283294677734375|unsuper_loss: 0.0
+average reward score: 5.625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.18%) |Training time=0.46s (21.24%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7866|ppo_ep: 1|act_loss: 0.0089569091796875|cri_loss: 0.00506591796875|unsuper_loss: 0.0
+average reward score: 5.484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.21%) |Training time=0.46s (21.16%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7867|ppo_ep: 1|act_loss: 0.0088348388671875|cri_loss: 0.004520416259765625|unsuper_loss: 0.0
+average reward score: 5.51171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.49%) |Training time=0.46s (20.98%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7868|ppo_ep: 1|act_loss: -0.03363037109375|cri_loss: -0.0160980224609375|unsuper_loss: 0.0
+average reward score: 5.90234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.71s (74.73%) |Training time=0.48s (20.89%) |Others=0.10 (4.39%)|CurSamplesPerSec=14.00 |AvgSamplesPerSec=14.56
+[2023-04-14 13:35:40,958] [INFO] [logging.py:96:log_dist] [Rank 0] step=7870, skipped=100, lr=[8.560330479489633e-08, 8.560330479489633e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:35:40,977] [INFO] [timer.py:199:stop] epoch=0/micro_step=7870/global_step=7870, RunningAvgSamplesPerSec=105.72843790630478, CurrSamplesPerSec=109.30619119053087, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:35:41,069] [INFO] [logging.py:96:log_dist] [Rank 0] step=7870, skipped=136, lr=[5.108768416776266e-08, 5.108768416776266e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7869|ppo_ep: 1|act_loss: 0.004215240478515625|cri_loss: 0.002651214599609375|unsuper_loss: 0.0
+average reward score: 5.265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.33%) |Training time=0.46s (21.13%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7870|ppo_ep: 1|act_loss: 0.0010814666748046875|cri_loss: 0.0006008148193359375|unsuper_loss: 0.0
+average reward score: 5.28125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.24%) |Training time=0.46s (21.13%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7871|ppo_ep: 1|act_loss: 0.0111083984375|cri_loss: 0.00591278076171875|unsuper_loss: 0.0
+average reward score: 5.2578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.36%) |Training time=0.45s (21.08%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7872|ppo_ep: 1|act_loss: -0.00525665283203125|cri_loss: -0.0023651123046875|unsuper_loss: 0.0
+average reward score: 5.3984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.22%) |Training time=0.46s (21.14%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7873|ppo_ep: 1|act_loss: 0.024139404296875|cri_loss: 0.0124359130859375|unsuper_loss: 0.0
+average reward score: 4.65234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.24%) |Training time=0.46s (21.22%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7874|ppo_ep: 1|act_loss: 0.008209228515625|cri_loss: 0.00452423095703125|unsuper_loss: 0.0
+average reward score: 5.24609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.61%) |Training time=0.47s (21.79%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7875|ppo_ep: 1|act_loss: -0.016693115234375|cri_loss: -0.00795745849609375|unsuper_loss: 0.0
+average reward score: 5.4453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.11%) |Training time=0.46s (21.36%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7876|ppo_ep: 1|act_loss: 0.00591278076171875|cri_loss: 0.003322601318359375|unsuper_loss: 0.0
+average reward score: 6.03515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.89%) |Training time=0.47s (21.49%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7877|ppo_ep: 1|act_loss: -0.0061492919921875|cri_loss: -0.00229644775390625|unsuper_loss: 0.0
+average reward score: 5.84375
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.78s (75.92%) |Training time=0.47s (19.87%) |Others=0.10 (4.21%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7878|ppo_ep: 1|act_loss: -0.009002685546875|cri_loss: -0.0041046142578125|unsuper_loss: 0.0
+average reward score: 5.7734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.19%) |Training time=0.46s (21.26%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.56
+[2023-04-14 13:36:02,778] [INFO] [logging.py:96:log_dist] [Rank 0] step=7880, skipped=100, lr=[8.215479402517389e-08, 8.215479402517389e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:36:02,797] [INFO] [timer.py:199:stop] epoch=0/micro_step=7880/global_step=7880, RunningAvgSamplesPerSec=105.72969054760189, CurrSamplesPerSec=104.98801479963392, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:36:02,889] [INFO] [logging.py:96:log_dist] [Rank 0] step=7880, skipped=136, lr=[4.9169983891921294e-08, 4.9169983891921294e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7879|ppo_ep: 1|act_loss: -0.016082763671875|cri_loss: -0.00780487060546875|unsuper_loss: 0.0
+average reward score: 5.21875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.89%) |Training time=0.47s (21.59%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7880|ppo_ep: 1|act_loss: 0.02557373046875|cri_loss: 0.0131072998046875|unsuper_loss: 0.0
+average reward score: 5.421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.83%) |Training time=0.47s (21.54%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7881|ppo_ep: 1|act_loss: 0.0496826171875|cri_loss: 0.025543212890625|unsuper_loss: 0.0
+average reward score: 5.15234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.47%) |Training time=0.48s (22.03%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7882|ppo_ep: 1|act_loss: 0.00325775146484375|cri_loss: 0.001895904541015625|unsuper_loss: 0.0
+average reward score: 5.42578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.26%) |Training time=0.48s (22.16%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7883|ppo_ep: 1|act_loss: 0.013031005859375|cri_loss: 0.006710052490234375|unsuper_loss: 0.0
+average reward score: 5.0859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.68%) |Training time=0.47s (21.80%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7884|ppo_ep: 1|act_loss: 0.00453948974609375|cri_loss: 0.0025043487548828125|unsuper_loss: 0.0
+average reward score: 5.1875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.36%) |Training time=0.48s (22.04%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7885|ppo_ep: 1|act_loss: -0.0011043548583984375|cri_loss: -0.0003352165222167969|unsuper_loss: 0.0
+average reward score: 5.703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.70%) |Training time=0.47s (21.75%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7886|ppo_ep: 1|act_loss: 0.020843505859375|cri_loss: 0.01062774658203125|unsuper_loss: 0.0
+average reward score: 5.5234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.52%) |Training time=0.48s (21.89%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7887|ppo_ep: 1|act_loss: 0.002925872802734375|cri_loss: 0.0016040802001953125|unsuper_loss: 0.0
+average reward score: 5.76953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.83%) |Training time=0.47s (21.66%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7888|ppo_ep: 1|act_loss: 0.0274200439453125|cri_loss: 0.01441192626953125|unsuper_loss: 0.0
+average reward score: 5.78515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.77%) |Training time=0.47s (21.63%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.56
+[2023-04-14 13:36:24,543] [INFO] [logging.py:96:log_dist] [Rank 0] step=7890, skipped=100, lr=[7.877658366289522e-08, 7.877658366289522e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:36:24,562] [INFO] [timer.py:199:stop] epoch=0/micro_step=7890/global_step=7890, RunningAvgSamplesPerSec=105.72515201238446, CurrSamplesPerSec=102.51301521834603, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:36:24,654] [INFO] [logging.py:96:log_dist] [Rank 0] step=7890, skipped=136, lr=[4.7288610829223645e-08, 4.7288610829223645e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7889|ppo_ep: 1|act_loss: -0.01506805419921875|cri_loss: -0.004730224609375|unsuper_loss: 0.0
+average reward score: 5.01953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.69%) |Training time=0.47s (21.81%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7890|ppo_ep: 1|act_loss: 0.00989532470703125|cri_loss: 0.00505828857421875|unsuper_loss: 0.0
+average reward score: 5.7890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.67%) |Training time=0.47s (21.73%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7891|ppo_ep: 1|act_loss: 0.031524658203125|cri_loss: 0.016021728515625|unsuper_loss: 0.0
+average reward score: 5.61328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.73%) |Training time=0.47s (21.76%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7892|ppo_ep: 1|act_loss: 0.004886627197265625|cri_loss: 0.0025768280029296875|unsuper_loss: 0.0
+average reward score: 5.5546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.60s (71.64%) |Training time=0.49s (21.75%) |Others=0.15 (6.61%)|CurSamplesPerSec=14.32 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7893|ppo_ep: 1|act_loss: 0.0214385986328125|cri_loss: 0.0110321044921875|unsuper_loss: 0.0
+average reward score: 6.4765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.66s (74.48%) |Training time=0.47s (21.09%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.36 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7894|ppo_ep: 1|act_loss: -0.004444122314453125|cri_loss: -0.0020580291748046875|unsuper_loss: 0.0
+average reward score: 5.390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.65%) |Training time=0.47s (21.75%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7895|ppo_ep: 1|act_loss: 0.03582763671875|cri_loss: 0.018310546875|unsuper_loss: 0.0
+average reward score: 5.14453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.60%) |Training time=0.48s (21.85%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7896|ppo_ep: 1|act_loss: 0.0079345703125|cri_loss: 0.005886077880859375|unsuper_loss: 0.0
+average reward score: 5.27734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.59%) |Training time=0.48s (21.83%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7897|ppo_ep: 1|act_loss: 0.0352783203125|cri_loss: 0.0183258056640625|unsuper_loss: 0.0
+average reward score: 6.36328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.64s (72.27%) |Training time=0.51s (22.65%) |Others=0.12 (5.09%)|CurSamplesPerSec=14.12 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7898|ppo_ep: 1|act_loss: 0.043914794921875|cri_loss: 0.023712158203125|unsuper_loss: 0.0
+average reward score: 5.28515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.99%) |Training time=0.46s (21.38%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.56
+[2023-04-14 13:36:46,507] [INFO] [logging.py:96:log_dist] [Rank 0] step=7900, skipped=100, lr=[7.54687237812874e-08, 7.54687237812874e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:36:46,526] [INFO] [timer.py:199:stop] epoch=0/micro_step=7900/global_step=7900, RunningAvgSamplesPerSec=105.7195812062558, CurrSamplesPerSec=103.41567842742634, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:36:46,618] [INFO] [logging.py:96:log_dist] [Rank 0] step=7900, skipped=136, lr=[4.544359286615785e-08, 4.544359286615785e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7899|ppo_ep: 1|act_loss: -0.022247314453125|cri_loss: -0.0109405517578125|unsuper_loss: 0.0
+average reward score: 5.23046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.47s (21.71%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7900|ppo_ep: 1|act_loss: 0.0120697021484375|cri_loss: 0.00836944580078125|unsuper_loss: 0.0
+average reward score: 5.29296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.86%) |Training time=0.47s (21.53%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7901|ppo_ep: 1|act_loss: -0.007129669189453125|cri_loss: -0.003482818603515625|unsuper_loss: 0.0
+average reward score: 5.30078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.90%) |Training time=0.47s (21.52%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7902|ppo_ep: 1|act_loss: 0.0109100341796875|cri_loss: 0.005550384521484375|unsuper_loss: 0.0
+average reward score: 5.29296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.05%) |Training time=0.46s (21.34%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7903|ppo_ep: 1|act_loss: 0.01129913330078125|cri_loss: 0.005908966064453125|unsuper_loss: 0.0
+average reward score: 5.2421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.73%) |Training time=0.47s (21.67%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7904|ppo_ep: 1|act_loss: 0.0010099411010742188|cri_loss: 0.0005588531494140625|unsuper_loss: 0.0
+average reward score: 5.76171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.73%) |Training time=0.47s (21.66%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7905|ppo_ep: 1|act_loss: 0.04443359375|cri_loss: 0.0255889892578125|unsuper_loss: 0.0
+average reward score: 4.86328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.87%) |Training time=0.47s (21.60%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7906|ppo_ep: 1|act_loss: 0.009063720703125|cri_loss: 0.004764556884765625|unsuper_loss: 0.0
+average reward score: 5.765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.71%) |Training time=0.47s (21.68%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7907|ppo_ep: 1|act_loss: -0.0076751708984375|cri_loss: -0.0036468505859375|unsuper_loss: 0.0
+average reward score: 5.1328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.98%) |Training time=0.46s (21.32%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7908|ppo_ep: 1|act_loss: -0.018890380859375|cri_loss: -0.00931549072265625|unsuper_loss: 0.0
+average reward score: 5.33984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.18%) |Training time=0.50s (21.11%) |Others=0.11 (4.71%)|CurSamplesPerSec=13.54 |AvgSamplesPerSec=14.56
+[2023-04-14 13:37:08,435] [INFO] [logging.py:96:log_dist] [Rank 0] step=7910, skipped=100, lr=[7.223126341081292e-08, 7.223126341081292e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:37:08,454] [INFO] [timer.py:199:stop] epoch=0/micro_step=7910/global_step=7910, RunningAvgSamplesPerSec=105.7167886290013, CurrSamplesPerSec=103.1705201890031, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:37:08,547] [INFO] [logging.py:96:log_dist] [Rank 0] step=7910, skipped=136, lr=[4.3634957350340844e-08, 4.3634957350340844e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7909|ppo_ep: 1|act_loss: -0.00423431396484375|cri_loss: -0.0020046234130859375|unsuper_loss: 0.0
+average reward score: 5.0703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.74%) |Training time=0.47s (21.72%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7910|ppo_ep: 1|act_loss: 0.030242919921875|cri_loss: 0.015960693359375|unsuper_loss: 0.0
+average reward score: 5.3046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.73%) |Training time=0.47s (21.67%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7911|ppo_ep: 1|act_loss: 0.00960540771484375|cri_loss: 0.0049896240234375|unsuper_loss: 0.0
+average reward score: 5.6015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.01%) |Training time=0.47s (21.47%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7912|ppo_ep: 1|act_loss: 0.034393310546875|cri_loss: 0.017669677734375|unsuper_loss: 0.0
+average reward score: 5.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.01%) |Training time=0.46s (21.34%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7913|ppo_ep: 1|act_loss: -0.016571044921875|cri_loss: -0.0081634521484375|unsuper_loss: 0.0
+average reward score: 5.73828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.05%) |Training time=0.47s (21.38%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7914|ppo_ep: 1|act_loss: -0.0011844635009765625|cri_loss: -0.00016069412231445312|unsuper_loss: 0.0
+average reward score: 4.765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.97%) |Training time=0.46s (21.40%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7915|ppo_ep: 1|act_loss: -0.01568603515625|cri_loss: -0.007717132568359375|unsuper_loss: 0.0
+average reward score: 5.6015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.94%) |Training time=0.46s (21.44%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7916|ppo_ep: 1|act_loss: 0.031219482421875|cri_loss: 0.0164794921875|unsuper_loss: 0.0
+average reward score: 5.03125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.75%) |Training time=0.47s (21.65%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7917|ppo_ep: 1|act_loss: 0.0123443603515625|cri_loss: 0.006290435791015625|unsuper_loss: 0.0
+average reward score: 5.88671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.73%) |Training time=0.47s (21.63%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7918|ppo_ep: 1|act_loss: 0.0018014907836914062|cri_loss: 0.001071929931640625|unsuper_loss: 0.0
+average reward score: 5.1015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.87%) |Training time=0.47s (21.51%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.56
+[2023-04-14 13:37:30,149] [INFO] [logging.py:96:log_dist] [Rank 0] step=7920, skipped=100, lr=[6.906425053844542e-08, 6.906425053844542e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:37:30,168] [INFO] [timer.py:199:stop] epoch=0/micro_step=7920/global_step=7920, RunningAvgSamplesPerSec=105.7158984428223, CurrSamplesPerSec=105.03155851879676, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:37:30,260] [INFO] [logging.py:96:log_dist] [Rank 0] step=7920, skipped=136, lr=[4.186273109011374e-08, 4.186273109011374e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7919|ppo_ep: 1|act_loss: 0.0194854736328125|cri_loss: 0.0098876953125|unsuper_loss: 0.0
+average reward score: 4.953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.88%) |Training time=0.47s (21.59%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7920|ppo_ep: 1|act_loss: -0.015533447265625|cri_loss: -0.007617950439453125|unsuper_loss: 0.0
+average reward score: 5.390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.85%) |Training time=0.47s (21.52%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7921|ppo_ep: 1|act_loss: 0.0017957687377929688|cri_loss: 0.0020465850830078125|unsuper_loss: 0.0
+average reward score: 5.64453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.81%) |Training time=0.47s (21.57%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7922|ppo_ep: 1|act_loss: -0.0041046142578125|cri_loss: -0.0018215179443359375|unsuper_loss: 0.0
+average reward score: 5.73828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.60s (70.66%) |Training time=0.47s (20.87%) |Others=0.19 (8.47%)|CurSamplesPerSec=14.14 |AvgSamplesPerSec=14.56
+[2023-04-14 13:37:39,018] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 7923|ppo_ep: 1|act_loss: 0.0170135498046875|cri_loss: 0.00887298583984375|unsuper_loss: 0.0
+average reward score: 4.66796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.47s (21.58%) |Others=0.09 (4.16%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.56
+[2023-04-14 13:37:41,177] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 7924|ppo_ep: 1|act_loss: -0.002185821533203125|cri_loss: -0.001056671142578125|unsuper_loss: 0.0
+average reward score: 5.3359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.23%) |Training time=0.46s (21.51%) |Others=0.09 (4.26%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7925|ppo_ep: 1|act_loss: 0.0026702880859375|cri_loss: 0.0013885498046875|unsuper_loss: 0.0
+average reward score: 5.00390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.34%) |Training time=0.48s (22.10%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7926|ppo_ep: 1|act_loss: -0.0080108642578125|cri_loss: -0.0037822723388671875|unsuper_loss: 0.0
+average reward score: 5.0390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.17%) |Training time=0.47s (21.22%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7927|ppo_ep: 1|act_loss: -0.0149993896484375|cri_loss: -0.007312774658203125|unsuper_loss: 0.0
+average reward score: 4.66796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.38%) |Training time=0.46s (20.27%) |Others=0.10 (4.35%)|CurSamplesPerSec=14.17 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7928|ppo_ep: 1|act_loss: -0.014862060546875|cri_loss: -0.006984710693359375|unsuper_loss: 0.0
+average reward score: 4.99609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.38%) |Training time=0.45s (20.95%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.56
+[2023-04-14 13:37:51,969] [INFO] [logging.py:96:log_dist] [Rank 0] step=7930, skipped=100, lr=[6.596773210695512e-08, 6.596773210695512e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:37:51,987] [INFO] [timer.py:199:stop] epoch=0/micro_step=7930/global_step=7930, RunningAvgSamplesPerSec=105.71659977392, CurrSamplesPerSec=109.8774545198531, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:37:52,080] [INFO] [logging.py:96:log_dist] [Rank 0] step=7930, skipped=138, lr=[4.047118241664511e-08, 4.047118241664511e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7929|ppo_ep: 1|act_loss: 0.017242431640625|cri_loss: 0.009002685546875|unsuper_loss: 0.0
+average reward score: 5.3515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.17%) |Training time=0.45s (21.17%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7930|ppo_ep: 1|act_loss: -0.021514892578125|cri_loss: -0.010650634765625|unsuper_loss: 0.0
+average reward score: 5.29296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.36%) |Training time=0.45s (20.96%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7931|ppo_ep: 1|act_loss: 0.025634765625|cri_loss: 0.0132598876953125|unsuper_loss: 0.0
+average reward score: 6.0390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.51%) |Training time=0.45s (20.91%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7932|ppo_ep: 1|act_loss: 0.01055145263671875|cri_loss: 0.005443572998046875|unsuper_loss: 0.0
+average reward score: 5.9140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.24%) |Training time=0.45s (21.09%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7933|ppo_ep: 1|act_loss: -0.01123046875|cri_loss: -0.005527496337890625|unsuper_loss: 0.0
+average reward score: 6.484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.25%) |Training time=0.45s (21.09%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7934|ppo_ep: 1|act_loss: 0.0125274658203125|cri_loss: 0.00664520263671875|unsuper_loss: 0.0
+average reward score: 5.55859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.53%) |Training time=0.47s (21.84%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7935|ppo_ep: 1|act_loss: 0.00568389892578125|cri_loss: 0.003032684326171875|unsuper_loss: 0.0
+average reward score: 6.0234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.03%) |Training time=0.49s (22.20%) |Others=0.10 (4.77%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7936|ppo_ep: 1|act_loss: -0.01641845703125|cri_loss: -0.00799560546875|unsuper_loss: 0.0
+average reward score: 4.42578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.99%) |Training time=0.46s (21.27%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7937|ppo_ep: 1|act_loss: 0.0003528594970703125|cri_loss: 0.00023508071899414062|unsuper_loss: 0.0
+average reward score: 5.4375
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.61s (69.68%) |Training time=0.60s (26.04%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7938|ppo_ep: 1|act_loss: 0.043304443359375|cri_loss: 0.026031494140625|unsuper_loss: 0.0
+average reward score: 5.140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.02%) |Training time=0.46s (21.32%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.56
+[2023-04-14 13:38:13,739] [INFO] [logging.py:96:log_dist] [Rank 0] step=7940, skipped=100, lr=[6.294175401421511e-08, 6.294175401421511e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:38:13,757] [INFO] [timer.py:199:stop] epoch=0/micro_step=7940/global_step=7940, RunningAvgSamplesPerSec=105.71327649972606, CurrSamplesPerSec=107.65157898873818, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:38:13,849] [INFO] [logging.py:96:log_dist] [Rank 0] step=7940, skipped=138, lr=[3.876455865061551e-08, 3.876455865061551e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7939|ppo_ep: 1|act_loss: -0.003467559814453125|cri_loss: -0.0015869140625|unsuper_loss: 0.0
+average reward score: 5.92578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.23%) |Training time=0.46s (21.24%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7940|ppo_ep: 1|act_loss: -0.00472259521484375|cri_loss: -0.0023040771484375|unsuper_loss: 0.0
+average reward score: 5.80859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.28%) |Training time=0.43s (20.00%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7941|ppo_ep: 1|act_loss: 0.0321044921875|cri_loss: 0.01654052734375|unsuper_loss: 0.0
+average reward score: 5.54296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.09%) |Training time=0.44s (20.34%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7942|ppo_ep: 1|act_loss: 0.016448974609375|cri_loss: 0.008331298828125|unsuper_loss: 0.0
+average reward score: 5.58984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.78%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7943|ppo_ep: 1|act_loss: -0.0034580230712890625|cri_loss: -0.00157928466796875|unsuper_loss: 0.0
+average reward score: 5.515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.61%) |Training time=0.47s (21.87%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7944|ppo_ep: 1|act_loss: -0.0009264945983886719|cri_loss: -2.09808349609375e-05|unsuper_loss: 0.0
+average reward score: 4.9375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.53%) |Training time=0.47s (21.83%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7945|ppo_ep: 1|act_loss: 0.024078369140625|cri_loss: 0.01306915283203125|unsuper_loss: 0.0
+average reward score: 5.13671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.47%) |Training time=0.48s (22.01%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7946|ppo_ep: 1|act_loss: 0.002613067626953125|cri_loss: 0.00144195556640625|unsuper_loss: 0.0
+average reward score: 5.34375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.77%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7947|ppo_ep: 1|act_loss: 0.00836944580078125|cri_loss: 0.004421234130859375|unsuper_loss: 0.0
+average reward score: 4.83203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.46%) |Training time=0.47s (21.88%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7948|ppo_ep: 1|act_loss: -0.0103759765625|cri_loss: -0.005062103271484375|unsuper_loss: 0.0
+average reward score: 5.44140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.67%) |Training time=0.47s (21.68%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56
+[2023-04-14 13:38:35,426] [INFO] [logging.py:96:log_dist] [Rank 0] step=7950, skipped=100, lr=[5.998636111252047e-08, 5.998636111252047e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:38:35,444] [INFO] [timer.py:199:stop] epoch=0/micro_step=7950/global_step=7950, RunningAvgSamplesPerSec=105.71354108782916, CurrSamplesPerSec=107.05438952760315, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:38:35,537] [INFO] [logging.py:96:log_dist] [Rank 0] step=7950, skipped=138, lr=[3.709441633123367e-08, 3.709441633123367e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7949|ppo_ep: 1|act_loss: -0.00852203369140625|cri_loss: -0.004192352294921875|unsuper_loss: 0.0
+average reward score: 5.1484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.99%) |Training time=0.47s (21.49%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7950|ppo_ep: 1|act_loss: -0.00669097900390625|cri_loss: -0.0032520294189453125|unsuper_loss: 0.0
+average reward score: 5.88671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.76%) |Training time=0.47s (21.61%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7951|ppo_ep: 1|act_loss: -0.005435943603515625|cri_loss: -0.0026531219482421875|unsuper_loss: 0.0
+average reward score: 5.87890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.03%) |Training time=0.46s (21.42%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7952|ppo_ep: 1|act_loss: 0.0321044921875|cri_loss: 0.016448974609375|unsuper_loss: 0.0
+average reward score: 6.01953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.66%) |Training time=0.50s (21.06%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.58 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7953|ppo_ep: 1|act_loss: 0.006015777587890625|cri_loss: 0.0031280517578125|unsuper_loss: 0.0
+average reward score: 4.9921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.42%) |Training time=0.48s (22.05%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7954|ppo_ep: 1|act_loss: -0.0130157470703125|cri_loss: -0.00617218017578125|unsuper_loss: 0.0
+average reward score: 5.6484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.17%) |Training time=0.48s (22.21%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7955|ppo_ep: 1|act_loss: -0.00555419921875|cri_loss: -0.0027217864990234375|unsuper_loss: 0.0
+average reward score: 5.0703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.23%) |Training time=0.48s (22.00%) |Others=0.10 (4.77%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7956|ppo_ep: 1|act_loss: -0.01708984375|cri_loss: -0.00836944580078125|unsuper_loss: 0.0
+average reward score: 5.19140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.67%) |Training time=0.48s (21.74%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.49 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7957|ppo_ep: 1|act_loss: -0.010009765625|cri_loss: -0.004848480224609375|unsuper_loss: 0.0
+average reward score: 5.65625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.54%) |Training time=0.45s (20.79%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7958|ppo_ep: 1|act_loss: 0.0113372802734375|cri_loss: 0.006534576416015625|unsuper_loss: 0.0
+average reward score: 5.64453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.64%) |Training time=0.44s (20.67%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.56
+[2023-04-14 13:38:57,265] [INFO] [logging.py:96:log_dist] [Rank 0] step=7960, skipped=100, lr=[5.710159720792351e-08, 5.710159720792351e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:38:57,284] [INFO] [timer.py:199:stop] epoch=0/micro_step=7960/global_step=7960, RunningAvgSamplesPerSec=105.71254016421052, CurrSamplesPerSec=113.03297482527259, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:38:57,376] [INFO] [logging.py:96:log_dist] [Rank 0] step=7960, skipped=138, lr=[3.5460780214038593e-08, 3.5460780214038593e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7959|ppo_ep: 1|act_loss: 0.0247802734375|cri_loss: 0.01277923583984375|unsuper_loss: 0.0
+average reward score: 5.609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.63%) |Training time=0.45s (20.81%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7960|ppo_ep: 1|act_loss: -0.0133819580078125|cri_loss: -0.00598907470703125|unsuper_loss: 0.0
+average reward score: 5.796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.47%) |Training time=0.45s (20.87%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7961|ppo_ep: 1|act_loss: -0.01129913330078125|cri_loss: -0.0054931640625|unsuper_loss: 0.0
+average reward score: 5.17578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.56%) |Training time=0.44s (20.75%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.56
+epoch: 0|step: 7962|ppo_ep: 1|act_loss: -0.004425048828125|cri_loss: -0.0021190643310546875|unsuper_loss: 0.0
+average reward score: 5.2109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.61%) |Training time=0.44s (20.73%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.57
+epoch: 0|step: 7963|ppo_ep: 1|act_loss: -0.0086669921875|cri_loss: -0.003204345703125|unsuper_loss: 0.0
+average reward score: 5.953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.60%) |Training time=0.44s (20.80%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.57
+epoch: 0|step: 7964|ppo_ep: 1|act_loss: 0.00540924072265625|cri_loss: 0.002834320068359375|unsuper_loss: 0.0
+average reward score: 5.34375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.44%) |Training time=0.45s (20.88%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.57
+epoch: 0|step: 7965|ppo_ep: 1|act_loss: 0.0267333984375|cri_loss: 0.01537322998046875|unsuper_loss: 0.0
+average reward score: 5.1953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.68%) |Training time=0.44s (20.73%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.57
+epoch: 0|step: 7966|ppo_ep: 1|act_loss: 0.00862884521484375|cri_loss: 0.005084991455078125|unsuper_loss: 0.0
+average reward score: 6.19921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.93%) |Training time=0.44s (20.38%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.57
+epoch: 0|step: 7967|ppo_ep: 1|act_loss: -0.0021800994873046875|cri_loss: -0.0008406639099121094|unsuper_loss: 0.0
+average reward score: 5.03515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.62%) |Training time=0.44s (19.02%) |Others=0.10 (4.35%)|CurSamplesPerSec=13.85 |AvgSamplesPerSec=14.57
+epoch: 0|step: 7968|ppo_ep: 1|act_loss: -0.0058746337890625|cri_loss: -0.0027408599853515625|unsuper_loss: 0.0
+average reward score: 6.0234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.97%) |Training time=0.44s (20.41%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.57
+[2023-04-14 13:39:18,820] [INFO] [logging.py:96:log_dist] [Rank 0] step=7970, skipped=100, lr=[5.4287505059585056e-08, 5.4287505059585056e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:39:18,838] [INFO] [timer.py:199:stop] epoch=0/micro_step=7970/global_step=7970, RunningAvgSamplesPerSec=105.72253965619953, CurrSamplesPerSec=114.09618176810422, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:39:18,931] [INFO] [logging.py:96:log_dist] [Rank 0] step=7970, skipped=138, lr=[3.38636745134574e-08, 3.38636745134574e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7969|ppo_ep: 1|act_loss: -0.0057830810546875|cri_loss: -0.0027828216552734375|unsuper_loss: 0.0
+average reward score: 5.28125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.68%) |Training time=0.44s (20.74%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.57
+epoch: 0|step: 7970|ppo_ep: 1|act_loss: 0.0011510848999023438|cri_loss: 0.0006318092346191406|unsuper_loss: 0.0
+average reward score: 4.9609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.70%) |Training time=0.44s (20.60%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.57
+epoch: 0|step: 7971|ppo_ep: 1|act_loss: 0.027252197265625|cri_loss: 0.01389312744140625|unsuper_loss: 0.0
+average reward score: 4.7421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.55%) |Training time=0.45s (20.86%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.57
+epoch: 0|step: 7972|ppo_ep: 1|act_loss: -0.0053253173828125|cri_loss: -0.00258636474609375|unsuper_loss: 0.0
+average reward score: 5.6875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.86%) |Training time=0.44s (20.41%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.57
+epoch: 0|step: 7973|ppo_ep: 1|act_loss: -0.009918212890625|cri_loss: -0.004913330078125|unsuper_loss: 0.0
+average reward score: 5.375
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.27%) |Training time=0.43s (20.12%) |Others=0.10 (4.61%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.57
+epoch: 0|step: 7974|ppo_ep: 1|act_loss: 0.059112548828125|cri_loss: 0.0310211181640625|unsuper_loss: 0.0
+average reward score: 5.05078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.58%) |Training time=0.44s (20.75%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.57
+epoch: 0|step: 7975|ppo_ep: 1|act_loss: 0.00844573974609375|cri_loss: 0.004795074462890625|unsuper_loss: 0.0
+average reward score: 5.50390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.86%) |Training time=0.44s (20.55%) |Others=0.10 (4.59%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.57
+epoch: 0|step: 7976|ppo_ep: 1|act_loss: -0.0298004150390625|cri_loss: -0.01467132568359375|unsuper_loss: 0.0
+average reward score: 5.1953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.82%) |Training time=0.44s (20.47%) |Others=0.10 (4.71%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.57
+epoch: 0|step: 7977|ppo_ep: 1|act_loss: 0.008209228515625|cri_loss: 0.00433349609375|unsuper_loss: 0.0
+average reward score: 5.1953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.86%) |Training time=0.44s (20.53%) |Others=0.10 (4.61%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.57
+epoch: 0|step: 7978|ppo_ep: 1|act_loss: -0.0465087890625|cri_loss: -0.0218505859375|unsuper_loss: 0.0
+average reward score: 5.5
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.80%) |Training time=0.44s (20.52%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.57
+[2023-04-14 13:39:40,188] [INFO] [logging.py:96:log_dist] [Rank 0] step=7980, skipped=100, lr=[5.154412637913806e-08, 5.154412637913806e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:39:40,206] [INFO] [timer.py:199:stop] epoch=0/micro_step=7980/global_step=7980, RunningAvgSamplesPerSec=105.7339108712191, CurrSamplesPerSec=115.24871199527388, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:39:40,299] [INFO] [logging.py:96:log_dist] [Rank 0] step=7980, skipped=138, lr=[3.230312290245008e-08, 3.230312290245008e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7979|ppo_ep: 1|act_loss: 0.0009937286376953125|cri_loss: 0.0007677078247070312|unsuper_loss: 0.0
+average reward score: 5.5
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.76%) |Training time=0.44s (20.65%) |Others=0.10 (4.59%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.57
+epoch: 0|step: 7980|ppo_ep: 1|act_loss: -0.01158905029296875|cri_loss: -0.005451202392578125|unsuper_loss: 0.0
+average reward score: 5.140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.73%) |Training time=0.44s (20.58%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.57
+epoch: 0|step: 7981|ppo_ep: 1|act_loss: -0.03515625|cri_loss: -0.01605224609375|unsuper_loss: 0.0
+average reward score: 5.3671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.45%) |Training time=0.45s (20.98%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.57
+epoch: 0|step: 7982|ppo_ep: 1|act_loss: 0.022247314453125|cri_loss: 0.01145172119140625|unsuper_loss: 0.0
+average reward score: 5.3359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.63%) |Training time=0.44s (20.68%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.57
+epoch: 0|step: 7983|ppo_ep: 1|act_loss: -0.014556884765625|cri_loss: -0.007183074951171875|unsuper_loss: 0.0
+average reward score: 5.1328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.69s (73.36%) |Training time=0.51s (22.29%) |Others=0.10 (4.35%)|CurSamplesPerSec=13.90 |AvgSamplesPerSec=14.57
+epoch: 0|step: 7984|ppo_ep: 1|act_loss: -0.015045166015625|cri_loss: -0.0073394775390625|unsuper_loss: 0.0
+average reward score: 5.13671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.76%) |Training time=0.44s (20.57%) |Others=0.10 (4.67%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.57
+epoch: 0|step: 7985|ppo_ep: 1|act_loss: 0.075439453125|cri_loss: 0.042724609375|unsuper_loss: 0.0
+average reward score: 5.328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.75%) |Training time=0.44s (20.47%) |Others=0.10 (4.77%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.57
+epoch: 0|step: 7986|ppo_ep: 1|act_loss: 0.00634002685546875|cri_loss: 0.003490447998046875|unsuper_loss: 0.0
+average reward score: 5.75390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.31%) |Training time=0.47s (21.12%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.57
+epoch: 0|step: 7987|ppo_ep: 1|act_loss: -0.03533935546875|cri_loss: -0.0172882080078125|unsuper_loss: 0.0
+average reward score: 4.9921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.97%) |Training time=0.44s (20.34%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.57
+epoch: 0|step: 7988|ppo_ep: 1|act_loss: -0.023040771484375|cri_loss: -0.01068115234375|unsuper_loss: 0.0
+average reward score: 5.91015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.34%) |Training time=0.45s (20.95%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.57
+[2023-04-14 13:40:01,805] [INFO] [logging.py:96:log_dist] [Rank 0] step=7990, skipped=100, lr=[4.887150183007368e-08, 4.887150183007368e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:40:01,823] [INFO] [timer.py:199:stop] epoch=0/micro_step=7990/global_step=7990, RunningAvgSamplesPerSec=105.74197867872607, CurrSamplesPerSec=112.59064195032909, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:40:01,916] [INFO] [logging.py:96:log_dist] [Rank 0] step=7990, skipped=138, lr=[3.077914851215585e-08, 3.077914851215585e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7989|ppo_ep: 1|act_loss: -0.03289794921875|cri_loss: -0.016021728515625|unsuper_loss: 0.0
+average reward score: 4.76171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.52%) |Training time=0.45s (20.90%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.57
+epoch: 0|step: 7990|ppo_ep: 1|act_loss: -0.03704833984375|cri_loss: -0.016448974609375|unsuper_loss: 0.0
+average reward score: 5.578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.88%) |Training time=0.44s (20.43%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.57
+epoch: 0|step: 7991|ppo_ep: 1|act_loss: 0.00550079345703125|cri_loss: 0.003116607666015625|unsuper_loss: 0.0
+average reward score: 5.703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.54%) |Training time=0.45s (20.87%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.57
+epoch: 0|step: 7992|ppo_ep: 1|act_loss: -0.0137481689453125|cri_loss: -0.006755828857421875|unsuper_loss: 0.0
+average reward score: 5.78125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.64%) |Training time=0.44s (20.66%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.57
+epoch: 0|step: 7993|ppo_ep: 1|act_loss: -0.005077362060546875|cri_loss: -0.00238800048828125|unsuper_loss: 0.0
+average reward score: 5.66015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.78%) |Training time=0.44s (20.63%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.57
+epoch: 0|step: 7994|ppo_ep: 1|act_loss: -0.0119781494140625|cri_loss: -0.005706787109375|unsuper_loss: 0.0
+average reward score: 6.09765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.62%) |Training time=0.44s (20.70%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.57
+epoch: 0|step: 7995|ppo_ep: 1|act_loss: 0.0263214111328125|cri_loss: 0.01381683349609375|unsuper_loss: 0.0
+average reward score: 6.078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.54%) |Training time=0.45s (20.87%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.57
+epoch: 0|step: 7996|ppo_ep: 1|act_loss: -0.0144805908203125|cri_loss: -0.007122039794921875|unsuper_loss: 0.0
+average reward score: 4.9921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.81%) |Training time=0.44s (20.48%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.57
+epoch: 0|step: 7997|ppo_ep: 1|act_loss: -0.007595062255859375|cri_loss: -0.003307342529296875|unsuper_loss: 0.0
+average reward score: 5.2265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.64%) |Training time=0.44s (20.69%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.57
+epoch: 0|step: 7998|ppo_ep: 1|act_loss: 0.001560211181640625|cri_loss: 0.0010843276977539062|unsuper_loss: 0.0
+average reward score: 5.5625
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.38%) |Training time=0.45s (19.30%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.80 |AvgSamplesPerSec=14.57
+[2023-04-14 13:40:23,385] [INFO] [logging.py:96:log_dist] [Rank 0] step=8000, skipped=100, lr=[4.6269671027135476e-08, 4.6269671027135476e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:40:23,403] [INFO] [timer.py:199:stop] epoch=0/micro_step=8000/global_step=8000, RunningAvgSamplesPerSec=105.75146589136898, CurrSamplesPerSec=110.35337026640816, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:40:23,496] [INFO] [logging.py:96:log_dist] [Rank 0] step=8000, skipped=138, lr=[2.9291773931551525e-08, 2.9291773931551525e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 7999|ppo_ep: 1|act_loss: 0.01483154296875|cri_loss: 0.007595062255859375|unsuper_loss: 0.0
+average reward score: 6.6484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.47%) |Training time=0.45s (21.00%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8000|ppo_ep: 1|act_loss: -0.00762176513671875|cri_loss: -0.00371551513671875|unsuper_loss: 0.0
+average reward score: 5.51953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.78%) |Training time=0.44s (20.52%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8001|ppo_ep: 1|act_loss: -0.013580322265625|cri_loss: -0.006587982177734375|unsuper_loss: 0.0
+average reward score: 5.49609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.74%) |Training time=0.44s (20.66%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8002|ppo_ep: 1|act_loss: -0.0023956298828125|cri_loss: -0.001026153564453125|unsuper_loss: 0.0
+average reward score: 5.421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.99%) |Training time=0.46s (21.35%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8003|ppo_ep: 1|act_loss: -0.00656890869140625|cri_loss: -0.0019073486328125|unsuper_loss: 0.0
+average reward score: 5.62109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.36%) |Training time=0.48s (22.09%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8004|ppo_ep: 1|act_loss: -0.00583648681640625|cri_loss: -0.002777099609375|unsuper_loss: 0.0
+average reward score: 5.42578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.40%) |Training time=0.48s (21.97%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8005|ppo_ep: 1|act_loss: -0.00774383544921875|cri_loss: -0.00351715087890625|unsuper_loss: 0.0
+average reward score: 5.71875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.28%) |Training time=0.48s (22.19%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8006|ppo_ep: 1|act_loss: 0.00832366943359375|cri_loss: 0.00452423095703125|unsuper_loss: 0.0
+average reward score: 4.890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.34%) |Training time=0.48s (22.02%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8007|ppo_ep: 1|act_loss: 0.0088043212890625|cri_loss: 0.0045166015625|unsuper_loss: 0.0
+average reward score: 5.5390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.23%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8008|ppo_ep: 1|act_loss: -0.0028076171875|cri_loss: -0.00128936767578125|unsuper_loss: 0.0
+average reward score: 6.09375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.40%) |Training time=0.48s (21.95%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.57
+[2023-04-14 13:40:45,038] [INFO] [logging.py:96:log_dist] [Rank 0] step=8010, skipped=100, lr=[4.373867253573118e-08, 4.373867253573118e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:40:45,056] [INFO] [timer.py:199:stop] epoch=0/micro_step=8010/global_step=8010, RunningAvgSamplesPerSec=105.74922781707347, CurrSamplesPerSec=100.61894819858148, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:40:45,149] [INFO] [logging.py:96:log_dist] [Rank 0] step=8010, skipped=138, lr=[2.7841021207116736e-08, 2.7841021207116736e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 8009|ppo_ep: 1|act_loss: -0.0247039794921875|cri_loss: -0.01165771484375|unsuper_loss: 0.0
+average reward score: 5.078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.40%) |Training time=0.48s (22.11%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8010|ppo_ep: 1|act_loss: -0.025115966796875|cri_loss: -0.01239776611328125|unsuper_loss: 0.0
+average reward score: 5.34765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.95%) |Training time=0.49s (22.45%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8011|ppo_ep: 1|act_loss: 0.02227783203125|cri_loss: 0.01142120361328125|unsuper_loss: 0.0
+average reward score: 6.1171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.42%) |Training time=0.48s (22.05%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8012|ppo_ep: 1|act_loss: -0.01776123046875|cri_loss: -0.00848388671875|unsuper_loss: 0.0
+average reward score: 5.2734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.26%) |Training time=0.48s (22.13%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8013|ppo_ep: 1|act_loss: -0.00836944580078125|cri_loss: -0.00402069091796875|unsuper_loss: 0.0
+average reward score: 5.55859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.48s (21.99%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8014|ppo_ep: 1|act_loss: -0.02996826171875|cri_loss: -0.01462554931640625|unsuper_loss: 0.0
+average reward score: 4.5703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.12%) |Training time=0.48s (20.53%) |Others=0.10 (4.35%)|CurSamplesPerSec=13.65 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8015|ppo_ep: 1|act_loss: 0.020477294921875|cri_loss: 0.0107574462890625|unsuper_loss: 0.0
+average reward score: 5.6328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.93%) |Training time=0.47s (21.54%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8016|ppo_ep: 1|act_loss: -0.00269317626953125|cri_loss: -0.0012426376342773438|unsuper_loss: 0.0
+average reward score: 5.9140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.01%) |Training time=0.49s (21.61%) |Others=0.10 (4.38%)|CurSamplesPerSec=14.00 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8017|ppo_ep: 1|act_loss: 0.0018358230590820312|cri_loss: 0.0013628005981445312|unsuper_loss: 0.0
+average reward score: 5.3984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.06%) |Training time=0.49s (22.44%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8018|ppo_ep: 1|act_loss: -0.00238037109375|cri_loss: -0.0007963180541992188|unsuper_loss: 0.0
+average reward score: 5.1796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.33%) |Training time=0.48s (22.07%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.57
+[2023-04-14 13:41:07,086] [INFO] [logging.py:96:log_dist] [Rank 0] step=8020, skipped=100, lr=[4.127854387136491e-08, 4.127854387136491e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:41:07,104] [INFO] [timer.py:199:stop] epoch=0/micro_step=8020/global_step=8020, RunningAvgSamplesPerSec=105.74197847665799, CurrSamplesPerSec=100.35315457578353, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:41:07,197] [INFO] [logging.py:96:log_dist] [Rank 0] step=8020, skipped=138, lr=[2.6426911842506175e-08, 2.6426911842506175e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 8019|ppo_ep: 1|act_loss: -0.011383056640625|cri_loss: -0.00542449951171875|unsuper_loss: 0.0
+average reward score: 5.5859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.37%) |Training time=0.48s (22.14%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8020|ppo_ep: 1|act_loss: 0.0060577392578125|cri_loss: 0.003360748291015625|unsuper_loss: 0.0
+average reward score: 5.27734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.39%) |Training time=0.48s (22.00%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8021|ppo_ep: 1|act_loss: -0.01690673828125|cri_loss: -0.0083465576171875|unsuper_loss: 0.0
+average reward score: 5.0859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.48s (21.95%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8022|ppo_ep: 1|act_loss: -0.04150390625|cri_loss: -0.02032470703125|unsuper_loss: 0.0
+average reward score: 6.12109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.46%) |Training time=0.48s (21.92%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8023|ppo_ep: 1|act_loss: -0.00159454345703125|cri_loss: -0.000701904296875|unsuper_loss: 0.0
+average reward score: 5.609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.94%) |Training time=0.49s (22.41%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8024|ppo_ep: 1|act_loss: -0.0018596649169921875|cri_loss: -0.0006575584411621094|unsuper_loss: 0.0
+average reward score: 5.93359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.03%) |Training time=0.49s (22.40%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.57
+[2023-04-14 13:41:20,232] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 8025|ppo_ep: 1|act_loss: 0.010040283203125|cri_loss: 0.00547027587890625|unsuper_loss: 0.0
+average reward score: 5.359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.49%) |Training time=0.48s (22.45%) |Others=0.09 (4.06%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.57
+[2023-04-14 13:41:22,396] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 8026|ppo_ep: 1|act_loss: -0.029541015625|cri_loss: -0.0144195556640625|unsuper_loss: 0.0
+average reward score: 5.2109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.08%) |Training time=0.49s (22.80%) |Others=0.09 (4.12%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8027|ppo_ep: 1|act_loss: 0.00446319580078125|cri_loss: 0.0026836395263671875|unsuper_loss: 0.0
+average reward score: 5.82421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.80%) |Training time=0.49s (22.60%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8028|ppo_ep: 1|act_loss: -0.0071868896484375|cri_loss: -0.0032958984375|unsuper_loss: 0.0
+average reward score: 5.45703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.11%) |Training time=0.48s (22.27%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.57
+[2023-04-14 13:41:28,805] [INFO] [logging.py:96:log_dist] [Rank 0] step=8030, skipped=100, lr=[3.888932149907788e-08, 3.888932149907788e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:41:28,824] [INFO] [timer.py:199:stop] epoch=0/micro_step=8030/global_step=8030, RunningAvgSamplesPerSec=105.73302555462291, CurrSamplesPerSec=96.34630702129675, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:41:28,916] [INFO] [logging.py:96:log_dist] [Rank 0] step=8030, skipped=140, lr=[2.5322021673613308e-08, 2.5322021673613308e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 8029|ppo_ep: 1|act_loss: 0.0107421875|cri_loss: 0.00566864013671875|unsuper_loss: 0.0
+average reward score: 5.796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.78%) |Training time=0.49s (22.73%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8030|ppo_ep: 1|act_loss: 0.00165557861328125|cri_loss: 0.0010852813720703125|unsuper_loss: 0.0
+average reward score: 5.5234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.68%) |Training time=0.49s (21.04%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8031|ppo_ep: 1|act_loss: -0.0274658203125|cri_loss: -0.01277923583984375|unsuper_loss: 0.0
+average reward score: 5.328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.05%) |Training time=0.49s (22.32%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8032|ppo_ep: 1|act_loss: -0.019622802734375|cri_loss: -0.009552001953125|unsuper_loss: 0.0
+average reward score: 5.890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.72%) |Training time=0.50s (22.70%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8033|ppo_ep: 1|act_loss: 0.005889892578125|cri_loss: 0.0031986236572265625|unsuper_loss: 0.0
+average reward score: 5.37109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.81%) |Training time=0.49s (22.57%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8034|ppo_ep: 1|act_loss: 0.02691650390625|cri_loss: 0.0142822265625|unsuper_loss: 0.0
+average reward score: 5.5703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.94%) |Training time=0.49s (22.44%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8035|ppo_ep: 1|act_loss: -0.005855560302734375|cri_loss: -0.0028533935546875|unsuper_loss: 0.0
+average reward score: 5.58203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.98%) |Training time=0.49s (22.49%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8036|ppo_ep: 1|act_loss: -0.0105743408203125|cri_loss: -0.0033416748046875|unsuper_loss: 0.0
+average reward score: 5.62109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.61%) |Training time=0.50s (22.79%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8037|ppo_ep: 1|act_loss: -0.0026702880859375|cri_loss: -0.001216888427734375|unsuper_loss: 0.0
+average reward score: 5.34765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.96%) |Training time=0.49s (22.41%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8038|ppo_ep: 1|act_loss: -0.0136566162109375|cri_loss: -0.006710052490234375|unsuper_loss: 0.0
+average reward score: 5.640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.75%) |Training time=0.49s (22.63%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.57
+[2023-04-14 13:41:50,718] [INFO] [logging.py:96:log_dist] [Rank 0] step=8040, skipped=100, lr=[3.657104083291008e-08, 3.657104083291008e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:41:50,736] [INFO] [timer.py:199:stop] epoch=0/micro_step=8040/global_step=8040, RunningAvgSamplesPerSec=105.72194107019268, CurrSamplesPerSec=98.03218566099805, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:41:50,828] [INFO] [logging.py:96:log_dist] [Rank 0] step=8040, skipped=140, lr=[2.3973922811987295e-08, 2.3973922811987295e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 8039|ppo_ep: 1|act_loss: 0.041595458984375|cri_loss: 0.0220184326171875|unsuper_loss: 0.0
+average reward score: 4.953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.84%) |Training time=0.49s (22.56%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8040|ppo_ep: 1|act_loss: -0.011566162109375|cri_loss: -0.0054473876953125|unsuper_loss: 0.0
+average reward score: 4.96875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.97%) |Training time=0.48s (22.39%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8041|ppo_ep: 1|act_loss: -0.003047943115234375|cri_loss: -0.0013294219970703125|unsuper_loss: 0.0
+average reward score: 5.26953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.72%) |Training time=0.49s (22.68%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8042|ppo_ep: 1|act_loss: -0.0085906982421875|cri_loss: -0.0038299560546875|unsuper_loss: 0.0
+average reward score: 5.80859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.76%) |Training time=0.49s (22.61%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8043|ppo_ep: 1|act_loss: 0.00794219970703125|cri_loss: 0.004283905029296875|unsuper_loss: 0.0
+average reward score: 5.86328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.88%) |Training time=0.49s (22.58%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8044|ppo_ep: 1|act_loss: -0.002685546875|cri_loss: -0.000995635986328125|unsuper_loss: 0.0
+average reward score: 6.1015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.66%) |Training time=0.50s (22.70%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8045|ppo_ep: 1|act_loss: -0.0005998611450195312|cri_loss: -0.00021529197692871094|unsuper_loss: 0.0
+average reward score: 5.3828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.28%) |Training time=0.51s (23.17%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.44 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8046|ppo_ep: 1|act_loss: 0.00278472900390625|cri_loss: 0.001537322998046875|unsuper_loss: 0.0
+average reward score: 5.6328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.75s (73.86%) |Training time=0.52s (21.89%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.54 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8047|ppo_ep: 1|act_loss: 0.0053863525390625|cri_loss: 0.0028438568115234375|unsuper_loss: 0.0
+average reward score: 5.63671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.77%) |Training time=0.49s (22.68%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8048|ppo_ep: 1|act_loss: 0.0013275146484375|cri_loss: 0.0008149147033691406|unsuper_loss: 0.0
+average reward score: 5.4375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.68%) |Training time=0.49s (22.74%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.57
+[2023-04-14 13:42:12,673] [INFO] [logging.py:96:log_dist] [Rank 0] step=8050, skipped=100, lr=[3.4323736235372594e-08, 3.4323736235372594e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:42:12,691] [INFO] [timer.py:199:stop] epoch=0/micro_step=8050/global_step=8050, RunningAvgSamplesPerSec=105.708434479618, CurrSamplesPerSec=97.58464677133455, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:42:12,784] [INFO] [logging.py:96:log_dist] [Rank 0] step=8050, skipped=140, lr=[2.266252462991253e-08, 2.266252462991253e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 8049|ppo_ep: 1|act_loss: 0.017578125|cri_loss: 0.0091705322265625|unsuper_loss: 0.0
+average reward score: 5.484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.73%) |Training time=0.49s (22.59%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8050|ppo_ep: 1|act_loss: -0.001239776611328125|cri_loss: -0.0003724098205566406|unsuper_loss: 0.0
+average reward score: 5.4375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.04%) |Training time=0.48s (22.33%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8051|ppo_ep: 1|act_loss: 0.0269927978515625|cri_loss: 0.0146484375|unsuper_loss: 0.0
+average reward score: 4.96484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.20%) |Training time=0.50s (22.84%) |Others=0.11 (4.96%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8052|ppo_ep: 1|act_loss: -0.0083770751953125|cri_loss: -0.004146575927734375|unsuper_loss: 0.0
+average reward score: 5.4921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.95%) |Training time=0.49s (22.41%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8053|ppo_ep: 1|act_loss: 0.001995086669921875|cri_loss: 0.0011777877807617188|unsuper_loss: 0.0
+average reward score: 5.80078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.95%) |Training time=0.49s (22.43%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8054|ppo_ep: 1|act_loss: -0.0205535888671875|cri_loss: -0.00970458984375|unsuper_loss: 0.0
+average reward score: 5.7421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.84%) |Training time=0.49s (22.56%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8055|ppo_ep: 1|act_loss: 0.00510406494140625|cri_loss: 0.0027561187744140625|unsuper_loss: 0.0
+average reward score: 5.0234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.99%) |Training time=0.49s (22.45%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8056|ppo_ep: 1|act_loss: -0.01055908203125|cri_loss: -0.004421234130859375|unsuper_loss: 0.0
+average reward score: 5.359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.79%) |Training time=0.49s (22.61%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8057|ppo_ep: 1|act_loss: 0.002025604248046875|cri_loss: 0.0011615753173828125|unsuper_loss: 0.0
+average reward score: 6.08984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.05%) |Training time=0.49s (22.42%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8058|ppo_ep: 1|act_loss: 0.00799560546875|cri_loss: 0.00431060791015625|unsuper_loss: 0.0
+average reward score: 5.31640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.71%) |Training time=0.49s (22.67%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.57
+[2023-04-14 13:42:34,383] [INFO] [logging.py:96:log_dist] [Rank 0] step=8060, skipped=100, lr=[3.21474410169414e-08, 3.21474410169414e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:42:34,401] [INFO] [timer.py:199:stop] epoch=0/micro_step=8060/global_step=8060, RunningAvgSamplesPerSec=105.69826892905085, CurrSamplesPerSec=98.67971046991659, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:42:34,494] [INFO] [logging.py:96:log_dist] [Rank 0] step=8060, skipped=140, lr=[2.1387846565474047e-08, 2.1387846565474047e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 8059|ppo_ep: 1|act_loss: 0.028778076171875|cri_loss: 0.014984130859375|unsuper_loss: 0.0
+average reward score: 5.05078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.89%) |Training time=0.49s (22.49%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8060|ppo_ep: 1|act_loss: -0.0341796875|cri_loss: -0.0166473388671875|unsuper_loss: 0.0
+average reward score: 4.65625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.64%) |Training time=0.50s (22.76%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8061|ppo_ep: 1|act_loss: 0.0007905960083007812|cri_loss: 0.0005269050598144531|unsuper_loss: 0.0
+average reward score: 4.9375
+-------------------------------------------------------------------------------------
+|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.25%) |Training time=0.51s (21.57%) |Others=0.10 (4.18%)|CurSamplesPerSec=13.60 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8062|ppo_ep: 1|act_loss: 0.0484619140625|cri_loss: 0.028228759765625|unsuper_loss: 0.0
+average reward score: 5.69140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.78%) |Training time=0.49s (22.61%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8063|ppo_ep: 1|act_loss: 0.00508880615234375|cri_loss: 0.0027179718017578125|unsuper_loss: 0.0
+average reward score: 5.53125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.90%) |Training time=0.49s (22.59%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8064|ppo_ep: 1|act_loss: -0.04681396484375|cri_loss: -0.0221099853515625|unsuper_loss: 0.0
+average reward score: 5.44921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.33%) |Training time=0.50s (23.00%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8065|ppo_ep: 1|act_loss: 0.001220703125|cri_loss: 0.001300811767578125|unsuper_loss: 0.0
+average reward score: 5.08203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.44%) |Training time=0.50s (23.06%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8066|ppo_ep: 1|act_loss: -0.002658843994140625|cri_loss: -0.0011577606201171875|unsuper_loss: 0.0
+average reward score: 5.1796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.52%) |Training time=0.50s (22.90%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8067|ppo_ep: 1|act_loss: 0.01605224609375|cri_loss: 0.0081939697265625|unsuper_loss: 0.0
+average reward score: 4.80859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.76%) |Training time=0.49s (22.70%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8068|ppo_ep: 1|act_loss: -0.0056304931640625|cri_loss: -0.0023651123046875|unsuper_loss: 0.0
+average reward score: 5.29296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.47%) |Training time=0.50s (22.94%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.57
+[2023-04-14 13:42:56,307] [INFO] [logging.py:96:log_dist] [Rank 0] step=8070, skipped=100, lr=[3.004218743556186e-08, 3.004218743556186e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:42:56,325] [INFO] [timer.py:199:stop] epoch=0/micro_step=8070/global_step=8070, RunningAvgSamplesPerSec=105.68438726030831, CurrSamplesPerSec=97.16916567905542, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:42:56,418] [INFO] [logging.py:96:log_dist] [Rank 0] step=8070, skipped=140, lr=[2.0149907512475585e-08, 2.0149907512475585e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 8069|ppo_ep: 1|act_loss: -0.00200653076171875|cri_loss: -0.0005998611450195312|unsuper_loss: 0.0
+average reward score: 5.7265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.76%) |Training time=0.49s (22.72%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8070|ppo_ep: 1|act_loss: -0.019775390625|cri_loss: -0.00946807861328125|unsuper_loss: 0.0
+average reward score: 4.7109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.28%) |Training time=0.50s (23.13%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8071|ppo_ep: 1|act_loss: 0.01132965087890625|cri_loss: 0.00701141357421875|unsuper_loss: 0.0
+average reward score: 5.6640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.89%) |Training time=0.49s (22.59%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8072|ppo_ep: 1|act_loss: -0.01421356201171875|cri_loss: -0.00688934326171875|unsuper_loss: 0.0
+average reward score: 5.578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.46%) |Training time=0.50s (22.98%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8073|ppo_ep: 1|act_loss: -0.00011110305786132812|cri_loss: 0.00040340423583984375|unsuper_loss: 0.0
+average reward score: 4.9453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.14%) |Training time=0.51s (23.36%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8074|ppo_ep: 1|act_loss: -0.022796630859375|cri_loss: -0.0111541748046875|unsuper_loss: 0.0
+average reward score: 5.24609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.61s (72.73%) |Training time=0.50s (22.76%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.44 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8075|ppo_ep: 1|act_loss: -0.002719879150390625|cri_loss: -0.0008921623229980469|unsuper_loss: 0.0
+average reward score: 4.9921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.50s |Gather latency=0.00s (0.00%) |Generate time=1.85s (73.98%) |Training time=0.55s (22.10%) |Others=0.10 (3.92%)|CurSamplesPerSec=12.79 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8076|ppo_ep: 1|act_loss: -0.0154876708984375|cri_loss: -0.0076904296875|unsuper_loss: 0.0
+average reward score: 5.69921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.80%) |Training time=0.49s (22.59%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8077|ppo_ep: 1|act_loss: 0.019439697265625|cri_loss: 0.00988006591796875|unsuper_loss: 0.0
+average reward score: 5.3046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.21%) |Training time=0.51s (23.16%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8078|ppo_ep: 1|act_loss: 0.006072998046875|cri_loss: 0.003635406494140625|unsuper_loss: 0.0
+average reward score: 5.46875
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.59%) |Training time=0.50s (22.81%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.57
+[2023-04-14 13:43:18,464] [INFO] [logging.py:96:log_dist] [Rank 0] step=8080, skipped=100, lr=[2.8008006696170348e-08, 2.8008006696170348e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:43:18,483] [INFO] [timer.py:199:stop] epoch=0/micro_step=8080/global_step=8080, RunningAvgSamplesPerSec=105.6675462528966, CurrSamplesPerSec=98.04500416380557, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:43:18,575] [INFO] [logging.py:96:log_dist] [Rank 0] step=8080, skipped=140, lr=[1.8948725820160663e-08, 1.8948725820160663e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 8079|ppo_ep: 1|act_loss: -0.007293701171875|cri_loss: -0.003353118896484375|unsuper_loss: 0.0
+average reward score: 5.703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.15%) |Training time=0.49s (22.37%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8080|ppo_ep: 1|act_loss: -0.01055908203125|cri_loss: -0.0049285888671875|unsuper_loss: 0.0
+average reward score: 5.83984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.69%) |Training time=0.49s (22.70%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8081|ppo_ep: 1|act_loss: -0.005428314208984375|cri_loss: -0.00260162353515625|unsuper_loss: 0.0
+average reward score: 5.2734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.13%) |Training time=0.48s (22.21%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8082|ppo_ep: 1|act_loss: 0.005420684814453125|cri_loss: 0.003185272216796875|unsuper_loss: 0.0
+average reward score: 5.0625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.62%) |Training time=0.50s (22.77%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8083|ppo_ep: 1|act_loss: 0.006107330322265625|cri_loss: 0.003162384033203125|unsuper_loss: 0.0
+average reward score: 5.3359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.89%) |Training time=0.49s (22.46%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8084|ppo_ep: 1|act_loss: -0.0063629150390625|cri_loss: -0.003002166748046875|unsuper_loss: 0.0
+average reward score: 5.45703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.14%) |Training time=0.48s (22.22%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8085|ppo_ep: 1|act_loss: -0.0124359130859375|cri_loss: -0.005931854248046875|unsuper_loss: 0.0
+average reward score: 5.265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.02%) |Training time=0.48s (22.37%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8086|ppo_ep: 1|act_loss: -0.008056640625|cri_loss: -0.003505706787109375|unsuper_loss: 0.0
+average reward score: 5.55078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.02%) |Training time=0.48s (22.34%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8087|ppo_ep: 1|act_loss: 0.002338409423828125|cri_loss: 0.0013360977172851562|unsuper_loss: 0.0
+average reward score: 4.47265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.10%) |Training time=0.48s (22.26%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8088|ppo_ep: 1|act_loss: 0.05010986328125|cri_loss: 0.0265655517578125|unsuper_loss: 0.0
+average reward score: 5.2109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.98%) |Training time=0.48s (22.37%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.57
+[2023-04-14 13:43:40,120] [INFO] [logging.py:96:log_dist] [Rank 0] step=8090, skipped=100, lr=[2.6044928950231964e-08, 2.6044928950231964e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:43:40,139] [INFO] [timer.py:199:stop] epoch=0/micro_step=8090/global_step=8090, RunningAvgSamplesPerSec=105.65891652113257, CurrSamplesPerSec=100.38415276719263, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:43:40,231] [INFO] [logging.py:96:log_dist] [Rank 0] step=8090, skipped=140, lr=[1.7784319292939166e-08, 1.7784319292939166e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 8089|ppo_ep: 1|act_loss: -0.00270843505859375|cri_loss: -0.0010089874267578125|unsuper_loss: 0.0
+average reward score: 5.5
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.16%) |Training time=0.48s (22.31%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8090|ppo_ep: 1|act_loss: 0.007781982421875|cri_loss: 0.0042724609375|unsuper_loss: 0.0
+average reward score: 6.19140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.13%) |Training time=0.48s (20.54%) |Others=0.10 (4.33%)|CurSamplesPerSec=13.77 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8091|ppo_ep: 1|act_loss: -0.00762939453125|cri_loss: -0.0021915435791015625|unsuper_loss: 0.0
+average reward score: 5.71484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.94%) |Training time=0.48s (22.42%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8092|ppo_ep: 1|act_loss: 0.00388336181640625|cri_loss: 0.001983642578125|unsuper_loss: 0.0
+average reward score: 5.1875
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.66%) |Training time=0.49s (22.72%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8093|ppo_ep: 1|act_loss: 0.05047607421875|cri_loss: 0.0269012451171875|unsuper_loss: 0.0
+average reward score: 5.1171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.06%) |Training time=0.48s (22.31%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8094|ppo_ep: 1|act_loss: 0.019500732421875|cri_loss: 0.0110321044921875|unsuper_loss: 0.0
+average reward score: 4.95703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.86%) |Training time=0.49s (22.55%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8095|ppo_ep: 1|act_loss: -0.018035888671875|cri_loss: -0.0085296630859375|unsuper_loss: 0.0
+average reward score: 5.91015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.12%) |Training time=0.48s (22.33%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8096|ppo_ep: 1|act_loss: 0.0235137939453125|cri_loss: 0.01200103759765625|unsuper_loss: 0.0
+average reward score: 5.66796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.96%) |Training time=0.49s (22.41%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8097|ppo_ep: 1|act_loss: -0.021636962890625|cri_loss: -0.010711669921875|unsuper_loss: 0.0
+average reward score: 5.75390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.81%) |Training time=0.49s (22.59%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8098|ppo_ep: 1|act_loss: -0.033905029296875|cri_loss: -0.015106201171875|unsuper_loss: 0.0
+average reward score: 5.734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.64%) |Training time=0.49s (22.75%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.57
+[2023-04-14 13:44:01,948] [INFO] [logging.py:96:log_dist] [Rank 0] step=8100, skipped=100, lr=[2.4152983295295917e-08, 2.4152983295295917e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:44:01,966] [INFO] [timer.py:199:stop] epoch=0/micro_step=8100/global_step=8100, RunningAvgSamplesPerSec=105.64966874876379, CurrSamplesPerSec=98.67942026513467, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:44:02,060] [INFO] [logging.py:96:log_dist] [Rank 0] step=8100, skipped=140, lr=[1.6656705190125078e-08, 1.6656705190125078e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 8099|ppo_ep: 1|act_loss: -0.01568603515625|cri_loss: -0.007781982421875|unsuper_loss: 0.0
+average reward score: 5.01953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.98%) |Training time=0.49s (22.46%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8100|ppo_ep: 1|act_loss: 0.00882720947265625|cri_loss: 0.004547119140625|unsuper_loss: 0.0
+average reward score: 5.125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.87%) |Training time=0.49s (22.53%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8101|ppo_ep: 1|act_loss: -0.002315521240234375|cri_loss: -0.0010395050048828125|unsuper_loss: 0.0
+average reward score: 5.515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.99%) |Training time=0.49s (22.47%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8102|ppo_ep: 1|act_loss: -0.025299072265625|cri_loss: -0.0122222900390625|unsuper_loss: 0.0
+average reward score: 5.66015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.90%) |Training time=0.49s (22.47%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8103|ppo_ep: 1|act_loss: -0.0008783340454101562|cri_loss: -0.00017642974853515625|unsuper_loss: 0.0
+average reward score: 5.28125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.15%) |Training time=0.48s (22.24%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8104|ppo_ep: 1|act_loss: -0.03424072265625|cri_loss: -0.015899658203125|unsuper_loss: 0.0
+average reward score: 5.86328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.69s (73.86%) |Training time=0.50s (21.75%) |Others=0.10 (4.39%)|CurSamplesPerSec=14.01 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8105|ppo_ep: 1|act_loss: 0.029693603515625|cri_loss: 0.0153961181640625|unsuper_loss: 0.0
+average reward score: 5.34375
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.01%) |Training time=0.48s (20.76%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.74 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8106|ppo_ep: 1|act_loss: 0.016998291015625|cri_loss: 0.00864410400390625|unsuper_loss: 0.0
+average reward score: 5.56640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.04%) |Training time=0.50s (22.85%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8107|ppo_ep: 1|act_loss: 0.0084686279296875|cri_loss: 0.00434112548828125|unsuper_loss: 0.0
+average reward score: 4.7890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.73%) |Training time=0.50s (22.74%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8108|ppo_ep: 1|act_loss: -0.0159454345703125|cri_loss: -0.00785064697265625|unsuper_loss: 0.0
+average reward score: 5.76953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.08%) |Training time=0.49s (22.31%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.57
+[2023-04-14 13:44:23,944] [INFO] [logging.py:96:log_dist] [Rank 0] step=8110, skipped=100, lr=[2.233219777456001e-08, 2.233219777456001e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:44:23,962] [INFO] [timer.py:199:stop] epoch=0/micro_step=8110/global_step=8110, RunningAvgSamplesPerSec=105.63997826562213, CurrSamplesPerSec=102.96742378386755, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:44:24,059] [INFO] [logging.py:96:log_dist] [Rank 0] step=8110, skipped=140, lr=[1.556590022567972e-08, 1.556590022567972e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 8109|ppo_ep: 1|act_loss: -0.01812744140625|cri_loss: -0.008941650390625|unsuper_loss: 0.0
+average reward score: 4.953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.41%) |Training time=0.48s (21.83%) |Others=0.10 (4.76%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8110|ppo_ep: 1|act_loss: 0.00665283203125|cri_loss: 0.0033893585205078125|unsuper_loss: 0.0
+average reward score: 4.83984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.67s (76.23%) |Training time=0.42s (19.02%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8111|ppo_ep: 1|act_loss: -0.00710296630859375|cri_loss: -0.0033931732177734375|unsuper_loss: 0.0
+average reward score: 5.75
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.18%) |Training time=0.46s (21.15%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8112|ppo_ep: 1|act_loss: -0.0011281967163085938|cri_loss: -6.67572021484375e-05|unsuper_loss: 0.0
+average reward score: 5.9609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.47%) |Training time=0.45s (20.86%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8113|ppo_ep: 1|act_loss: 0.0004973411560058594|cri_loss: 0.00041937828063964844|unsuper_loss: 0.0
+average reward score: 5.05859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.39%) |Training time=0.45s (20.92%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8114|ppo_ep: 1|act_loss: 0.004322052001953125|cri_loss: 0.0022449493408203125|unsuper_loss: 0.0
+average reward score: 5.9609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.50%) |Training time=0.45s (20.83%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8115|ppo_ep: 1|act_loss: -0.01165008544921875|cri_loss: -0.005748748779296875|unsuper_loss: 0.0
+average reward score: 5.3203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.30%) |Training time=0.44s (20.13%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8116|ppo_ep: 1|act_loss: 0.04052734375|cri_loss: 0.0211029052734375|unsuper_loss: 0.0
+average reward score: 6.1953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.07%) |Training time=0.46s (21.28%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8117|ppo_ep: 1|act_loss: 0.00750732421875|cri_loss: 0.00391387939453125|unsuper_loss: 0.0
+average reward score: 4.9765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.12%) |Training time=0.46s (21.21%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8118|ppo_ep: 1|act_loss: 0.0135040283203125|cri_loss: 0.006977081298828125|unsuper_loss: 0.0
+average reward score: 4.80859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.13%) |Training time=0.46s (21.22%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.57
+[2023-04-14 13:44:45,867] [INFO] [logging.py:96:log_dist] [Rank 0] step=8120, skipped=100, lr=[2.0582599376458696e-08, 2.0582599376458696e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:44:45,885] [INFO] [timer.py:199:stop] epoch=0/micro_step=8120/global_step=8120, RunningAvgSamplesPerSec=105.64460919016575, CurrSamplesPerSec=96.58647737641874, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:44:45,978] [INFO] [logging.py:96:log_dist] [Rank 0] step=8120, skipped=140, lr=[1.4511920567963911e-08, 1.4511920567963911e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 8119|ppo_ep: 1|act_loss: -0.017852783203125|cri_loss: -0.00876617431640625|unsuper_loss: 0.0
+average reward score: 6.0859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.77s (74.94%) |Training time=0.49s (20.90%) |Others=0.10 (4.16%)|CurSamplesPerSec=13.52 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8120|ppo_ep: 1|act_loss: 0.01031494140625|cri_loss: 0.00534820556640625|unsuper_loss: 0.0
+average reward score: 5.6796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.02%) |Training time=0.46s (21.33%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8121|ppo_ep: 1|act_loss: -0.0057525634765625|cri_loss: -0.002651214599609375|unsuper_loss: 0.0
+average reward score: 4.7109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.90%) |Training time=0.47s (21.57%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8122|ppo_ep: 1|act_loss: 0.0091400146484375|cri_loss: 0.0060882568359375|unsuper_loss: 0.0
+average reward score: 5.5703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.90%) |Training time=0.47s (21.43%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8123|ppo_ep: 1|act_loss: -0.00714874267578125|cri_loss: -0.0032329559326171875|unsuper_loss: 0.0
+average reward score: 5.53125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.29%) |Training time=0.46s (21.15%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8124|ppo_ep: 1|act_loss: -0.0182342529296875|cri_loss: -0.00887298583984375|unsuper_loss: 0.0
+average reward score: 5.13671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.60%) |Training time=0.47s (21.75%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8125|ppo_ep: 1|act_loss: -0.00010943412780761719|cri_loss: 7.2479248046875e-05|unsuper_loss: 0.0
+average reward score: 5.25
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.01%) |Training time=0.46s (21.43%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8126|ppo_ep: 1|act_loss: -0.00679779052734375|cri_loss: -0.0029354095458984375|unsuper_loss: 0.0
+average reward score: 5.609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.47s (21.56%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.57
+[2023-04-14 13:45:03,323] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 8127|ppo_ep: 1|act_loss: -0.011077880859375|cri_loss: -0.005207061767578125|unsuper_loss: 0.0
+average reward score: 5.70703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.86%) |Training time=0.48s (22.07%) |Others=0.09 (4.07%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.57
+[2023-04-14 13:45:05,485] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 8128|ppo_ep: 1|act_loss: -0.0008001327514648438|cri_loss: -0.0003199577331542969|unsuper_loss: 0.0
+average reward score: 5.44921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.48s (22.28%) |Others=0.09 (4.15%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.57
+[2023-04-14 13:45:07,544] [INFO] [logging.py:96:log_dist] [Rank 0] step=8130, skipped=100, lr=[1.8904214034259727e-08, 1.8904214034259727e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:45:07,562] [INFO] [timer.py:199:stop] epoch=0/micro_step=8130/global_step=8130, RunningAvgSamplesPerSec=105.6427885351493, CurrSamplesPerSec=100.87728193430314, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:45:07,655] [INFO] [logging.py:96:log_dist] [Rank 0] step=8130, skipped=142, lr=[1.3695261579316776e-08, 1.3695261579316776e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 8129|ppo_ep: 1|act_loss: 0.004642486572265625|cri_loss: 0.002925872802734375|unsuper_loss: 0.0
+average reward score: 5.61328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.30%) |Training time=0.48s (22.10%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8130|ppo_ep: 1|act_loss: 0.00099945068359375|cri_loss: 0.0006532669067382812|unsuper_loss: 0.0
+average reward score: 6.16015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.53%) |Training time=0.47s (21.84%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8131|ppo_ep: 1|act_loss: -0.02508544921875|cri_loss: -0.01206207275390625|unsuper_loss: 0.0
+average reward score: 5.6640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.33%) |Training time=0.48s (22.14%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8132|ppo_ep: 1|act_loss: -0.0108642578125|cri_loss: -0.0050048828125|unsuper_loss: 0.0
+average reward score: 5.92578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.32%) |Training time=0.48s (22.07%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8133|ppo_ep: 1|act_loss: 0.007167816162109375|cri_loss: 0.003833770751953125|unsuper_loss: 0.0
+average reward score: 6.10546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.46%) |Training time=0.49s (22.09%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8134|ppo_ep: 1|act_loss: -0.0309295654296875|cri_loss: -0.015045166015625|unsuper_loss: 0.0
+average reward score: 5.8046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.51s |Gather latency=0.00s (0.00%) |Generate time=1.87s (74.40%) |Training time=0.54s (21.60%) |Others=0.10 (4.00%)|CurSamplesPerSec=12.74 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8135|ppo_ep: 1|act_loss: 0.0028095245361328125|cri_loss: 0.0014448165893554688|unsuper_loss: 0.0
+average reward score: 5.09765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.90%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8136|ppo_ep: 1|act_loss: 0.01763916015625|cri_loss: 0.00968170166015625|unsuper_loss: 0.0
+average reward score: 5.53515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.48s (21.94%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8137|ppo_ep: 1|act_loss: -0.00473785400390625|cri_loss: -0.0020809173583984375|unsuper_loss: 0.0
+average reward score: 5.53125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.85%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8138|ppo_ep: 1|act_loss: -0.018798828125|cri_loss: -0.00905609130859375|unsuper_loss: 0.0
+average reward score: 5.4765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.34%) |Training time=0.48s (22.02%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.57
+[2023-04-14 13:45:29,625] [INFO] [logging.py:96:log_dist] [Rank 0] step=8140, skipped=100, lr=[1.729706662568434e-08, 1.729706662568434e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:45:29,643] [INFO] [timer.py:199:stop] epoch=0/micro_step=8140/global_step=8140, RunningAvgSamplesPerSec=105.63444801584032, CurrSamplesPerSec=99.91842909073584, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:45:29,738] [INFO] [logging.py:96:log_dist] [Rank 0] step=8140, skipped=142, lr=[1.2707606475514667e-08, 1.2707606475514667e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 8139|ppo_ep: 1|act_loss: -0.0002751350402832031|cri_loss: 0.00021839141845703125|unsuper_loss: 0.0
+average reward score: 5.34765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.16%) |Training time=0.48s (22.23%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8140|ppo_ep: 1|act_loss: 0.0008211135864257812|cri_loss: 0.0006456375122070312|unsuper_loss: 0.0
+average reward score: 6.015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.93%) |Training time=0.49s (22.47%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8141|ppo_ep: 1|act_loss: -0.0084075927734375|cri_loss: -0.003559112548828125|unsuper_loss: 0.0
+average reward score: 5.5234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.13%) |Training time=0.49s (22.37%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8142|ppo_ep: 1|act_loss: -0.009246826171875|cri_loss: -0.00395965576171875|unsuper_loss: 0.0
+average reward score: 5.5390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.60%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8143|ppo_ep: 1|act_loss: -0.019439697265625|cri_loss: -0.00914764404296875|unsuper_loss: 0.0
+average reward score: 5.92578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.73%) |Training time=0.47s (21.72%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8144|ppo_ep: 1|act_loss: -0.02069091796875|cri_loss: -0.01020050048828125|unsuper_loss: 0.0
+average reward score: 6.4765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.68%) |Training time=0.47s (21.69%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8145|ppo_ep: 1|act_loss: -0.01068115234375|cri_loss: -0.005199432373046875|unsuper_loss: 0.0
+average reward score: 5.25390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.51%) |Training time=0.47s (21.94%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8146|ppo_ep: 1|act_loss: -0.0067138671875|cri_loss: -0.0033111572265625|unsuper_loss: 0.0
+average reward score: 5.0546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.35%) |Training time=0.48s (21.87%) |Others=0.10 (4.77%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8147|ppo_ep: 1|act_loss: 0.0004782676696777344|cri_loss: 0.0006284713745117188|unsuper_loss: 0.0
+average reward score: 5.4765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.86%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8148|ppo_ep: 1|act_loss: -0.023040771484375|cri_loss: -0.01136016845703125|unsuper_loss: 0.0
+average reward score: 5.5234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.60%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.57
+[2023-04-14 13:45:51,466] [INFO] [logging.py:96:log_dist] [Rank 0] step=8150, skipped=100, lr=[1.5761180972532812e-08, 1.5761180972532812e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:45:51,484] [INFO] [timer.py:199:stop] epoch=0/micro_step=8150/global_step=8150, RunningAvgSamplesPerSec=105.63041590312197, CurrSamplesPerSec=103.20169652866737, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:45:51,577] [INFO] [logging.py:96:log_dist] [Rank 0] step=8150, skipped=142, lr=[1.1756819045251055e-08, 1.1756819045251055e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 8149|ppo_ep: 1|act_loss: -0.011749267578125|cri_loss: -0.005733489990234375|unsuper_loss: 0.0
+average reward score: 5.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.48%) |Training time=0.47s (20.33%) |Others=0.10 (4.19%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8150|ppo_ep: 1|act_loss: -0.01499176025390625|cri_loss: -0.007411956787109375|unsuper_loss: 0.0
+average reward score: 5.0546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.83%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8151|ppo_ep: 1|act_loss: 0.0272369384765625|cri_loss: 0.0148773193359375|unsuper_loss: 0.0
+average reward score: 4.75
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.80%) |Training time=0.47s (21.65%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8152|ppo_ep: 1|act_loss: -0.00673675537109375|cri_loss: -0.0031871795654296875|unsuper_loss: 0.0
+average reward score: 5.1484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.77%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8153|ppo_ep: 1|act_loss: -0.01482391357421875|cri_loss: -0.00731658935546875|unsuper_loss: 0.0
+average reward score: 5.5078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.47s (21.66%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8154|ppo_ep: 1|act_loss: -0.015289306640625|cri_loss: -0.00749969482421875|unsuper_loss: 0.0
+average reward score: 4.94921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.73%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57
+[2023-04-14 13:46:04,438] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, but hysteresis is 2. Reducing hysteresis to 1
+epoch: 0|step: 8155|ppo_ep: 1|act_loss: -0.00965118408203125|cri_loss: -0.004589080810546875|unsuper_loss: 0.0
+average reward score: 5.765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.54%) |Training time=0.45s (20.90%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8156|ppo_ep: 1|act_loss: -0.015777587890625|cri_loss: -0.0076141357421875|unsuper_loss: 0.0
+average reward score: 5.296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.79%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8157|ppo_ep: 1|act_loss: 0.0056304931640625|cri_loss: 0.00444793701171875|unsuper_loss: 0.0
+average reward score: 5.64453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.79%) |Training time=0.47s (21.67%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8158|ppo_ep: 1|act_loss: -0.0048980712890625|cri_loss: -0.0023345947265625|unsuper_loss: 0.0
+average reward score: 6.34375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.66%) |Training time=0.47s (21.72%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.57
+[2023-04-14 13:46:13,063] [INFO] [logging.py:96:log_dist] [Rank 0] step=8160, skipped=101, lr=[1.4439831523126524e-08, 1.4439831523126524e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:46:13,082] [INFO] [timer.py:199:stop] epoch=0/micro_step=8160/global_step=8160, RunningAvgSamplesPerSec=105.62953923713883, CurrSamplesPerSec=105.34076847791707, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:46:13,174] [INFO] [logging.py:96:log_dist] [Rank 0] step=8160, skipped=142, lr=[1.0842913381488851e-08, 1.0842913381488851e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 8159|ppo_ep: 1|act_loss: 0.0166015625|cri_loss: 0.00849151611328125|unsuper_loss: 0.0
+average reward score: 5.328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.85%) |Training time=0.47s (21.62%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8160|ppo_ep: 1|act_loss: -0.001918792724609375|cri_loss: -0.000820159912109375|unsuper_loss: 0.0
+average reward score: 6.1953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.74%) |Training time=0.47s (21.61%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8161|ppo_ep: 1|act_loss: -0.012054443359375|cri_loss: -0.005950927734375|unsuper_loss: 0.0
+average reward score: 5.27734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.83%) |Training time=0.47s (21.59%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.57
+[2023-04-14 13:46:19,563] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192
+epoch: 0|step: 8162|ppo_ep: 1|act_loss: -0.01099395751953125|cri_loss: -0.00539398193359375|unsuper_loss: 0.0
+average reward score: 5.16015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.78%) |Training time=0.45s (20.61%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8163|ppo_ep: 1|act_loss: 0.00798797607421875|cri_loss: 0.00482177734375|unsuper_loss: 0.0
+average reward score: 5.25
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.96%) |Training time=0.47s (20.67%) |Others=0.10 (4.36%)|CurSamplesPerSec=14.18 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8164|ppo_ep: 1|act_loss: 0.0006723403930664062|cri_loss: 0.0004429817199707031|unsuper_loss: 0.0
+average reward score: 5.8203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.72%) |Training time=0.47s (21.62%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8165|ppo_ep: 1|act_loss: 0.01507568359375|cri_loss: 0.007755279541015625|unsuper_loss: 0.0
+average reward score: 6.05078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.57%) |Training time=0.47s (20.21%) |Others=0.10 (4.22%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8166|ppo_ep: 1|act_loss: -0.0082244873046875|cri_loss: -0.003971099853515625|unsuper_loss: 0.0
+average reward score: 5.6328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.77%) |Training time=0.47s (21.58%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8167|ppo_ep: 1|act_loss: -0.002979278564453125|cri_loss: -0.0011272430419921875|unsuper_loss: 0.0
+average reward score: 5.25390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.15%) |Training time=0.46s (21.28%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8168|ppo_ep: 1|act_loss: -0.0156402587890625|cri_loss: -0.007701873779296875|unsuper_loss: 0.0
+average reward score: 4.68359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.21%) |Training time=0.45s (21.11%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.57
+[2023-04-14 13:46:34,899] [INFO] [logging.py:96:log_dist] [Rank 0] step=8170, skipped=102, lr=[1.3176238413572798e-08, 1.3176238413572798e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:46:34,917] [INFO] [timer.py:199:stop] epoch=0/micro_step=8170/global_step=8170, RunningAvgSamplesPerSec=105.63095062148561, CurrSamplesPerSec=107.79319611192628, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:46:35,010] [INFO] [logging.py:96:log_dist] [Rank 0] step=8170, skipped=142, lr=[9.965903030514923e-09, 9.965903030514923e-09], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 8169|ppo_ep: 1|act_loss: -0.03314208984375|cri_loss: -0.0162353515625|unsuper_loss: 0.0
+average reward score: 5.2265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.10%) |Training time=0.46s (21.35%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8170|ppo_ep: 1|act_loss: -0.05389404296875|cri_loss: -0.0263214111328125|unsuper_loss: 0.0
+average reward score: 5.55078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.46s (21.31%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8171|ppo_ep: 1|act_loss: -0.01548004150390625|cri_loss: -0.0076446533203125|unsuper_loss: 0.0
+average reward score: 5.86328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.12%) |Training time=0.46s (21.19%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8172|ppo_ep: 1|act_loss: -0.0423583984375|cri_loss: -0.02008056640625|unsuper_loss: 0.0
+average reward score: 5.37109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.60%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8173|ppo_ep: 1|act_loss: -0.0012054443359375|cri_loss: 0.003978729248046875|unsuper_loss: 0.0
+average reward score: 5.578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.08%) |Training time=0.46s (21.35%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8174|ppo_ep: 1|act_loss: -0.005931854248046875|cri_loss: -0.0025997161865234375|unsuper_loss: 0.0
+average reward score: 5.34375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.88%) |Training time=0.46s (21.46%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8175|ppo_ep: 1|act_loss: -0.0185089111328125|cri_loss: -0.00872039794921875|unsuper_loss: 0.0
+average reward score: 5.13671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.72%) |Training time=0.47s (21.66%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8176|ppo_ep: 1|act_loss: -0.0109405517578125|cri_loss: -0.005306243896484375|unsuper_loss: 0.0
+average reward score: 5.3203125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.12%) |Training time=0.46s (21.18%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8177|ppo_ep: 1|act_loss: -0.01474761962890625|cri_loss: -0.0072021484375|unsuper_loss: 0.0
+average reward score: 5.04296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.46s (21.31%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8178|ppo_ep: 1|act_loss: 0.027130126953125|cri_loss: 0.0140533447265625|unsuper_loss: 0.0
+average reward score: 5.859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.46s (21.49%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.57
+[2023-04-14 13:46:56,452] [INFO] [logging.py:96:log_dist] [Rank 0] step=8180, skipped=102, lr=[1.1840003415331683e-08, 1.1840003415331683e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:46:56,470] [INFO] [timer.py:199:stop] epoch=0/micro_step=8180/global_step=8180, RunningAvgSamplesPerSec=105.63260714351426, CurrSamplesPerSec=106.2880881995418, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:46:56,607] [INFO] [logging.py:96:log_dist] [Rank 0] step=8180, skipped=142, lr=[9.12580099173832e-09, 9.12580099173832e-09], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 8179|ppo_ep: 1|act_loss: -0.0120086669921875|cri_loss: -0.0056304931640625|unsuper_loss: 0.0
+average reward score: 5.91015625
+-------------------------------------------------------------------------------------
+|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.47%) |Training time=0.47s (21.36%) |Others=0.14 (6.17%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8180|ppo_ep: 1|act_loss: -0.042755126953125|cri_loss: -0.01361846923828125|unsuper_loss: 0.0
+average reward score: 5.2578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.68s (74.74%) |Training time=0.46s (20.48%) |Others=0.11 (4.79%)|CurSamplesPerSec=14.19 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8181|ppo_ep: 1|act_loss: -0.00039386749267578125|cri_loss: 0.00017213821411132812|unsuper_loss: 0.0
+average reward score: 5.00390625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.02%) |Training time=0.46s (21.35%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8182|ppo_ep: 1|act_loss: -0.012725830078125|cri_loss: -0.00620269775390625|unsuper_loss: 0.0
+average reward score: 5.265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.70%) |Training time=0.47s (21.63%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8183|ppo_ep: 1|act_loss: 0.03289794921875|cri_loss: 0.0170745849609375|unsuper_loss: 0.0
+average reward score: 5.02734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.76%) |Training time=0.47s (21.54%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.57
+[2023-04-14 13:47:07,378] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, reducing to 4096
+epoch: 0|step: 8184|ppo_ep: 1|act_loss: 0.01544189453125|cri_loss: 0.00815582275390625|unsuper_loss: 0.0
+average reward score: 4.6796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.25%) |Training time=0.45s (21.10%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8185|ppo_ep: 1|act_loss: -0.02349853515625|cri_loss: -0.0108642578125|unsuper_loss: 0.0
+average reward score: 5.234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.70%) |Training time=0.46s (21.51%) |Others=0.10 (4.79%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8186|ppo_ep: 1|act_loss: -0.00730133056640625|cri_loss: -0.0034046173095703125|unsuper_loss: 0.0
+average reward score: 4.81640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.09%) |Training time=0.46s (21.23%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8187|ppo_ep: 1|act_loss: -0.01087188720703125|cri_loss: -0.00518035888671875|unsuper_loss: 0.0
+average reward score: 5.4453125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.24%) |Training time=0.45s (21.19%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8188|ppo_ep: 1|act_loss: 0.011016845703125|cri_loss: 0.005649566650390625|unsuper_loss: 0.0
+average reward score: 5.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.19%) |Training time=0.45s (21.15%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.57
+[2023-04-14 13:47:18,096] [INFO] [logging.py:96:log_dist] [Rank 0] step=8190, skipped=103, lr=[1.0698389333422932e-08, 1.0698389333422932e-08], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:47:18,114] [INFO] [timer.py:199:stop] epoch=0/micro_step=8190/global_step=8190, RunningAvgSamplesPerSec=105.63530852231541, CurrSamplesPerSec=109.11442656642257, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:47:18,206] [INFO] [logging.py:96:log_dist] [Rank 0] step=8190, skipped=142, lr=[8.322619717497648e-09, 8.322619717497648e-09], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 8189|ppo_ep: 1|act_loss: -0.01428985595703125|cri_loss: -0.00637054443359375|unsuper_loss: 0.0
+average reward score: 5.51953125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.21%) |Training time=0.46s (21.24%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8190|ppo_ep: 1|act_loss: -0.00807952880859375|cri_loss: -0.0038166046142578125|unsuper_loss: 0.0
+average reward score: 5.9140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.00%) |Training time=0.47s (21.42%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8191|ppo_ep: 1|act_loss: -0.0005846023559570312|cri_loss: -0.00010395050048828125|unsuper_loss: 0.0
+average reward score: 4.96484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.32%) |Training time=0.48s (22.09%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8192|ppo_ep: 1|act_loss: -0.023468017578125|cri_loss: -0.01143646240234375|unsuper_loss: 0.0
+average reward score: 5.546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.45%) |Training time=0.49s (22.02%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8193|ppo_ep: 1|act_loss: 0.004619598388671875|cri_loss: 0.00238800048828125|unsuper_loss: 0.0
+average reward score: 6.140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.65%) |Training time=0.48s (20.93%) |Others=0.10 (4.41%)|CurSamplesPerSec=14.10 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8194|ppo_ep: 1|act_loss: -0.001743316650390625|cri_loss: -0.0005774497985839844|unsuper_loss: 0.0
+average reward score: 5.59765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.18%) |Training time=0.45s (21.15%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8195|ppo_ep: 1|act_loss: -0.0200042724609375|cri_loss: -0.008514404296875|unsuper_loss: 0.0
+average reward score: 5.22265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.42%) |Training time=0.47s (20.11%) |Others=0.10 (4.47%)|CurSamplesPerSec=13.74 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8196|ppo_ep: 1|act_loss: -0.0178985595703125|cri_loss: -0.0088043212890625|unsuper_loss: 0.0
+average reward score: 5.734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.29%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8197|ppo_ep: 1|act_loss: 0.02801513671875|cri_loss: 0.0142059326171875|unsuper_loss: 0.0
+average reward score: 6.1875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.20%) |Training time=0.45s (21.14%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8198|ppo_ep: 1|act_loss: -0.0113067626953125|cri_loss: -0.004970550537109375|unsuper_loss: 0.0
+average reward score: 5.703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.06%) |Training time=0.46s (21.28%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.57
+[2023-04-14 13:47:40,025] [INFO] [logging.py:96:log_dist] [Rank 0] step=8200, skipped=103, lr=[9.497721415816521e-09, 9.497721415816521e-09], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:47:40,044] [INFO] [timer.py:199:stop] epoch=0/micro_step=8200/global_step=8200, RunningAvgSamplesPerSec=105.63448505052008, CurrSamplesPerSec=103.44995325327169, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:47:40,136] [INFO] [logging.py:96:log_dist] [Rank 0] step=8200, skipped=142, lr=[7.556371112877325e-09, 7.556371112877325e-09], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 8199|ppo_ep: 1|act_loss: 0.011444091796875|cri_loss: 0.006195068359375|unsuper_loss: 0.0
+average reward score: 6.37109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.72%) |Training time=0.47s (21.76%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8200|ppo_ep: 1|act_loss: -0.0039825439453125|cri_loss: -0.0019474029541015625|unsuper_loss: 0.0
+average reward score: 5.55078125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.45s (21.09%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8201|ppo_ep: 1|act_loss: 0.004802703857421875|cri_loss: 0.002658843994140625|unsuper_loss: 0.0
+average reward score: 5.48046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.35%) |Training time=0.45s (20.99%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8202|ppo_ep: 1|act_loss: -0.0181884765625|cri_loss: -0.00818634033203125|unsuper_loss: 0.0
+average reward score: 5.43359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.46%) |Training time=0.45s (20.86%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8203|ppo_ep: 1|act_loss: 0.010589599609375|cri_loss: 0.0053863525390625|unsuper_loss: 0.0
+average reward score: 5.34765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.35%) |Training time=0.45s (21.09%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8204|ppo_ep: 1|act_loss: 0.033782958984375|cri_loss: 0.0172882080078125|unsuper_loss: 0.0
+average reward score: 5.96484375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.17%) |Training time=0.45s (21.15%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8205|ppo_ep: 1|act_loss: 0.019134521484375|cri_loss: 0.01016998291015625|unsuper_loss: 0.0
+average reward score: 5.22265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.56%) |Training time=0.44s (20.74%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8206|ppo_ep: 1|act_loss: -0.020904541015625|cri_loss: -0.00949859619140625|unsuper_loss: 0.0
+average reward score: 5.4921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.96%) |Training time=0.46s (21.40%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8207|ppo_ep: 1|act_loss: -0.008209228515625|cri_loss: -0.00391387939453125|unsuper_loss: 0.0
+average reward score: 5.87890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.02%) |Training time=0.46s (21.37%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8208|ppo_ep: 1|act_loss: -0.0041656494140625|cri_loss: -0.001949310302734375|unsuper_loss: 0.0
+average reward score: 5.11328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.32%) |Training time=0.45s (21.01%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.57
+[2023-04-14 13:48:01,494] [INFO] [logging.py:96:log_dist] [Rank 0] step=8210, skipped=103, lr=[8.368430858746151e-09, 8.368430858746151e-09], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:48:01,512] [INFO] [timer.py:199:stop] epoch=0/micro_step=8210/global_step=8210, RunningAvgSamplesPerSec=105.63972264176873, CurrSamplesPerSec=108.21060710870746, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:48:01,604] [INFO] [logging.py:96:log_dist] [Rank 0] step=8210, skipped=142, lr=[6.827066535529947e-09, 6.827066535529947e-09], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 8209|ppo_ep: 1|act_loss: 0.01201629638671875|cri_loss: 0.006168365478515625|unsuper_loss: 0.0
+average reward score: 5.56640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.46s (21.34%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8210|ppo_ep: 1|act_loss: -0.012237548828125|cri_loss: -0.00591278076171875|unsuper_loss: 0.0
+average reward score: 5.8671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.94%) |Training time=0.46s (21.41%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8211|ppo_ep: 1|act_loss: -0.002490997314453125|cri_loss: -0.0008959770202636719|unsuper_loss: 0.0
+average reward score: 5.78125
+-------------------------------------------------------------------------------------
+|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.81%) |Training time=0.46s (19.86%) |Others=0.10 (4.33%)|CurSamplesPerSec=13.83 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8212|ppo_ep: 1|act_loss: -0.001079559326171875|cri_loss: -0.0004200935363769531|unsuper_loss: 0.0
+average reward score: 6.21875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.46s (21.30%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8213|ppo_ep: 1|act_loss: -0.004608154296875|cri_loss: -0.002239227294921875|unsuper_loss: 0.0
+average reward score: 4.78515625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.46s (21.38%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8214|ppo_ep: 1|act_loss: -0.0010585784912109375|cri_loss: -0.0004050731658935547|unsuper_loss: 0.0
+average reward score: 5.22265625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.98%) |Training time=0.46s (21.30%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8215|ppo_ep: 1|act_loss: 0.00254058837890625|cri_loss: 0.0013818740844726562|unsuper_loss: 0.0
+average reward score: 5.56640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.15%) |Training time=0.45s (21.18%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8216|ppo_ep: 1|act_loss: 0.033416748046875|cri_loss: 0.018310546875|unsuper_loss: 0.0
+average reward score: 5.42578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.05%) |Training time=0.46s (21.32%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8217|ppo_ep: 1|act_loss: 0.0007562637329101562|cri_loss: 0.0010824203491210938|unsuper_loss: 0.0
+average reward score: 5.421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.22%) |Training time=0.45s (21.11%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8218|ppo_ep: 1|act_loss: -0.0105743408203125|cri_loss: -0.004993438720703125|unsuper_loss: 0.0
+average reward score: 5.1796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.33%) |Training time=0.46s (21.05%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.57
+[2023-04-14 13:48:23,164] [INFO] [logging.py:96:log_dist] [Rank 0] step=8220, skipped=103, lr=[7.3105344010205834e-09, 7.3105344010205834e-09], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:48:23,182] [INFO] [timer.py:199:stop] epoch=0/micro_step=8220/global_step=8220, RunningAvgSamplesPerSec=105.64292040215666, CurrSamplesPerSec=107.84100454045472, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:48:23,275] [INFO] [logging.py:96:log_dist] [Rank 0] step=8220, skipped=142, lr=[6.134716795508644e-09, 6.134716795508644e-09], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 8219|ppo_ep: 1|act_loss: 0.07666015625|cri_loss: 0.0426025390625|unsuper_loss: 0.0
+average reward score: 5.8671875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.95%) |Training time=0.46s (21.43%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8220|ppo_ep: 1|act_loss: -0.00444793701171875|cri_loss: -0.002170562744140625|unsuper_loss: 0.0
+average reward score: 5.12890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.00%) |Training time=0.46s (21.36%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8221|ppo_ep: 1|act_loss: 0.0042572021484375|cri_loss: 0.002227783203125|unsuper_loss: 0.0
+average reward score: 5.77734375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.44%) |Training time=0.45s (21.00%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8222|ppo_ep: 1|act_loss: 0.030242919921875|cri_loss: 0.0159759521484375|unsuper_loss: 0.0
+average reward score: 5.5
+-------------------------------------------------------------------------------------
+|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.21%) |Training time=0.46s (21.23%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8223|ppo_ep: 1|act_loss: -0.0179443359375|cri_loss: -0.0082550048828125|unsuper_loss: 0.0
+average reward score: 5.62109375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.15%) |Training time=0.46s (21.27%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8224|ppo_ep: 1|act_loss: -0.004680633544921875|cri_loss: -0.0015468597412109375|unsuper_loss: 0.0
+average reward score: 5.625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.26%) |Training time=0.45s (21.07%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8225|ppo_ep: 1|act_loss: 0.0092010498046875|cri_loss: 0.004795074462890625|unsuper_loss: 0.0
+average reward score: 5.84765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.11%) |Training time=0.46s (21.23%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8226|ppo_ep: 1|act_loss: -0.004886627197265625|cri_loss: -0.002384185791015625|unsuper_loss: 0.0
+average reward score: 5.0234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.97%) |Training time=0.46s (19.70%) |Others=0.10 (4.33%)|CurSamplesPerSec=13.79 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8227|ppo_ep: 1|act_loss: -0.0252532958984375|cri_loss: -0.01244354248046875|unsuper_loss: 0.0
+average reward score: 4.96875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.45s (21.07%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8228|ppo_ep: 1|act_loss: 0.0002658367156982422|cri_loss: 0.0002161264419555664|unsuper_loss: 0.0
+average reward score: 6.0234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.23%) |Training time=0.45s (21.12%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.57
+[2023-04-14 13:48:44,872] [INFO] [logging.py:96:log_dist] [Rank 0] step=8230, skipped=103, lr=[6.324047723218296e-09, 6.324047723218296e-09], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:48:44,890] [INFO] [timer.py:199:stop] epoch=0/micro_step=8230/global_step=8230, RunningAvgSamplesPerSec=105.64653553431586, CurrSamplesPerSec=106.69110328035784, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:48:44,974] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1
+[2023-04-14 13:48:44,975] [INFO] [logging.py:96:log_dist] [Rank 0] step=8230, skipped=143, lr=[5.543206908509291e-09, 5.543206908509291e-09], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 8229|ppo_ep: 1|act_loss: -0.0010890960693359375|cri_loss: -0.00036597251892089844|unsuper_loss: 0.0
+average reward score: 5.44921875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.33%) |Training time=0.46s (21.58%) |Others=0.09 (4.09%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.57
+[2023-04-14 13:48:47,137] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768
+epoch: 0|step: 8230|ppo_ep: 1|act_loss: 0.00010842084884643555|cri_loss: 0.00010132789611816406|unsuper_loss: 0.0
+average reward score: 4.859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.48s (22.10%) |Others=0.09 (4.15%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8231|ppo_ep: 1|act_loss: 0.01229095458984375|cri_loss: 0.006389617919921875|unsuper_loss: 0.0
+average reward score: 5.546875
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.76%) |Training time=0.47s (21.62%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8232|ppo_ep: 1|act_loss: -0.05596923828125|cri_loss: -0.0206756591796875|unsuper_loss: 0.0
+average reward score: 5.4140625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.80%) |Training time=0.46s (21.53%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8233|ppo_ep: 1|act_loss: 0.0100555419921875|cri_loss: 0.005420684814453125|unsuper_loss: 0.0
+average reward score: 5.0234375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.65%) |Training time=0.47s (21.73%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8234|ppo_ep: 1|act_loss: 0.00278472900390625|cri_loss: 0.001552581787109375|unsuper_loss: 0.0
+average reward score: 5.42578125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.37%) |Training time=0.45s (20.96%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8235|ppo_ep: 1|act_loss: 0.005443572998046875|cri_loss: 0.00420379638671875|unsuper_loss: 0.0
+average reward score: 5.2890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.68%) |Training time=0.47s (21.77%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8236|ppo_ep: 1|act_loss: 0.0144195556640625|cri_loss: 0.0084686279296875|unsuper_loss: 0.0
+average reward score: 5.51171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.46s (21.54%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8237|ppo_ep: 1|act_loss: 0.01497650146484375|cri_loss: 0.00762939453125|unsuper_loss: 0.0
+average reward score: 5.93359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.81%) |Training time=0.46s (21.54%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8238|ppo_ep: 1|act_loss: 0.02374267578125|cri_loss: 0.0127105712890625|unsuper_loss: 0.0
+average reward score: 6.1640625
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.66%) |Training time=0.47s (21.71%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.57
+[2023-04-14 13:49:06,455] [INFO] [logging.py:96:log_dist] [Rank 0] step=8240, skipped=103, lr=[5.408985447451789e-09, 5.408985447451789e-09], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:49:06,473] [INFO] [timer.py:199:stop] epoch=0/micro_step=8240/global_step=8240, RunningAvgSamplesPerSec=105.64600782971256, CurrSamplesPerSec=105.71246698890796, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:49:06,566] [INFO] [logging.py:96:log_dist] [Rank 0] step=8240, skipped=144, lr=[4.981645860989148e-09, 4.981645860989148e-09], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 8239|ppo_ep: 1|act_loss: -0.0228118896484375|cri_loss: -0.01100921630859375|unsuper_loss: 0.0
+average reward score: 4.86328125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.81%) |Training time=0.47s (21.56%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8240|ppo_ep: 1|act_loss: 0.00240325927734375|cri_loss: 0.0013647079467773438|unsuper_loss: 0.0
+average reward score: 5.609375
+-------------------------------------------------------------------------------------
+|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.19%) |Training time=0.47s (21.06%) |Others=0.17 (7.75%)|CurSamplesPerSec=14.30 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8241|ppo_ep: 1|act_loss: -0.0002484321594238281|cri_loss: 6.747245788574219e-05|unsuper_loss: 0.0
+average reward score: 5.80859375
+-------------------------------------------------------------------------------------
+|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.67s (74.66%) |Training time=0.47s (20.86%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.33 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8242|ppo_ep: 1|act_loss: 0.01103973388671875|cri_loss: 0.006000518798828125|unsuper_loss: 0.0
+average reward score: 4.703125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.79%) |Training time=0.47s (21.57%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8243|ppo_ep: 1|act_loss: 0.030548095703125|cri_loss: 0.0157928466796875|unsuper_loss: 0.0
+average reward score: 6.09375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.79%) |Training time=0.47s (21.59%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8244|ppo_ep: 1|act_loss: -0.0150299072265625|cri_loss: -0.00738525390625|unsuper_loss: 0.0
+average reward score: 5.46875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.64%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8245|ppo_ep: 1|act_loss: -0.021026611328125|cri_loss: -0.01030731201171875|unsuper_loss: 0.0
+average reward score: 5.9375
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.84%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8246|ppo_ep: 1|act_loss: 0.00293731689453125|cri_loss: 0.0015716552734375|unsuper_loss: 0.0
+average reward score: 4.8984375
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.98%) |Training time=0.46s (21.38%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8247|ppo_ep: 1|act_loss: 0.0004968643188476562|cri_loss: 0.0003986358642578125|unsuper_loss: 0.0
+average reward score: 5.76171875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.84%) |Training time=0.47s (21.62%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8248|ppo_ep: 1|act_loss: 0.027130126953125|cri_loss: 0.01389312744140625|unsuper_loss: 0.0
+average reward score: 4.73828125
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.65%) |Training time=0.47s (21.74%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.57
+[2023-04-14 13:49:28,193] [INFO] [logging.py:96:log_dist] [Rank 0] step=8250, skipped=103, lr=[4.5653611371511645e-09, 4.5653611371511645e-09], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:49:28,212] [INFO] [timer.py:199:stop] epoch=0/micro_step=8250/global_step=8250, RunningAvgSamplesPerSec=105.64539594557534, CurrSamplesPerSec=105.90983071658027, MemAllocated=9.61GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:49:28,304] [INFO] [logging.py:96:log_dist] [Rank 0] step=8250, skipped=144, lr=[4.3928225118547444e-09, 4.3928225118547444e-09], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 8249|ppo_ep: 1|act_loss: 0.00682830810546875|cri_loss: 0.0035343170166015625|unsuper_loss: 0.0
+average reward score: 5.765625
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.46s (21.56%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8250|ppo_ep: 1|act_loss: 0.022125244140625|cri_loss: 0.01197052001953125|unsuper_loss: 0.0
+average reward score: 4.421875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.55%) |Training time=0.47s (21.54%) |Others=0.11 (4.91%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8251|ppo_ep: 1|act_loss: 0.021697998046875|cri_loss: 0.01129150390625|unsuper_loss: 0.0
+average reward score: 5.25
+-------------------------------------------------------------------------------------
+|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.07%) |Training time=0.46s (21.32%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8252|ppo_ep: 1|act_loss: 0.01220703125|cri_loss: 0.00616455078125|unsuper_loss: 0.0
+average reward score: 5.37890625
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.86%) |Training time=0.47s (20.71%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.15 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8253|ppo_ep: 1|act_loss: -0.006011962890625|cri_loss: -0.00284576416015625|unsuper_loss: 0.0
+average reward score: 5.3046875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.16%) |Training time=0.46s (21.17%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8254|ppo_ep: 1|act_loss: 0.0171051025390625|cri_loss: 0.0088653564453125|unsuper_loss: 0.0
+average reward score: 5.54296875
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.99%) |Training time=0.46s (21.37%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8255|ppo_ep: 1|act_loss: -0.02935791015625|cri_loss: -0.01371002197265625|unsuper_loss: 0.0
+average reward score: 5.34375
+-------------------------------------------------------------------------------------
+|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.64s (72.87%) |Training time=0.51s (22.66%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.18 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8256|ppo_ep: 1|act_loss: -0.0007495880126953125|cri_loss: 0.0006227493286132812|unsuper_loss: 0.0
+average reward score: 5.8125
+-------------------------------------------------------------------------------------
+|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.05%) |Training time=0.46s (21.30%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8257|ppo_ep: 1|act_loss: 0.00316619873046875|cri_loss: 0.0016927719116210938|unsuper_loss: 0.0
+average reward score: 5.66796875
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.01%) |Training time=0.46s (21.35%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.57
+epoch: 0|step: 8258|ppo_ep: 1|act_loss: 0.00679779052734375|cri_loss: 0.0038509368896484375|unsuper_loss: 0.0
+average reward score: 5.43359375
+-------------------------------------------------------------------------------------
+|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.46s (21.29%) |Others=0.11 (4.93%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.57
+[2023-04-14 13:49:49,728] [INFO] [logging.py:96:log_dist] [Rank 0] step=8260, skipped=103, lr=[3.793187296863779e-09, 3.793187296863779e-09], mom=[(0.9, 0.95), (0.9, 0.95)]
+[2023-04-14 13:49:49,747] [INFO] [timer.py:199:stop] epoch=0/micro_step=8260/global_step=8260, RunningAvgSamplesPerSec=105.65211622011834, CurrSamplesPerSec=163.9855291500147, MemAllocated=9.46GB, MaxMemAllocated=19.38GB
+[2023-04-14 13:49:50,315] [INFO] [logging.py:96:log_dist] [Rank 0] step=8260, skipped=144, lr=[3.840990081415141e-09, 3.840990081415141e-09], mom=[(0.9, 0.95), (0.9, 0.95)]
+epoch: 0|step: 8259|ppo_ep: 1|act_loss: 0.0166015625|cri_loss: 0.00836181640625|unsuper_loss: 0.0
+average reward score: 6.16015625
+-------------------------------------------------------------------------------------
+saving model ...
+saving model ...saving model ...saving model ...
+
+
+saving model ...saving model ...
+saving model ...saving model ...
+
+
+[2023-04-14 13:49:56,880] [INFO] [launch.py:460:main] Process 412204 exits successfully.
+[2023-04-14 13:49:57,881] [INFO] [launch.py:460:main] Process 412199 exits successfully.
+[2023-04-14 13:49:57,881] [INFO] [launch.py:460:main] Process 412200 exits successfully.
+[2023-04-14 13:49:57,881] [INFO] [launch.py:460:main] Process 412205 exits successfully.
+[2023-04-14 13:49:57,881] [INFO] [launch.py:460:main] Process 412201 exits successfully.
+[2023-04-14 13:49:58,883] [INFO] [launch.py:460:main] Process 412202 exits successfully.
+[2023-04-14 13:49:58,883] [INFO] [launch.py:460:main] Process 412203 exits successfully.
+[2023-04-14 13:50:01,886] [INFO] [launch.py:460:main] Process 412198 exits successfully.