diff --git "a/training.log" "b/training.log" new file mode 100644--- /dev/null +++ "b/training.log" @@ -0,0 +1,37700 @@ +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version! + warnings.warn( +[2023-04-14 08:32:38,890] [WARNING] [runner.py:190:fetch_hostfile] Unable to find hostfile, will proceed with training with local resources only. +[2023-04-14 08:32:39,797] [INFO] [runner.py:540:main] cmd = /home/minutiae/.conda/envs/py39/bin/python -u -m deepspeed.launcher.launch --world_info=eyJsb2NhbGhvc3QiOiBbMCwgMSwgMiwgMywgNCwgNSwgNiwgN119 --master_addr=127.0.0.1 --master_port=12346 --enable_each_rank_log=None main.py --data_path Dahoas/rm-static Dahoas/full-hh-rlhf Dahoas/synthetic-instruct-gptj-pairwise yitingxie/rlhf-reward-datasets openai/webgpt_comparisons stanfordnlp/SHP --data_split 2,4,4 --actor_model_name_or_path /lus/grand/projects/BNN-Scale/chatgpt/hf_runs/DeepSpeedExamples/applications/DeepSpeed-Chat/output/actor-models/1.3b --critic_model_name_or_path /lus/grand/projects/BNN-Scale/chatgpt/hf_runs/DeepSpeedExamples/applications/DeepSpeed-Chat/output/reward-models/350m --num_padding_at_beginning 1 --per_device_train_batch_size 4 --per_device_mini_train_batch_size 4 --generation_batch_numbers 1 --ppo_epochs 1 --max_answer_seq_len 256 --max_prompt_seq_len 256 --actor_learning_rate 9.65e-6 --critic_learning_rate 5e-6 --actor_weight_decay 0.1 --critic_weight_decay 0.1 --num_train_epochs 1 --lr_scheduler_type cosine --gradient_accumulation_steps 1 --num_warmup_steps 100 --deepspeed --seed 1234 --enable_hybrid_engine --actor_zero_stage 2 --critic_zero_stage 2 --output_dir /lus/grand/projects/BNN-Scale/chatgpt/hf_runs/DeepSpeedExamples/applications/DeepSpeed-Chat/output/step3-models/1.3b +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version! + warnings.warn( +[2023-04-14 08:32:50,431] [INFO] [launch.py:229:main] WORLD INFO DICT: {'localhost': [0, 1, 2, 3, 4, 5, 6, 7]} +[2023-04-14 08:32:50,884] [INFO] [launch.py:235:main] nnodes=1, num_local_procs=8, node_rank=0 +[2023-04-14 08:32:50,884] [INFO] [launch.py:246:main] global_rank_mapping=defaultdict(, {'localhost': [0, 1, 2, 3, 4, 5, 6, 7]}) +[2023-04-14 08:32:50,884] [INFO] [launch.py:247:main] dist_world_size=8 +[2023-04-14 08:32:50,884] [INFO] [launch.py:249:main] Setting CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version! + warnings.warn( +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version! + warnings.warn( +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version! + warnings.warn( +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version! + warnings.warn( +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version! + warnings.warn( +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version! + warnings.warn( +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version! + warnings.warn( +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version! + warnings.warn( +[2023-04-14 08:35:14,863] [INFO] [comm.py:586:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl +Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec) + 0%| | 0/2 [00:00 +[2023-04-14 08:44:13,193] [INFO] [logging.py:96:log_dist] [Rank 0] Creating torch.float16 ZeRO stage 2 optimizer +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +[2023-04-14 08:44:13,703] [INFO] [stage_1_and_2.py:133:__init__] Reduce bucket size 500,000,000 +[2023-04-14 08:44:13,704] [INFO] [stage_1_and_2.py:134:__init__] Allgather bucket size 500,000,000 +[2023-04-14 08:44:13,704] [INFO] [stage_1_and_2.py:135:__init__] CPU Offload: False +[2023-04-14 08:44:13,704] [INFO] [stage_1_and_2.py:136:__init__] Round robin gradient partitioning: False +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Emitting ninja build file /home/minutiae/.cache/torch_extensions/py39_cu113/utils/build.ninja... +Building extension module utils... +Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +ninja: no work to do. +Loading extension module utils... +Time to load utils op: 7.119100570678711 seconds +Loading extension module utils... +Time to load utils op: 6.615035772323608 seconds +Loading extension module utils... +Loading extension module utils... +Time to load utils op: 7.214683532714844 seconds +Loading extension module utils... +Loading extension module utils... +Loading extension module utils... +Loading extension module utils... +Time to load utils op: 7.2167649269104 seconds +Time to load utils op: 7.216015338897705 seconds +Time to load utils op: 7.218137264251709 seconds +Time to load utils op: 7.217472076416016 seconds +Time to load utils op: 7.218402147293091 seconds +Rank: 0 partition count [8, 8] and sizes[(164401920, False), (67840, False)] +Rank: 1 partition count [8, 8] and sizes[(164401920, False), (67840, False)] +Rank: 4 partition count [8, 8] and sizes[(164401920, False), (67840, False)] +Rank: 6 partition count [8, 8] and sizes[(164401920, False), (67840, False)] +Rank: 3 partition count [8, 8] and sizes[(164401920, False), (67840, False)] +Rank: 2 partition count [8, 8] and sizes[(164401920, False), (67840, False)] +Rank: 7 partition count [8, 8] and sizes[(164401920, False), (67840, False)] +Rank: 5 partition count [8, 8] and sizes[(164401920, False), (67840, False)] +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Time to load utils op: 0.0018334388732910156 seconds +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Time to load utils op: 0.0009965896606445312 seconds +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.001102447509765625 seconds +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0010309219360351562 seconds +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0010993480682373047 seconds +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Time to load utils op: 0.0011150836944580078 seconds +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0014064311981201172 seconds +[2023-04-14 08:44:34,254] [INFO] [utils.py:785:see_memory_usage] Before initializing optimizer states +[2023-04-14 08:44:34,255] [INFO] [utils.py:786:see_memory_usage] MA 3.06 GB Max_MA 3.06 GB CA 3.07 GB Max_CA 3 GB +[2023-04-14 08:44:34,255] [INFO] [utils.py:793:see_memory_usage] CPU Virtual Memory: used = 111.88 GB, percent = 11.1% +[2023-04-14 08:44:34,948] [INFO] [utils.py:785:see_memory_usage] After initializing optimizer states +[2023-04-14 08:44:34,949] [INFO] [utils.py:786:see_memory_usage] MA 4.29 GB Max_MA 4.91 GB CA 4.91 GB Max_CA 5 GB +[2023-04-14 08:44:34,949] [INFO] [utils.py:793:see_memory_usage] CPU Virtual Memory: used = 111.87 GB, percent = 11.1% +[2023-04-14 08:44:34,949] [INFO] [stage_1_and_2.py:489:__init__] optimizer state initialized +[2023-04-14 08:44:35,656] [INFO] [utils.py:785:see_memory_usage] After initializing ZeRO optimizer +[2023-04-14 08:44:35,657] [INFO] [utils.py:786:see_memory_usage] MA 4.29 GB Max_MA 4.29 GB CA 4.91 GB Max_CA 5 GB +[2023-04-14 08:44:35,657] [INFO] [utils.py:793:see_memory_usage] CPU Virtual Memory: used = 111.85 GB, percent = 11.1% +[2023-04-14 08:44:35,659] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Final Optimizer = FusedAdam +[2023-04-14 08:44:35,659] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed using client LR scheduler +[2023-04-14 08:44:35,659] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed LR Scheduler = +[2023-04-14 08:44:35,659] [INFO] [logging.py:96:log_dist] [Rank 0] step=0, skipped=0, lr=[0.0, 0.0], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 08:44:35,660] [INFO] [config.py:953:print] DeepSpeedEngine configuration: +[2023-04-14 08:44:35,660] [INFO] [config.py:957:print] activation_checkpointing_config { + "partition_activations": false, + "contiguous_memory_optimization": false, + "cpu_checkpointing": false, + "number_checkpoints": null, + "synchronize_checkpoint_boundary": false, + "profile": false +} +[2023-04-14 08:44:35,660] [INFO] [config.py:957:print] aio_config ................... {'block_size': 1048576, 'queue_depth': 8, 'thread_count': 1, 'single_submit': False, 'overlap_events': True} +[2023-04-14 08:44:35,660] [INFO] [config.py:957:print] amp_enabled .................. False +[2023-04-14 08:44:35,660] [INFO] [config.py:957:print] amp_params ................... False +[2023-04-14 08:44:35,660] [INFO] [config.py:957:print] autotuning_config ............ { + "enabled": false, + "start_step": null, + "end_step": null, + "metric_path": null, + "arg_mappings": null, + "metric": "throughput", + "model_info": null, + "results_dir": "autotuning_results", + "exps_dir": "autotuning_exps", + "overwrite": true, + "fast": true, + "start_profile_step": 3, + "end_profile_step": 5, + "tuner_type": "gridsearch", + "tuner_early_stopping": 5, + "tuner_num_trials": 50, + "model_info_path": null, + "mp_size": 1, + "max_train_batch_size": null, + "min_train_batch_size": 1, + "max_train_micro_batch_size_per_gpu": 1.024000e+03, + "min_train_micro_batch_size_per_gpu": 1, + "num_tuning_micro_batch_sizes": 3 +} +[2023-04-14 08:44:35,660] [INFO] [config.py:957:print] bfloat16_enabled ............. False +[2023-04-14 08:44:35,660] [INFO] [config.py:957:print] checkpoint_parallel_write_pipeline False +[2023-04-14 08:44:35,660] [INFO] [config.py:957:print] checkpoint_tag_validation_enabled True +[2023-04-14 08:44:35,660] [INFO] [config.py:957:print] checkpoint_tag_validation_fail False +[2023-04-14 08:44:35,660] [INFO] [config.py:957:print] comms_config ................. +[2023-04-14 08:44:35,660] [INFO] [config.py:957:print] communication_data_type ...... None +[2023-04-14 08:44:35,661] [INFO] [config.py:957:print] compression_config ........... {'weight_quantization': {'shared_parameters': {'enabled': False, 'quantizer_kernel': False, 'schedule_offset': 0, 'quantize_groups': 1, 'quantize_verbose': False, 'quantization_type': 'symmetric', 'quantize_weight_in_forward': False, 'rounding': 'nearest', 'fp16_mixed_quantize': False, 'quantize_change_ratio': 0.001}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {'enabled': False, 'quantization_type': 'symmetric', 'range_calibration': 'dynamic', 'schedule_offset': 1000}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {'enabled': False, 'method': 'topk', 'schedule_offset': 1000}, 'different_groups': {}}, 'channel_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'layer_reduction': {'enabled': False}} +[2023-04-14 08:44:35,661] [INFO] [config.py:957:print] curriculum_enabled_legacy .... False +[2023-04-14 08:44:35,661] [INFO] [config.py:957:print] curriculum_params_legacy ..... False +[2023-04-14 08:44:35,661] [INFO] [config.py:957:print] data_efficiency_config ....... {'enabled': False, 'seed': 1234, 'data_sampling': {'enabled': False, 'num_epochs': 1000, 'num_workers': 0, 'curriculum_learning': {'enabled': False}}, 'data_routing': {'enabled': False, 'random_ltd': {'enabled': False, 'layer_token_lr_schedule': {'enabled': False}}}} +[2023-04-14 08:44:35,661] [INFO] [config.py:957:print] data_efficiency_enabled ...... False +[2023-04-14 08:44:35,661] [INFO] [config.py:957:print] dataloader_drop_last ......... False +[2023-04-14 08:44:35,661] [INFO] [config.py:957:print] disable_allgather ............ False +[2023-04-14 08:44:35,661] [INFO] [config.py:957:print] dump_state ................... False +[2023-04-14 08:44:35,661] [INFO] [config.py:957:print] dynamic_loss_scale_args ...... {'init_scale': 65536, 'scale_window': 100, 'delayed_shift': 2, 'min_scale': 1} +[2023-04-14 08:44:35,661] [INFO] [config.py:957:print] eigenvalue_enabled ........... False +[2023-04-14 08:44:35,661] [INFO] [config.py:957:print] eigenvalue_gas_boundary_resolution 1 +[2023-04-14 08:44:35,661] [INFO] [config.py:957:print] eigenvalue_layer_name ........ bert.encoder.layer +[2023-04-14 08:44:35,661] [INFO] [config.py:957:print] eigenvalue_layer_num ......... 0 +[2023-04-14 08:44:35,661] [INFO] [config.py:957:print] eigenvalue_max_iter .......... 100 +[2023-04-14 08:44:35,661] [INFO] [config.py:957:print] eigenvalue_stability ......... 1e-06 +[2023-04-14 08:44:35,661] [INFO] [config.py:957:print] eigenvalue_tol ............... 0.01 +[2023-04-14 08:44:35,661] [INFO] [config.py:957:print] eigenvalue_verbose ........... False +[2023-04-14 08:44:35,661] [INFO] [config.py:957:print] elasticity_enabled ........... False +[2023-04-14 08:44:35,661] [INFO] [config.py:957:print] flops_profiler_config ........ { + "enabled": false, + "profile_step": 1, + "module_depth": -1, + "top_modules": 1, + "detailed": true, + "output_file": null +} +[2023-04-14 08:44:35,661] [INFO] [config.py:957:print] fp16_auto_cast ............... False +[2023-04-14 08:44:35,661] [INFO] [config.py:957:print] fp16_enabled ................. True +[2023-04-14 08:44:35,661] [INFO] [config.py:957:print] fp16_master_weights_and_gradients False +[2023-04-14 08:44:35,661] [INFO] [config.py:957:print] global_rank .................. 0 +[2023-04-14 08:44:35,661] [INFO] [config.py:957:print] grad_accum_dtype ............. None +[2023-04-14 08:44:35,661] [INFO] [config.py:957:print] gradient_accumulation_steps .. 1 +[2023-04-14 08:44:35,661] [INFO] [config.py:957:print] gradient_clipping ............ 1.0 +[2023-04-14 08:44:35,661] [INFO] [config.py:957:print] gradient_predivide_factor .... 1.0 +[2023-04-14 08:44:35,661] [INFO] [config.py:957:print] hybrid_engine ................ enabled=True max_out_tokens=512 inference_tp_size=1 release_inference_cache=False pin_parameters=True tp_gather_partition_size=8 +[2023-04-14 08:44:35,661] [INFO] [config.py:957:print] initial_dynamic_scale ........ 65536 +[2023-04-14 08:44:35,661] [INFO] [config.py:957:print] load_universal_checkpoint .... False +[2023-04-14 08:44:35,661] [INFO] [config.py:957:print] loss_scale ................... 0 +[2023-04-14 08:44:35,661] [INFO] [config.py:957:print] memory_breakdown ............. False +[2023-04-14 08:44:35,662] [INFO] [config.py:957:print] monitor_config ............... tensorboard=TensorBoardConfig(enabled=False, output_path='', job_name='DeepSpeedJobName') wandb=WandbConfig(enabled=False, group=None, team=None, project='deepspeed') csv_monitor=CSVConfig(enabled=False, output_path='', job_name='DeepSpeedJobName') enabled=False +[2023-04-14 08:44:35,662] [INFO] [config.py:957:print] nebula_config ................ { + "enabled": false, + "persistent_storage_path": null, + "persistent_time_interval": 100, + "num_of_version_in_retention": 2, + "enable_nebula_load": true, + "load_path": null +} +[2023-04-14 08:44:35,662] [INFO] [config.py:957:print] optimizer_legacy_fusion ...... False +[2023-04-14 08:44:35,662] [INFO] [config.py:957:print] optimizer_name ............... None +[2023-04-14 08:44:35,662] [INFO] [config.py:957:print] optimizer_params ............. None +[2023-04-14 08:44:35,662] [INFO] [config.py:957:print] pipeline ..................... {'stages': 'auto', 'partition': 'best', 'seed_layers': False, 'activation_checkpoint_interval': 0} +[2023-04-14 08:44:35,662] [INFO] [config.py:957:print] pld_enabled .................. False +[2023-04-14 08:44:35,662] [INFO] [config.py:957:print] pld_params ................... False +[2023-04-14 08:44:35,662] [INFO] [config.py:957:print] prescale_gradients ........... False +[2023-04-14 08:44:35,662] [INFO] [config.py:957:print] scheduler_name ............... None +[2023-04-14 08:44:35,662] [INFO] [config.py:957:print] scheduler_params ............. None +[2023-04-14 08:44:35,662] [INFO] [config.py:957:print] sparse_attention ............. None +[2023-04-14 08:44:35,662] [INFO] [config.py:957:print] sparse_gradients_enabled ..... False +[2023-04-14 08:44:35,662] [INFO] [config.py:957:print] steps_per_print .............. 10 +[2023-04-14 08:44:35,662] [INFO] [config.py:957:print] train_batch_size ............. 32 +[2023-04-14 08:44:35,662] [INFO] [config.py:957:print] train_micro_batch_size_per_gpu 4 +[2023-04-14 08:44:35,662] [INFO] [config.py:957:print] use_node_local_storage ....... False +[2023-04-14 08:44:35,662] [INFO] [config.py:957:print] wall_clock_breakdown ......... False +[2023-04-14 08:44:35,662] [INFO] [config.py:957:print] world_size ................... 8 +[2023-04-14 08:44:35,662] [INFO] [config.py:957:print] zero_allow_untested_optimizer False +[2023-04-14 08:44:35,662] [INFO] [config.py:957:print] zero_config .................. stage=2 contiguous_gradients=True reduce_scatter=True reduce_bucket_size=500,000,000 allgather_partitions=True allgather_bucket_size=500,000,000 overlap_comm=False load_from_fp32_weights=True elastic_checkpoint=False offload_param=DeepSpeedZeroOffloadParamConfig(device='none', nvme_path=None, buffer_count=5, buffer_size=100,000,000, max_in_cpu=1,000,000,000, pin_memory=False) offload_optimizer=DeepSpeedZeroOffloadOptimizerConfig(device='none', nvme_path=None, buffer_count=4, pin_memory=False, pipeline=False, pipeline_read=False, pipeline_write=False, fast_init=False) sub_group_size=1,000,000,000 cpu_offload_param=None cpu_offload_use_pin_memory=None cpu_offload=None prefetch_bucket_size=30000000 param_persistence_threshold=10000 model_persistence_threshold=sys.maxsize max_live_parameters=30000000 max_reuse_distance=1,000,000,000 gather_16bit_weights_on_model_save=False stage3_gather_fp16_weights_on_model_save=False ignore_unused_parameters=True legacy_stage1=False round_robin_gradients=False memory_efficient_linear=False +[2023-04-14 08:44:35,662] [INFO] [config.py:957:print] zero_enabled ................. True +[2023-04-14 08:44:35,662] [INFO] [config.py:957:print] zero_force_ds_cpu_optimizer .. True +[2023-04-14 08:44:35,662] [INFO] [config.py:957:print] zero_optimization_stage ...... 2 +[2023-04-14 08:44:35,662] [INFO] [config.py:943:print_user_config] json = { + "train_batch_size": 32, + "train_micro_batch_size_per_gpu": 4, + "steps_per_print": 10, + "zero_optimization": { + "stage": 2, + "offload_param": { + "device": "none" + }, + "offload_optimizer": { + "device": "none" + }, + "stage3_param_persistence_threshold": 1.000000e+04, + "stage3_max_live_parameters": 3.000000e+07, + "stage3_prefetch_bucket_size": 3.000000e+07, + "memory_efficient_linear": false + }, + "fp16": { + "enabled": true, + "loss_scale_window": 100 + }, + "gradient_clipping": 1.0, + "prescale_gradients": false, + "wall_clock_breakdown": false, + "hybrid_engine": { + "enabled": true, + "inference_tp_size": 1, + "release_inference_cache": false, + "pin_parameters": true, + "tp_gather_partition_size": 8 + } +} +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0014083385467529297 seconds +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combinationUsing /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... + +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Detected CUDA files, patching ldflags +Emitting ninja build file /home/minutiae/.cache/torch_extensions/py39_cu113/transformer_inference/build.ninja... +Building extension module transformer_inference... +Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +[1/9] /usr/local/cuda-11.4/bin/nvcc -ccbin /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/minutiae/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/dequantize.cu -o dequantize.cuda.o +[2/9] /usr/local/cuda-11.4/bin/nvcc -ccbin /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/minutiae/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/gelu.cu -o gelu.cuda.o +[3/9] /usr/local/cuda-11.4/bin/nvcc -ccbin /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/minutiae/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/relu.cu -o relu.cuda.o +[4/9] /usr/local/cuda-11.4/bin/nvcc -ccbin /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/minutiae/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/apply_rotary_pos_emb.cu -o apply_rotary_pos_emb.cuda.o +[5/9] /usr/local/cuda-11.4/bin/nvcc -ccbin /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/minutiae/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/transform.cu -o transform.cuda.o +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/transform.cu(57): warning: variable "lane" was declared but never referenced + +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/transform.cu(94): warning: variable "half_dim" was declared but never referenced + +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/transform.cu(111): warning: variable "vals_half" was declared but never referenced + +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/transform.cu(112): warning: variable "output_half" was declared but never referenced + +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/transform.cu(129): warning: variable "lane" was declared but never referenced + +[6/9] /usr/local/cuda-11.4/bin/nvcc -ccbin /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/minutiae/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/softmax.cu -o softmax.cuda.o +[7/9] /usr/local/cuda-11.4/bin/nvcc -ccbin /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/minutiae/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu -o layer_norm.cuda.o +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced + detected during: + instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=1, maxThreads=256]" +(166): here + instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" +(192): here + +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced + detected during: + instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=1, maxThreads=256]" +(166): here + instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" +(192): here + +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced + detected during: + instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=2, maxThreads=256]" +(168): here + instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" +(192): here + +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced + detected during: + instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=2, maxThreads=256]" +(168): here + instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" +(192): here + +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced + detected during: + instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=4, maxThreads=256]" +(170): here + instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" +(192): here + +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced + detected during: + instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=4, maxThreads=256]" +(170): here + instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" +(192): here + +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced + detected during: + instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=8, maxThreads=256]" +(172): here + instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" +(192): here + +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced + detected during: + instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=8, maxThreads=256]" +(172): here + instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" +(192): here + +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced + detected during: + instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=16, maxThreads=256]" +(174): here + instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" +(192): here + +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced + detected during: + instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=16, maxThreads=256]" +(174): here + instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" +(192): here + +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced + detected during: + instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=2, threadsPerGroup=256, maxThreads=256]" +(179): here + instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" +(192): here + +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced + detected during: + instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=2, threadsPerGroup=256, maxThreads=256]" +(179): here + instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" +(192): here + +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced + detected during: + instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=4, threadsPerGroup=256, maxThreads=256]" +(182): here + instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" +(192): here + +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced + detected during: + instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=4, threadsPerGroup=256, maxThreads=256]" +(182): here + instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" +(192): here + +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced + detected during: + instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=6, threadsPerGroup=256, maxThreads=256]" +(185): here + instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" +(192): here + +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced + detected during: + instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=6, threadsPerGroup=256, maxThreads=256]" +(185): here + instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" +(192): here + +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced + detected during: + instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=8, threadsPerGroup=256, maxThreads=256]" +(188): here + instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" +(192): here + +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced + detected during: + instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=8, threadsPerGroup=256, maxThreads=256]" +(188): here + instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" +(192): here + +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced + detected during: + instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=1, maxThreads=256]" +(166): here + instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" +(200): here + +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced + detected during: + instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=1, maxThreads=256]" +(166): here + instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" +(200): here + +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced + detected during: + instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=2, maxThreads=256]" +(168): here + instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" +(200): here + +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced + detected during: + instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=2, maxThreads=256]" +(168): here + instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" +(200): here + +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced + detected during: + instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=4, maxThreads=256]" +(170): here + instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" +(200): here + +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced + detected during: + instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=4, maxThreads=256]" +(170): here + instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" +(200): here + +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced + detected during: + instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=8, maxThreads=256]" +(172): here + instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" +(200): here + +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced + detected during: + instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=8, maxThreads=256]" +(172): here + instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" +(200): here + +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced + detected during: + instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=16, maxThreads=256]" +(174): here + instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" +(200): here + +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced + detected during: + instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=16, maxThreads=256]" +(174): here + instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" +(200): here + +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced + detected during: + instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=4, threadsPerGroup=256, maxThreads=256]" +(179): here + instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" +(200): here + +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced + detected during: + instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=4, threadsPerGroup=256, maxThreads=256]" +(179): here + instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" +(200): here + +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced + detected during: + instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=8, threadsPerGroup=256, maxThreads=256]" +(182): here + instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" +(200): here + +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced + detected during: + instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=8, threadsPerGroup=256, maxThreads=256]" +(182): here + instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" +(200): here + +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced + detected during: + instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=12, threadsPerGroup=256, maxThreads=256]" +(185): here + instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" +(200): here + +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced + detected during: + instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=12, threadsPerGroup=256, maxThreads=256]" +(185): here + instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" +(200): here + +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced + detected during: + instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=16, threadsPerGroup=256, maxThreads=256]" +(188): here + instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" +(200): here + +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced + detected during: + instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=16, threadsPerGroup=256, maxThreads=256]" +(188): here + instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" +(200): here + +[8/9] /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicxx -MMD -MF pt_binding.o.d -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/minutiae/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -fPIC -std=c++14 -O3 -std=c++14 -g -Wno-reorder -c /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp -o pt_binding.o +In file included from /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:10: +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes/inference_context.h: In member function ‘void InferenceContext::GenWorkSpace(const unsigned int&, const unsigned int&, const size_t&, const size_t&, const size_t&, const unsigned int&, const bool&, const size_t&, const unsigned int&, unsigned int, unsigned int)’: +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes/inference_context.h:139:52: warning: format ‘%d’ expects argument of type ‘int’, but argument 2 has type ‘size_t’ {aka ‘long unsigned int’} [-Wformat=] + 139 | "Allocatable workspace available (%d tokens) is less than minimum requested " + | ~^ + | | + | int + | %ld + 140 | "workspace (%d tokens)\n", + 141 | _max_seq_len, + | ~~~~~~~~~~~~ + | | + | size_t {aka long unsigned int} +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp: In instantiation of ‘std::vector ds_softmax_context(at::Tensor&, at::Tensor&, int, bool, bool, int, float, bool, bool, int, bool, unsigned int, unsigned int, at::Tensor&) [with T = float]’: +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:1750:99: required from here +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:536:50: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing] + 536 | {hidden_dim * InferenceContext::Instance().GetMaxTokenLenght(), + | ~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:536:50: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing] +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:537:41: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing] + 537 | k * InferenceContext::Instance().GetMaxTokenLenght(), + | ~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:537:41: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing] +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:545:38: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing] + 545 | {hidden_dim * InferenceContext::Instance().GetMaxTokenLenght(), + | ~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:545:38: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing] +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:546:29: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing] + 546 | k * InferenceContext::Instance().GetMaxTokenLenght(), + | ~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:546:29: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing] +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp: In instantiation of ‘std::vector ds_softmax_context(at::Tensor&, at::Tensor&, int, bool, bool, int, float, bool, bool, int, bool, unsigned int, unsigned int, at::Tensor&) [with T = __half]’: +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:1753:49: required from here +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:536:50: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing] + 536 | {hidden_dim * InferenceContext::Instance().GetMaxTokenLenght(), + | ~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:536:50: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing] +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:537:41: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing] + 537 | k * InferenceContext::Instance().GetMaxTokenLenght(), + | ~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:537:41: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing] +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:545:38: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing] + 545 | {hidden_dim * InferenceContext::Instance().GetMaxTokenLenght(), + | ~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:545:38: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing] +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:546:29: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing] + 546 | k * InferenceContext::Instance().GetMaxTokenLenght(), + | ~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:546:29: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing] +[9/9] /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicxx pt_binding.o gelu.cuda.o relu.cuda.o layer_norm.cuda.o softmax.cuda.o dequantize.cuda.o apply_rotary_pos_emb.cuda.o transform.cuda.o -shared -lcurand -L/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/lib -lc10 -lc10_cuda -ltorch_cpu -ltorch_cuda_cu -ltorch_cuda_cpp -ltorch -ltorch_python -L/usr/local/cuda-11.4/lib64 -lcudart -o transformer_inference.so +Loading extension module transformer_inference... +Loading extension module transformer_inference... +Time to load transformer_inference op: 33.57252836227417 seconds +Time to load transformer_inference op: 33.4696843624115 seconds +Loading extension module transformer_inference... +Time to load transformer_inference op: 33.380457639694214 seconds +Loading extension module transformer_inference... +Loading extension module transformer_inference... +Time to load transformer_inference op: 33.478936433792114 seconds +Time to load transformer_inference op: 33.23845458030701 seconds +Loading extension module transformer_inference... +Time to load transformer_inference op: 32.77969670295715 seconds +[2023-04-14 08:45:10,904] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed-Inference config: {'layer_id': 0, 'hidden_size': 2048, 'intermediate_size': 8192, 'heads': 32, 'num_hidden_layers': -1, 'fp16': True, 'pre_layer_norm': True, 'local_rank': -1, 'stochastic_mode': False, 'epsilon': 1e-05, 'mp_size': 1, 'q_int8': False, 'scale_attention': True, 'triangular_masking': True, 'local_attention': False, 'window_size': 1, 'rotary_dim': -1, 'rotate_half': False, 'rotate_every_two': True, 'return_tuple': True, 'mlp_after_attn': True, 'mlp_act_func_type': , 'specialized_mode': False, 'training_mp_size': 1, 'bigscience_bloom': False, 'max_out_tokens': 512, 'min_out_tokens': 512, 'scale_attn_by_inverse_layer_idx': False, 'enable_qkv_quantization': False, 'use_mup': False, 'return_single_tuple': False, 'set_empty_params': True, 'transposed_mode': True} +Loading extension module transformer_inference... +Time to load transformer_inference op: 33.499565839767456 seconds +Loading extension module transformer_inference... +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Time to load transformer_inference op: 33.51667404174805 seconds +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module transformer_inference, skipping build step... +Loading extension module transformer_inference... +Time to load transformer_inference op: 0.17304539680480957 seconds +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module transformer_inference, skipping build step... +Loading extension module transformer_inference... +Time to load transformer_inference op: 0.2198350429534912 seconds +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module transformer_inference, skipping build step... +Loading extension module transformer_inference... +Time to load transformer_inference op: 0.23125028610229492 seconds +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module transformer_inference, skipping build step... +Loading extension module transformer_inference... +Time to load transformer_inference op: 0.23965048789978027 seconds +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module transformer_inference, skipping build step... +Loading extension module transformer_inference... +Time to load transformer_inference op: 0.24375414848327637 seconds +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module transformer_inference, skipping build step... +Loading extension module transformer_inference... +Time to load transformer_inference op: 0.25126194953918457 seconds +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module transformer_inference, skipping build step... +Loading extension module transformer_inference... +Time to load transformer_inference op: 0.31780099868774414 seconds +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module transformer_inference, skipping build step... +Loading extension module transformer_inference... +Time to load transformer_inference op: 0.1689906120300293 seconds +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +No modifications detected for re-loaded extension module transformer_inference, skipping build step... +Loading extension module transformer_inference... +Time to load transformer_inference op: 0.33678388595581055 seconds +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module transformer_inference, skipping build step... +Loading extension module transformer_inference... +Time to load transformer_inference op: 0.2203667163848877 seconds +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module transformer_inference, skipping build step... +Loading extension module transformer_inference... +Time to load transformer_inference op: 0.2368457317352295 seconds +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module transformer_inference, skipping build step... +Loading extension module transformer_inference... +Time to load transformer_inference op: 0.2534804344177246 seconds +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module transformer_inference, skipping build step... +Loading extension module transformer_inference... +Time to load transformer_inference op: 0.2447066307067871 seconds +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module transformer_inference, skipping build step... +Loading extension module transformer_inference... +Time to load transformer_inference op: 0.27658915519714355 seconds +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module transformer_inference, skipping build step... +Loading extension module transformer_inference... +Time to load transformer_inference op: 0.3282308578491211 seconds +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module transformer_inference, skipping build step... +Loading extension module transformer_inference... +Time to load transformer_inference op: 0.8675589561462402 seconds +*****************[end] Initialized Actor Model [end] (duration: 119.75s)****************** +*************************[start] Initializing Ref Model [start] ************************** +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... + +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 1.891409158706665 seconds +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.8250091075897217 seconds +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0019402503967285156 seconds +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0012993812561035156 seconds +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0013997554779052734 seconds +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0013012886047363281 seconds +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0018343925476074219 seconds +[2023-04-14 08:45:57,529] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed info: version=0.9.0, git-hash=unknown, git-branch=unknown +[2023-04-14 08:46:12,698] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Flops Profiler Enabled: False +[2023-04-14 08:46:13,190] [INFO] [config.py:953:print] DeepSpeedEngine configuration: +[2023-04-14 08:46:13,191] [INFO] [config.py:957:print] activation_checkpointing_config { + "partition_activations": false, + "contiguous_memory_optimization": false, + "cpu_checkpointing": false, + "number_checkpoints": null, + "synchronize_checkpoint_boundary": false, + "profile": false +} +[2023-04-14 08:46:13,191] [INFO] [config.py:957:print] aio_config ................... {'block_size': 1048576, 'queue_depth': 8, 'thread_count': 1, 'single_submit': False, 'overlap_events': True} +[2023-04-14 08:46:13,191] [INFO] [config.py:957:print] amp_enabled .................. False +[2023-04-14 08:46:13,191] [INFO] [config.py:957:print] amp_params ................... False +[2023-04-14 08:46:13,191] [INFO] [config.py:957:print] autotuning_config ............ { + "enabled": false, + "start_step": null, + "end_step": null, + "metric_path": null, + "arg_mappings": null, + "metric": "throughput", + "model_info": null, + "results_dir": "autotuning_results", + "exps_dir": "autotuning_exps", + "overwrite": true, + "fast": true, + "start_profile_step": 3, + "end_profile_step": 5, + "tuner_type": "gridsearch", + "tuner_early_stopping": 5, + "tuner_num_trials": 50, + "model_info_path": null, + "mp_size": 1, + "max_train_batch_size": null, + "min_train_batch_size": 1, + "max_train_micro_batch_size_per_gpu": 1.024000e+03, + "min_train_micro_batch_size_per_gpu": 1, + "num_tuning_micro_batch_sizes": 3 +} +[2023-04-14 08:46:13,191] [INFO] [config.py:957:print] bfloat16_enabled ............. False +[2023-04-14 08:46:13,191] [INFO] [config.py:957:print] checkpoint_parallel_write_pipeline False +[2023-04-14 08:46:13,191] [INFO] [config.py:957:print] checkpoint_tag_validation_enabled True +[2023-04-14 08:46:13,191] [INFO] [config.py:957:print] checkpoint_tag_validation_fail False +[2023-04-14 08:46:13,191] [INFO] [config.py:957:print] comms_config ................. +[2023-04-14 08:46:13,191] [INFO] [config.py:957:print] communication_data_type ...... None +[2023-04-14 08:46:13,191] [INFO] [config.py:957:print] compression_config ........... {'weight_quantization': {'shared_parameters': {'enabled': False, 'quantizer_kernel': False, 'schedule_offset': 0, 'quantize_groups': 1, 'quantize_verbose': False, 'quantization_type': 'symmetric', 'quantize_weight_in_forward': False, 'rounding': 'nearest', 'fp16_mixed_quantize': False, 'quantize_change_ratio': 0.001}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {'enabled': False, 'quantization_type': 'symmetric', 'range_calibration': 'dynamic', 'schedule_offset': 1000}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {'enabled': False, 'method': 'topk', 'schedule_offset': 1000}, 'different_groups': {}}, 'channel_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'layer_reduction': {'enabled': False}} +[2023-04-14 08:46:13,191] [INFO] [config.py:957:print] curriculum_enabled_legacy .... False +[2023-04-14 08:46:13,191] [INFO] [config.py:957:print] curriculum_params_legacy ..... False +[2023-04-14 08:46:13,191] [INFO] [config.py:957:print] data_efficiency_config ....... {'enabled': False, 'seed': 1234, 'data_sampling': {'enabled': False, 'num_epochs': 1000, 'num_workers': 0, 'curriculum_learning': {'enabled': False}}, 'data_routing': {'enabled': False, 'random_ltd': {'enabled': False, 'layer_token_lr_schedule': {'enabled': False}}}} +[2023-04-14 08:46:13,191] [INFO] [config.py:957:print] data_efficiency_enabled ...... False +[2023-04-14 08:46:13,191] [INFO] [config.py:957:print] dataloader_drop_last ......... False +[2023-04-14 08:46:13,191] [INFO] [config.py:957:print] disable_allgather ............ False +[2023-04-14 08:46:13,191] [INFO] [config.py:957:print] dump_state ................... False +[2023-04-14 08:46:13,191] [INFO] [config.py:957:print] dynamic_loss_scale_args ...... None +[2023-04-14 08:46:13,191] [INFO] [config.py:957:print] eigenvalue_enabled ........... False +[2023-04-14 08:46:13,191] [INFO] [config.py:957:print] eigenvalue_gas_boundary_resolution 1 +[2023-04-14 08:46:13,191] [INFO] [config.py:957:print] eigenvalue_layer_name ........ bert.encoder.layer +[2023-04-14 08:46:13,191] [INFO] [config.py:957:print] eigenvalue_layer_num ......... 0 +[2023-04-14 08:46:13,191] [INFO] [config.py:957:print] eigenvalue_max_iter .......... 100 +[2023-04-14 08:46:13,191] [INFO] [config.py:957:print] eigenvalue_stability ......... 1e-06 +[2023-04-14 08:46:13,191] [INFO] [config.py:957:print] eigenvalue_tol ............... 0.01 +[2023-04-14 08:46:13,191] [INFO] [config.py:957:print] eigenvalue_verbose ........... False +[2023-04-14 08:46:13,191] [INFO] [config.py:957:print] elasticity_enabled ........... False +[2023-04-14 08:46:13,191] [INFO] [config.py:957:print] flops_profiler_config ........ { + "enabled": false, + "profile_step": 1, + "module_depth": -1, + "top_modules": 1, + "detailed": true, + "output_file": null +} +[2023-04-14 08:46:13,191] [INFO] [config.py:957:print] fp16_auto_cast ............... False +[2023-04-14 08:46:13,191] [INFO] [config.py:957:print] fp16_enabled ................. True +[2023-04-14 08:46:13,191] [INFO] [config.py:957:print] fp16_master_weights_and_gradients False +[2023-04-14 08:46:13,191] [INFO] [config.py:957:print] global_rank .................. 0 +[2023-04-14 08:46:13,192] [INFO] [config.py:957:print] grad_accum_dtype ............. None +[2023-04-14 08:46:13,192] [INFO] [config.py:957:print] gradient_accumulation_steps .. 1 +[2023-04-14 08:46:13,192] [INFO] [config.py:957:print] gradient_clipping ............ 1.0 +[2023-04-14 08:46:13,192] [INFO] [config.py:957:print] gradient_predivide_factor .... 1.0 +[2023-04-14 08:46:13,192] [INFO] [config.py:957:print] hybrid_engine ................ enabled=False max_out_tokens=512 inference_tp_size=1 release_inference_cache=False pin_parameters=True tp_gather_partition_size=8 +[2023-04-14 08:46:13,192] [INFO] [config.py:957:print] initial_dynamic_scale ........ 65536 +[2023-04-14 08:46:13,192] [INFO] [config.py:957:print] load_universal_checkpoint .... False +[2023-04-14 08:46:13,192] [INFO] [config.py:957:print] loss_scale ................... 0 +[2023-04-14 08:46:13,192] [INFO] [config.py:957:print] memory_breakdown ............. False +[2023-04-14 08:46:13,192] [INFO] [config.py:957:print] monitor_config ............... tensorboard=TensorBoardConfig(enabled=False, output_path='', job_name='DeepSpeedJobName') wandb=WandbConfig(enabled=False, group=None, team=None, project='deepspeed') csv_monitor=CSVConfig(enabled=False, output_path='', job_name='DeepSpeedJobName') enabled=False +[2023-04-14 08:46:13,192] [INFO] [config.py:957:print] nebula_config ................ { + "enabled": false, + "persistent_storage_path": null, + "persistent_time_interval": 100, + "num_of_version_in_retention": 2, + "enable_nebula_load": true, + "load_path": null +} +[2023-04-14 08:46:13,192] [INFO] [config.py:957:print] optimizer_legacy_fusion ...... False +[2023-04-14 08:46:13,192] [INFO] [config.py:957:print] optimizer_name ............... None +[2023-04-14 08:46:13,192] [INFO] [config.py:957:print] optimizer_params ............. None +[2023-04-14 08:46:13,192] [INFO] [config.py:957:print] pipeline ..................... {'stages': 'auto', 'partition': 'best', 'seed_layers': False, 'activation_checkpoint_interval': 0} +[2023-04-14 08:46:13,192] [INFO] [config.py:957:print] pld_enabled .................. False +[2023-04-14 08:46:13,192] [INFO] [config.py:957:print] pld_params ................... False +[2023-04-14 08:46:13,192] [INFO] [config.py:957:print] prescale_gradients ........... False +[2023-04-14 08:46:13,192] [INFO] [config.py:957:print] scheduler_name ............... None +[2023-04-14 08:46:13,192] [INFO] [config.py:957:print] scheduler_params ............. None +[2023-04-14 08:46:13,192] [INFO] [config.py:957:print] sparse_attention ............. None +[2023-04-14 08:46:13,192] [INFO] [config.py:957:print] sparse_gradients_enabled ..... False +[2023-04-14 08:46:13,192] [INFO] [config.py:957:print] steps_per_print .............. 10 +[2023-04-14 08:46:13,192] [INFO] [config.py:957:print] train_batch_size ............. 32 +[2023-04-14 08:46:13,192] [INFO] [config.py:957:print] train_micro_batch_size_per_gpu 4 +[2023-04-14 08:46:13,192] [INFO] [config.py:957:print] use_node_local_storage ....... False +[2023-04-14 08:46:13,192] [INFO] [config.py:957:print] wall_clock_breakdown ......... False +[2023-04-14 08:46:13,192] [INFO] [config.py:957:print] world_size ................... 8 +[2023-04-14 08:46:13,192] [INFO] [config.py:957:print] zero_allow_untested_optimizer False +[2023-04-14 08:46:13,192] [INFO] [config.py:957:print] zero_config .................. stage=0 contiguous_gradients=True reduce_scatter=True reduce_bucket_size=500,000,000 allgather_partitions=True allgather_bucket_size=500,000,000 overlap_comm=False load_from_fp32_weights=True elastic_checkpoint=False offload_param=DeepSpeedZeroOffloadParamConfig(device='none', nvme_path=None, buffer_count=5, buffer_size=100,000,000, max_in_cpu=1,000,000,000, pin_memory=False) offload_optimizer=None sub_group_size=1,000,000,000 cpu_offload_param=None cpu_offload_use_pin_memory=None cpu_offload=None prefetch_bucket_size=50,000,000 param_persistence_threshold=10000 model_persistence_threshold=sys.maxsize max_live_parameters=1,000,000,000 max_reuse_distance=1,000,000,000 gather_16bit_weights_on_model_save=False stage3_gather_fp16_weights_on_model_save=False ignore_unused_parameters=True legacy_stage1=False round_robin_gradients=False memory_efficient_linear=False +[2023-04-14 08:46:13,192] [INFO] [config.py:957:print] zero_enabled ................. False +[2023-04-14 08:46:13,192] [INFO] [config.py:957:print] zero_force_ds_cpu_optimizer .. True +[2023-04-14 08:46:13,192] [INFO] [config.py:957:print] zero_optimization_stage ...... 0 +[2023-04-14 08:46:13,192] [INFO] [config.py:943:print_user_config] json = { + "train_batch_size": 32, + "train_micro_batch_size_per_gpu": 4, + "steps_per_print": 10, + "zero_optimization": { + "stage": 0, + "stage3_param_persistence_threshold": 1.000000e+04, + "offload_param": { + "device": "none" + }, + "memory_efficient_linear": false + }, + "fp16": { + "enabled": true + }, + "gradient_clipping": 1.0, + "prescale_gradients": false, + "wall_clock_breakdown": false +} +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0015451908111572266 seconds +*******************[end] Initialized Ref Model [end] (duration: 59.70s)******************* +************************[start] Initializing Critic Model [start] ************************ +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module fused_adam, skipping build step... +Loading extension module fused_adam... +Time to load fused_adam op: 0.0030541419982910156 seconds +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module fused_adam, skipping build step... +Loading extension module fused_adam... +Time to load fused_adam op: 0.0025348663330078125 seconds +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module fused_adam, skipping build step... +Loading extension module fused_adam... +Time to load fused_adam op: 0.0019736289978027344 seconds +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module fused_adam, skipping build step... +Loading extension module fused_adam... +Time to load fused_adam op: 0.0029854774475097656 seconds +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module fused_adam, skipping build step... +Loading extension module fused_adam... +Time to load fused_adam op: 0.0022940635681152344 seconds +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module fused_adam, skipping build step... +Loading extension module fused_adam... +Time to load fused_adam op: 0.0029518604278564453 seconds +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module fused_adam, skipping build step... +Loading extension module fused_adam... +Time to load fused_adam op: 0.002866029739379883 seconds +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0012390613555908203 seconds +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.003458738327026367 seconds +huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... +To disable this warning, you can either: + - Avoid using `tokenizers` before the fork if possible + - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module fused_adam, skipping build step... +Loading extension module fused_adam... +Time to load fused_adam op: 0.002884387969970703 seconds +[2023-04-14 08:46:30,739] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed info: version=0.9.0, git-hash=unknown, git-branch=unknown +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.5726070404052734 seconds +[2023-04-14 08:46:34,252] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Flops Profiler Enabled: False +Rank: 6 partition count [8, 8] and sizes[(41365824, False), (33792, False)] +Rank: 7 partition count [8, 8] and sizes[(41365824, False), (33792, False)] +Rank: 1 partition count [8, 8] and sizes[(41365824, False), (33792, False)] +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0016791820526123047 seconds +[2023-04-14 08:46:36,743] [INFO] [logging.py:96:log_dist] [Rank 0] Removing param_group that has no 'params' in the client Optimizer +[2023-04-14 08:46:36,743] [INFO] [logging.py:96:log_dist] [Rank 0] Using client Optimizer as basic optimizer +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.002676725387573242 seconds +[2023-04-14 08:46:36,762] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Basic Optimizer = FusedAdam +[2023-04-14 08:46:36,762] [INFO] [utils.py:51:is_zero_supported_optimizer] Checking ZeRO support for optimizer=FusedAdam type= +[2023-04-14 08:46:36,763] [INFO] [logging.py:96:log_dist] [Rank 0] Creating torch.float16 ZeRO stage 2 optimizer +[2023-04-14 08:46:36,763] [INFO] [stage_1_and_2.py:133:__init__] Reduce bucket size 500,000,000 +[2023-04-14 08:46:36,763] [INFO] [stage_1_and_2.py:134:__init__] Allgather bucket size 500,000,000 +[2023-04-14 08:46:36,763] [INFO] [stage_1_and_2.py:135:__init__] CPU Offload: False +[2023-04-14 08:46:36,763] [INFO] [stage_1_and_2.py:136:__init__] Round robin gradient partitioning: False +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0006117820739746094 seconds +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.002028226852416992 seconds +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0013179779052734375 seconds +Rank: 0 partition count [8, 8] and sizes[(41365824, False), (33792, False)] +Rank: 3 partition count [8, 8] and sizes[(41365824, False), (33792, False)] +Rank: 4 partition count [8, 8] and sizes[(41365824, False), (33792, False)] +Rank: 5 partition count [8, 8] and sizes[(41365824, False), (33792, False)] +Rank: 2 partition count [8, 8] and sizes[(41365824, False), (33792, False)] +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... + +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...Time to load utils op: 0.0013089179992675781 seconds + +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0011861324310302734 seconds +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0008509159088134766 secondsUsing /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... + +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0007336139678955078 seconds +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...Time to load utils op: 0.0008528232574462891 seconds + +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0011703968048095703 seconds +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0012271404266357422 seconds +[2023-04-14 08:46:42,781] [INFO] [utils.py:785:see_memory_usage] Before initializing optimizer states +[2023-04-14 08:46:42,782] [INFO] [utils.py:786:see_memory_usage] MA 8.1 GB Max_MA 8.1 GB CA 8.29 GB Max_CA 8 GB +[2023-04-14 08:46:42,782] [INFO] [utils.py:793:see_memory_usage] CPU Virtual Memory: used = 125.79 GB, percent = 12.5% +[2023-04-14 08:46:43,491] [INFO] [utils.py:785:see_memory_usage] After initializing optimizer states +[2023-04-14 08:46:43,491] [INFO] [utils.py:786:see_memory_usage] MA 8.41 GB Max_MA 8.56 GB CA 8.75 GB Max_CA 9 GB +[2023-04-14 08:46:43,492] [INFO] [utils.py:793:see_memory_usage] CPU Virtual Memory: used = 127.29 GB, percent = 12.6% +[2023-04-14 08:46:43,492] [INFO] [stage_1_and_2.py:489:__init__] optimizer state initialized +[2023-04-14 08:46:44,198] [INFO] [utils.py:785:see_memory_usage] After initializing ZeRO optimizer +[2023-04-14 08:46:44,198] [INFO] [utils.py:786:see_memory_usage] MA 8.41 GB Max_MA 8.41 GB CA 8.75 GB Max_CA 9 GB +[2023-04-14 08:46:44,199] [INFO] [utils.py:793:see_memory_usage] CPU Virtual Memory: used = 129.3 GB, percent = 12.8% +[2023-04-14 08:46:44,200] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Final Optimizer = FusedAdam +[2023-04-14 08:46:44,200] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed using client LR scheduler +[2023-04-14 08:46:44,200] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed LR Scheduler = +[2023-04-14 08:46:44,200] [INFO] [logging.py:96:log_dist] [Rank 0] step=0, skipped=0, lr=[0.0, 0.0], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 08:46:44,201] [INFO] [config.py:953:print] DeepSpeedEngine configuration: +[2023-04-14 08:46:44,201] [INFO] [config.py:957:print] activation_checkpointing_config { + "partition_activations": false, + "contiguous_memory_optimization": false, + "cpu_checkpointing": false, + "number_checkpoints": null, + "synchronize_checkpoint_boundary": false, + "profile": false +} +[2023-04-14 08:46:44,201] [INFO] [config.py:957:print] aio_config ................... {'block_size': 1048576, 'queue_depth': 8, 'thread_count': 1, 'single_submit': False, 'overlap_events': True} +[2023-04-14 08:46:44,201] [INFO] [config.py:957:print] amp_enabled .................. False +[2023-04-14 08:46:44,201] [INFO] [config.py:957:print] amp_params ................... False +[2023-04-14 08:46:44,201] [INFO] [config.py:957:print] autotuning_config ............ { + "enabled": false, + "start_step": null, + "end_step": null, + "metric_path": null, + "arg_mappings": null, + "metric": "throughput", + "model_info": null, + "results_dir": "autotuning_results", + "exps_dir": "autotuning_exps", + "overwrite": true, + "fast": true, + "start_profile_step": 3, + "end_profile_step": 5, + "tuner_type": "gridsearch", + "tuner_early_stopping": 5, + "tuner_num_trials": 50, + "model_info_path": null, + "mp_size": 1, + "max_train_batch_size": null, + "min_train_batch_size": 1, + "max_train_micro_batch_size_per_gpu": 1.024000e+03, + "min_train_micro_batch_size_per_gpu": 1, + "num_tuning_micro_batch_sizes": 3 +} +[2023-04-14 08:46:44,208] [INFO] [config.py:957:print] bfloat16_enabled ............. False +[2023-04-14 08:46:44,208] [INFO] [config.py:957:print] checkpoint_parallel_write_pipeline False +[2023-04-14 08:46:44,208] [INFO] [config.py:957:print] checkpoint_tag_validation_enabled True +[2023-04-14 08:46:44,209] [INFO] [config.py:957:print] checkpoint_tag_validation_fail False +[2023-04-14 08:46:44,209] [INFO] [config.py:957:print] comms_config ................. +[2023-04-14 08:46:44,209] [INFO] [config.py:957:print] communication_data_type ...... None +[2023-04-14 08:46:44,209] [INFO] [config.py:957:print] compression_config ........... {'weight_quantization': {'shared_parameters': {'enabled': False, 'quantizer_kernel': False, 'schedule_offset': 0, 'quantize_groups': 1, 'quantize_verbose': False, 'quantization_type': 'symmetric', 'quantize_weight_in_forward': False, 'rounding': 'nearest', 'fp16_mixed_quantize': False, 'quantize_change_ratio': 0.001}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {'enabled': False, 'quantization_type': 'symmetric', 'range_calibration': 'dynamic', 'schedule_offset': 1000}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {'enabled': False, 'method': 'topk', 'schedule_offset': 1000}, 'different_groups': {}}, 'channel_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'layer_reduction': {'enabled': False}} +[2023-04-14 08:46:44,209] [INFO] [config.py:957:print] curriculum_enabled_legacy .... False +[2023-04-14 08:46:44,209] [INFO] [config.py:957:print] curriculum_params_legacy ..... False +[2023-04-14 08:46:44,209] [INFO] [config.py:957:print] data_efficiency_config ....... {'enabled': False, 'seed': 1234, 'data_sampling': {'enabled': False, 'num_epochs': 1000, 'num_workers': 0, 'curriculum_learning': {'enabled': False}}, 'data_routing': {'enabled': False, 'random_ltd': {'enabled': False, 'layer_token_lr_schedule': {'enabled': False}}}} +[2023-04-14 08:46:44,209] [INFO] [config.py:957:print] data_efficiency_enabled ...... False +[2023-04-14 08:46:44,209] [INFO] [config.py:957:print] dataloader_drop_last ......... False +[2023-04-14 08:46:44,209] [INFO] [config.py:957:print] disable_allgather ............ False +[2023-04-14 08:46:44,209] [INFO] [config.py:957:print] dump_state ................... False +[2023-04-14 08:46:44,209] [INFO] [config.py:957:print] dynamic_loss_scale_args ...... {'init_scale': 65536, 'scale_window': 100, 'delayed_shift': 2, 'min_scale': 1} +[2023-04-14 08:46:44,209] [INFO] [config.py:957:print] eigenvalue_enabled ........... False +[2023-04-14 08:46:44,209] [INFO] [config.py:957:print] eigenvalue_gas_boundary_resolution 1 +[2023-04-14 08:46:44,209] [INFO] [config.py:957:print] eigenvalue_layer_name ........ bert.encoder.layer +[2023-04-14 08:46:44,209] [INFO] [config.py:957:print] eigenvalue_layer_num ......... 0 +[2023-04-14 08:46:44,209] [INFO] [config.py:957:print] eigenvalue_max_iter .......... 100 +[2023-04-14 08:46:44,209] [INFO] [config.py:957:print] eigenvalue_stability ......... 1e-06 +[2023-04-14 08:46:44,209] [INFO] [config.py:957:print] eigenvalue_tol ............... 0.01 +[2023-04-14 08:46:44,209] [INFO] [config.py:957:print] eigenvalue_verbose ........... False +[2023-04-14 08:46:44,209] [INFO] [config.py:957:print] elasticity_enabled ........... False +[2023-04-14 08:46:44,209] [INFO] [config.py:957:print] flops_profiler_config ........ { + "enabled": false, + "profile_step": 1, + "module_depth": -1, + "top_modules": 1, + "detailed": true, + "output_file": null +} +[2023-04-14 08:46:44,209] [INFO] [config.py:957:print] fp16_auto_cast ............... False +[2023-04-14 08:46:44,209] [INFO] [config.py:957:print] fp16_enabled ................. True +[2023-04-14 08:46:44,209] [INFO] [config.py:957:print] fp16_master_weights_and_gradients False +[2023-04-14 08:46:44,209] [INFO] [config.py:957:print] global_rank .................. 0 +[2023-04-14 08:46:44,209] [INFO] [config.py:957:print] grad_accum_dtype ............. None +[2023-04-14 08:46:44,209] [INFO] [config.py:957:print] gradient_accumulation_steps .. 1 +[2023-04-14 08:46:44,209] [INFO] [config.py:957:print] gradient_clipping ............ 1.0 +[2023-04-14 08:46:44,210] [INFO] [config.py:957:print] gradient_predivide_factor .... 1.0 +[2023-04-14 08:46:44,210] [INFO] [config.py:957:print] hybrid_engine ................ enabled=False max_out_tokens=512 inference_tp_size=1 release_inference_cache=False pin_parameters=True tp_gather_partition_size=8 +[2023-04-14 08:46:44,210] [INFO] [config.py:957:print] initial_dynamic_scale ........ 65536 +[2023-04-14 08:46:44,210] [INFO] [config.py:957:print] load_universal_checkpoint .... False +[2023-04-14 08:46:44,210] [INFO] [config.py:957:print] loss_scale ................... 0 +[2023-04-14 08:46:44,210] [INFO] [config.py:957:print] memory_breakdown ............. False +[2023-04-14 08:46:44,210] [INFO] [config.py:957:print] monitor_config ............... tensorboard=TensorBoardConfig(enabled=False, output_path='', job_name='DeepSpeedJobName') wandb=WandbConfig(enabled=False, group=None, team=None, project='deepspeed') csv_monitor=CSVConfig(enabled=False, output_path='', job_name='DeepSpeedJobName') enabled=False +[2023-04-14 08:46:44,210] [INFO] [config.py:957:print] nebula_config ................ { + "enabled": false, + "persistent_storage_path": null, + "persistent_time_interval": 100, + "num_of_version_in_retention": 2, + "enable_nebula_load": true, + "load_path": null +} +[2023-04-14 08:46:44,210] [INFO] [config.py:957:print] optimizer_legacy_fusion ...... False +[2023-04-14 08:46:44,210] [INFO] [config.py:957:print] optimizer_name ............... None +[2023-04-14 08:46:44,210] [INFO] [config.py:957:print] optimizer_params ............. None +[2023-04-14 08:46:44,210] [INFO] [config.py:957:print] pipeline ..................... {'stages': 'auto', 'partition': 'best', 'seed_layers': False, 'activation_checkpoint_interval': 0} +[2023-04-14 08:46:44,210] [INFO] [config.py:957:print] pld_enabled .................. False +[2023-04-14 08:46:44,210] [INFO] [config.py:957:print] pld_params ................... False +[2023-04-14 08:46:44,210] [INFO] [config.py:957:print] prescale_gradients ........... False +[2023-04-14 08:46:44,210] [INFO] [config.py:957:print] scheduler_name ............... None +[2023-04-14 08:46:44,210] [INFO] [config.py:957:print] scheduler_params ............. None +[2023-04-14 08:46:44,210] [INFO] [config.py:957:print] sparse_attention ............. None +[2023-04-14 08:46:44,210] [INFO] [config.py:957:print] sparse_gradients_enabled ..... False +[2023-04-14 08:46:44,210] [INFO] [config.py:957:print] steps_per_print .............. 10 +[2023-04-14 08:46:44,210] [INFO] [config.py:957:print] train_batch_size ............. 32 +[2023-04-14 08:46:44,210] [INFO] [config.py:957:print] train_micro_batch_size_per_gpu 4 +[2023-04-14 08:46:44,210] [INFO] [config.py:957:print] use_node_local_storage ....... False +[2023-04-14 08:46:44,210] [INFO] [config.py:957:print] wall_clock_breakdown ......... False +[2023-04-14 08:46:44,210] [INFO] [config.py:957:print] world_size ................... 8 +[2023-04-14 08:46:44,210] [INFO] [config.py:957:print] zero_allow_untested_optimizer False +[2023-04-14 08:46:44,210] [INFO] [config.py:957:print] zero_config .................. stage=2 contiguous_gradients=True reduce_scatter=True reduce_bucket_size=500,000,000 allgather_partitions=True allgather_bucket_size=500,000,000 overlap_comm=False load_from_fp32_weights=True elastic_checkpoint=False offload_param=DeepSpeedZeroOffloadParamConfig(device='none', nvme_path=None, buffer_count=5, buffer_size=100,000,000, max_in_cpu=1,000,000,000, pin_memory=False) offload_optimizer=DeepSpeedZeroOffloadOptimizerConfig(device='none', nvme_path=None, buffer_count=4, pin_memory=False, pipeline=False, pipeline_read=False, pipeline_write=False, fast_init=False) sub_group_size=1,000,000,000 cpu_offload_param=None cpu_offload_use_pin_memory=None cpu_offload=None prefetch_bucket_size=30000000 param_persistence_threshold=10000 model_persistence_threshold=sys.maxsize max_live_parameters=30000000 max_reuse_distance=1,000,000,000 gather_16bit_weights_on_model_save=False stage3_gather_fp16_weights_on_model_save=False ignore_unused_parameters=True legacy_stage1=False round_robin_gradients=False memory_efficient_linear=False +[2023-04-14 08:46:44,210] [INFO] [config.py:957:print] zero_enabled ................. True +[2023-04-14 08:46:44,210] [INFO] [config.py:957:print] zero_force_ds_cpu_optimizer .. True +[2023-04-14 08:46:44,210] [INFO] [config.py:957:print] zero_optimization_stage ...... 2 +[2023-04-14 08:46:44,210] [INFO] [config.py:943:print_user_config] json = { + "train_batch_size": 32, + "train_micro_batch_size_per_gpu": 4, + "steps_per_print": 10, + "zero_optimization": { + "stage": 2, + "offload_param": { + "device": "none" + }, + "offload_optimizer": { + "device": "none" + }, + "stage3_param_persistence_threshold": 1.000000e+04, + "stage3_max_live_parameters": 3.000000e+07, + "stage3_prefetch_bucket_size": 3.000000e+07, + "memory_efficient_linear": false + }, + "fp16": { + "enabled": true, + "loss_scale_window": 100 + }, + "gradient_clipping": 1.0, + "prescale_gradients": false, + "wall_clock_breakdown": false, + "hybrid_engine": { + "enabled": false, + "inference_tp_size": 1, + "release_inference_cache": false, + "pin_parameters": true, + "tp_gather_partition_size": 8 + } +} +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0017056465148925781 seconds +*****************[end] Initialized Critic Model [end] (duration: 31.02s)****************** +************************[start] Initializing Reward Model [start] ************************ +[2023-04-14 08:47:02,078] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed info: version=0.9.0, git-hash=unknown, git-branch=unknown +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 1.1240429878234863 seconds +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.42214012145996094 seconds +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.002139568328857422 seconds +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0012676715850830078 seconds +[2023-04-14 08:47:21,293] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Flops Profiler Enabled: False +[2023-04-14 08:47:21,295] [INFO] [config.py:953:print] DeepSpeedEngine configuration: +[2023-04-14 08:47:21,295] [INFO] [config.py:957:print] activation_checkpointing_config { + "partition_activations": false, + "contiguous_memory_optimization": false, + "cpu_checkpointing": false, + "number_checkpoints": null, + "synchronize_checkpoint_boundary": false, + "profile": false +} +[2023-04-14 08:47:21,295] [INFO] [config.py:957:print] aio_config ................... {'block_size': 1048576, 'queue_depth': 8, 'thread_count': 1, 'single_submit': False, 'overlap_events': True} +[2023-04-14 08:47:21,295] [INFO] [config.py:957:print] amp_enabled .................. False +[2023-04-14 08:47:21,295] [INFO] [config.py:957:print] amp_params ................... False +[2023-04-14 08:47:21,295] [INFO] [config.py:957:print] autotuning_config ............ { + "enabled": false, + "start_step": null, + "end_step": null, + "metric_path": null, + "arg_mappings": null, + "metric": "throughput", + "model_info": null, + "results_dir": "autotuning_results", + "exps_dir": "autotuning_exps", + "overwrite": true, + "fast": true, + "start_profile_step": 3, + "end_profile_step": 5, + "tuner_type": "gridsearch", + "tuner_early_stopping": 5, + "tuner_num_trials": 50, + "model_info_path": null, + "mp_size": 1, + "max_train_batch_size": null, + "min_train_batch_size": 1, + "max_train_micro_batch_size_per_gpu": 1.024000e+03, + "min_train_micro_batch_size_per_gpu": 1, + "num_tuning_micro_batch_sizes": 3 +} +[2023-04-14 08:47:21,295] [INFO] [config.py:957:print] bfloat16_enabled ............. False +[2023-04-14 08:47:21,295] [INFO] [config.py:957:print] checkpoint_parallel_write_pipeline False +[2023-04-14 08:47:21,295] [INFO] [config.py:957:print] checkpoint_tag_validation_enabled True +[2023-04-14 08:47:21,295] [INFO] [config.py:957:print] checkpoint_tag_validation_fail False +[2023-04-14 08:47:21,295] [INFO] [config.py:957:print] comms_config ................. +[2023-04-14 08:47:21,295] [INFO] [config.py:957:print] communication_data_type ...... None +[2023-04-14 08:47:21,295] [INFO] [config.py:957:print] compression_config ........... {'weight_quantization': {'shared_parameters': {'enabled': False, 'quantizer_kernel': False, 'schedule_offset': 0, 'quantize_groups': 1, 'quantize_verbose': False, 'quantization_type': 'symmetric', 'quantize_weight_in_forward': False, 'rounding': 'nearest', 'fp16_mixed_quantize': False, 'quantize_change_ratio': 0.001}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {'enabled': False, 'quantization_type': 'symmetric', 'range_calibration': 'dynamic', 'schedule_offset': 1000}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {'enabled': False, 'method': 'topk', 'schedule_offset': 1000}, 'different_groups': {}}, 'channel_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'layer_reduction': {'enabled': False}} +[2023-04-14 08:47:21,295] [INFO] [config.py:957:print] curriculum_enabled_legacy .... False +[2023-04-14 08:47:21,295] [INFO] [config.py:957:print] curriculum_params_legacy ..... False +[2023-04-14 08:47:21,295] [INFO] [config.py:957:print] data_efficiency_config ....... {'enabled': False, 'seed': 1234, 'data_sampling': {'enabled': False, 'num_epochs': 1000, 'num_workers': 0, 'curriculum_learning': {'enabled': False}}, 'data_routing': {'enabled': False, 'random_ltd': {'enabled': False, 'layer_token_lr_schedule': {'enabled': False}}}} +[2023-04-14 08:47:21,295] [INFO] [config.py:957:print] data_efficiency_enabled ...... False +[2023-04-14 08:47:21,295] [INFO] [config.py:957:print] dataloader_drop_last ......... False +[2023-04-14 08:47:21,295] [INFO] [config.py:957:print] disable_allgather ............ False +[2023-04-14 08:47:21,295] [INFO] [config.py:957:print] dump_state ................... False +[2023-04-14 08:47:21,295] [INFO] [config.py:957:print] dynamic_loss_scale_args ...... None +[2023-04-14 08:47:21,295] [INFO] [config.py:957:print] eigenvalue_enabled ........... False +[2023-04-14 08:47:21,296] [INFO] [config.py:957:print] eigenvalue_gas_boundary_resolution 1 +[2023-04-14 08:47:21,296] [INFO] [config.py:957:print] eigenvalue_layer_name ........ bert.encoder.layer +[2023-04-14 08:47:21,296] [INFO] [config.py:957:print] eigenvalue_layer_num ......... 0 +[2023-04-14 08:47:21,296] [INFO] [config.py:957:print] eigenvalue_max_iter .......... 100 +[2023-04-14 08:47:21,296] [INFO] [config.py:957:print] eigenvalue_stability ......... 1e-06 +[2023-04-14 08:47:21,296] [INFO] [config.py:957:print] eigenvalue_tol ............... 0.01 +[2023-04-14 08:47:21,296] [INFO] [config.py:957:print] eigenvalue_verbose ........... False +[2023-04-14 08:47:21,296] [INFO] [config.py:957:print] elasticity_enabled ........... False +[2023-04-14 08:47:21,296] [INFO] [config.py:957:print] flops_profiler_config ........ { + "enabled": false, + "profile_step": 1, + "module_depth": -1, + "top_modules": 1, + "detailed": true, + "output_file": null +} +[2023-04-14 08:47:21,296] [INFO] [config.py:957:print] fp16_auto_cast ............... False +[2023-04-14 08:47:21,296] [INFO] [config.py:957:print] fp16_enabled ................. True +[2023-04-14 08:47:21,296] [INFO] [config.py:957:print] fp16_master_weights_and_gradients False +[2023-04-14 08:47:21,296] [INFO] [config.py:957:print] global_rank .................. 0 +[2023-04-14 08:47:21,296] [INFO] [config.py:957:print] grad_accum_dtype ............. None +[2023-04-14 08:47:21,296] [INFO] [config.py:957:print] gradient_accumulation_steps .. 1 +[2023-04-14 08:47:21,296] [INFO] [config.py:957:print] gradient_clipping ............ 1.0 +[2023-04-14 08:47:21,296] [INFO] [config.py:957:print] gradient_predivide_factor .... 1.0 +[2023-04-14 08:47:21,296] [INFO] [config.py:957:print] hybrid_engine ................ enabled=False max_out_tokens=512 inference_tp_size=1 release_inference_cache=False pin_parameters=True tp_gather_partition_size=8 +[2023-04-14 08:47:21,296] [INFO] [config.py:957:print] initial_dynamic_scale ........ 65536 +[2023-04-14 08:47:21,296] [INFO] [config.py:957:print] load_universal_checkpoint .... False +[2023-04-14 08:47:21,296] [INFO] [config.py:957:print] loss_scale ................... 0 +[2023-04-14 08:47:21,296] [INFO] [config.py:957:print] memory_breakdown ............. False +[2023-04-14 08:47:21,296] [INFO] [config.py:957:print] monitor_config ............... tensorboard=TensorBoardConfig(enabled=False, output_path='', job_name='DeepSpeedJobName') wandb=WandbConfig(enabled=False, group=None, team=None, project='deepspeed') csv_monitor=CSVConfig(enabled=False, output_path='', job_name='DeepSpeedJobName') enabled=False +[2023-04-14 08:47:21,296] [INFO] [config.py:957:print] nebula_config ................ { + "enabled": false, + "persistent_storage_path": null, + "persistent_time_interval": 100, + "num_of_version_in_retention": 2, + "enable_nebula_load": true, + "load_path": null +} +[2023-04-14 08:47:21,296] [INFO] [config.py:957:print] optimizer_legacy_fusion ...... False +[2023-04-14 08:47:21,296] [INFO] [config.py:957:print] optimizer_name ............... None +[2023-04-14 08:47:21,296] [INFO] [config.py:957:print] optimizer_params ............. None +[2023-04-14 08:47:21,296] [INFO] [config.py:957:print] pipeline ..................... {'stages': 'auto', 'partition': 'best', 'seed_layers': False, 'activation_checkpoint_interval': 0} +[2023-04-14 08:47:21,296] [INFO] [config.py:957:print] pld_enabled .................. False +[2023-04-14 08:47:21,296] [INFO] [config.py:957:print] pld_params ................... False +[2023-04-14 08:47:21,296] [INFO] [config.py:957:print] prescale_gradients ........... False +[2023-04-14 08:47:21,296] [INFO] [config.py:957:print] scheduler_name ............... None +[2023-04-14 08:47:21,296] [INFO] [config.py:957:print] scheduler_params ............. None +[2023-04-14 08:47:21,296] [INFO] [config.py:957:print] sparse_attention ............. None +[2023-04-14 08:47:21,296] [INFO] [config.py:957:print] sparse_gradients_enabled ..... False +[2023-04-14 08:47:21,296] [INFO] [config.py:957:print] steps_per_print .............. 10 +[2023-04-14 08:47:21,296] [INFO] [config.py:957:print] train_batch_size ............. 32 +[2023-04-14 08:47:21,296] [INFO] [config.py:957:print] train_micro_batch_size_per_gpu 4 +[2023-04-14 08:47:21,296] [INFO] [config.py:957:print] use_node_local_storage ....... False +[2023-04-14 08:47:21,297] [INFO] [config.py:957:print] wall_clock_breakdown ......... False +[2023-04-14 08:47:21,297] [INFO] [config.py:957:print] world_size ................... 8 +[2023-04-14 08:47:21,297] [INFO] [config.py:957:print] zero_allow_untested_optimizer False +[2023-04-14 08:47:21,297] [INFO] [config.py:957:print] zero_config .................. stage=0 contiguous_gradients=True reduce_scatter=True reduce_bucket_size=500,000,000 allgather_partitions=True allgather_bucket_size=500,000,000 overlap_comm=False load_from_fp32_weights=True elastic_checkpoint=False offload_param=DeepSpeedZeroOffloadParamConfig(device='none', nvme_path=None, buffer_count=5, buffer_size=100,000,000, max_in_cpu=1,000,000,000, pin_memory=False) offload_optimizer=None sub_group_size=1,000,000,000 cpu_offload_param=None cpu_offload_use_pin_memory=None cpu_offload=None prefetch_bucket_size=50,000,000 param_persistence_threshold=10000 model_persistence_threshold=sys.maxsize max_live_parameters=1,000,000,000 max_reuse_distance=1,000,000,000 gather_16bit_weights_on_model_save=False stage3_gather_fp16_weights_on_model_save=False ignore_unused_parameters=True legacy_stage1=False round_robin_gradients=False memory_efficient_linear=False +[2023-04-14 08:47:21,297] [INFO] [config.py:957:print] zero_enabled ................. False +[2023-04-14 08:47:21,297] [INFO] [config.py:957:print] zero_force_ds_cpu_optimizer .. True +[2023-04-14 08:47:21,297] [INFO] [config.py:957:print] zero_optimization_stage ...... 0 +[2023-04-14 08:47:21,297] [INFO] [config.py:943:print_user_config] json = { + "train_batch_size": 32, + "train_micro_batch_size_per_gpu": 4, + "steps_per_print": 10, + "zero_optimization": { + "stage": 0, + "stage3_param_persistence_threshold": 1.000000e+04, + "offload_param": { + "device": "none" + }, + "memory_efficient_linear": false + }, + "fp16": { + "enabled": true + }, + "gradient_clipping": 1.0, + "prescale_gradients": false, + "wall_clock_breakdown": false +} +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.0011258125305175781 seconds +*****************[end] Initialized Reward Model [end] (duration: 37.09s)****************** +***** Running training ***** +Beginning of Epoch 1/1, Total Generation Batches 8260 +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.009740114212036133 seconds +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.001196146011352539 seconds +Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +No modifications detected for re-loaded extension module utils, skipping build step... +Loading extension module utils... +Time to load utils op: 0.001905679702758789 seconds +------------------------------------------------------ +Free memory : 28.318542 (GigaBytes) +Total memory: 39.586121 (GigaBytes) +Requested memory: 0.515625 (GigaBytes) +Setting maximum total tokens (input + output) to 512 +WorkSpace: 0x7efdf6000000 +------------------------------------------------------ +[2023-04-14 08:47:24,971] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +[2023-04-14 08:47:25,055] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 0|ppo_ep: 1|act_loss: 0.08221435546875|cri_loss: 0.051910400390625|unsuper_loss: 0.0 +average reward score: 0.74267578125 +------------------------------------------------------------------------------------- +|E2E latency=3.40s |Gather latency=0.00s (0.00%) |Generate time=2.68s (79.04%) |Training time=0.62s (18.39%) |Others=0.09 (2.57%)|CurSamplesPerSec=9.42 |AvgSamplesPerSec=9.42 +[2023-04-14 08:47:27,082] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +[2023-04-14 08:47:27,167] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 1|ppo_ep: 1|act_loss: -0.0277862548828125|cri_loss: -0.0082244873046875|unsuper_loss: 0.0 +average reward score: 1.08203125 +------------------------------------------------------------------------------------- +|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.64s (77.71%) |Training time=0.38s (18.12%) |Others=0.09 (4.17%)|CurSamplesPerSec=15.15 |AvgSamplesPerSec=11.62 +[2023-04-14 08:47:29,187] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384 +[2023-04-14 08:47:29,272] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384 +epoch: 0|step: 2|ppo_ep: 1|act_loss: 0.009063720703125|cri_loss: 0.0110931396484375|unsuper_loss: 0.0 +average reward score: 0.4833984375 +------------------------------------------------------------------------------------- +|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.60s (76.06%) |Training time=0.41s (19.68%) |Others=0.09 (4.26%)|CurSamplesPerSec=15.20 |AvgSamplesPerSec=12.61 +[2023-04-14 08:47:31,435] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192 +epoch: 0|step: 3|ppo_ep: 1|act_loss: 0.374755859375|cri_loss: 0.230712890625|unsuper_loss: 0.0 +average reward score: 0.4599609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.85%) |Training time=0.45s (20.99%) |Others=0.09 (4.17%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=13.09 +epoch: 0|step: 4|ppo_ep: 1|act_loss: 0.25146484375|cri_loss: 0.15087890625|unsuper_loss: 0.0 +average reward score: 0.5869140625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.82%) |Training time=0.44s (20.62%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=13.41 +epoch: 0|step: 5|ppo_ep: 1|act_loss: -0.0999755859375|cri_loss: -0.04248046875|unsuper_loss: 0.0 +average reward score: 1.0810546875 +------------------------------------------------------------------------------------- +|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.79s (75.75%) |Training time=0.47s (20.04%) |Others=0.10 (4.21%)|CurSamplesPerSec=13.54 |AvgSamplesPerSec=13.43 +epoch: 0|step: 6|ppo_ep: 1|act_loss: -0.05633544921875|cri_loss: -0.0228271484375|unsuper_loss: 0.0 +average reward score: 1.267578125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.19%) |Training time=0.43s (20.08%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=13.64 +epoch: 0|step: 7|ppo_ep: 1|act_loss: 0.14599609375|cri_loss: 0.0970458984375|unsuper_loss: 0.0 +average reward score: 0.63232421875 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.83%) |Training time=0.44s (20.56%) |Others=0.10 (4.61%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=13.79 +epoch: 0|step: 8|ppo_ep: 1|act_loss: -0.1309814453125|cri_loss: -0.061859130859375|unsuper_loss: 0.0 +average reward score: 1.03125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.23%) |Training time=0.45s (21.08%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=13.91 +[2023-04-14 08:47:44,382] [INFO] [logging.py:96:log_dist] [Rank 0] step=10, skipped=3, lr=[6.755000000000001e-07, 6.755000000000001e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 08:47:44,400] [INFO] [timer.py:199:stop] epoch=0/micro_step=10/global_step=10, RunningAvgSamplesPerSec=113.50591974216755, CurrSamplesPerSec=110.00055566710786, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 08:47:44,493] [INFO] [logging.py:96:log_dist] [Rank 0] step=10, skipped=4, lr=[3.0000000000000004e-07, 3.0000000000000004e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 9|ppo_ep: 1|act_loss: 0.1016845703125|cri_loss: 0.0560302734375|unsuper_loss: 0.0 +average reward score: 0.6484375 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.16%) |Training time=0.45s (21.26%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.01 +epoch: 0|step: 10|ppo_ep: 1|act_loss: -0.150390625|cri_loss: -0.06866455078125|unsuper_loss: 0.0 +average reward score: 0.9638671875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.93%) |Training time=0.46s (21.39%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.09 +epoch: 0|step: 11|ppo_ep: 1|act_loss: -0.00390625|cri_loss: 0.007171630859375|unsuper_loss: 0.0 +average reward score: 0.94287109375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.15%) |Training time=0.45s (21.25%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.16 +epoch: 0|step: 12|ppo_ep: 1|act_loss: -0.043121337890625|cri_loss: -0.01629638671875|unsuper_loss: 0.0 +average reward score: 1.5361328125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.16%) |Training time=0.45s (21.16%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.22 +epoch: 0|step: 13|ppo_ep: 1|act_loss: 0.06817626953125|cri_loss: 0.04638671875|unsuper_loss: 0.0 +average reward score: 1.15234375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.95%) |Training time=0.46s (21.47%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.27 +epoch: 0|step: 14|ppo_ep: 1|act_loss: -0.041015625|cri_loss: -0.0144195556640625|unsuper_loss: 0.0 +average reward score: 1.4755859375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.02%) |Training time=0.46s (21.29%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.31 +epoch: 0|step: 15|ppo_ep: 1|act_loss: -0.01495361328125|cri_loss: -0.003643035888671875|unsuper_loss: 0.0 +average reward score: 1.29296875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.40%) |Training time=0.48s (22.04%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.33 +epoch: 0|step: 16|ppo_ep: 1|act_loss: 0.165283203125|cri_loss: 0.108154296875|unsuper_loss: 0.0 +average reward score: 1.4296875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.28%) |Training time=0.48s (21.95%) |Others=0.10 (4.77%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.36 +epoch: 0|step: 17|ppo_ep: 1|act_loss: -0.022979736328125|cri_loss: -0.0068511962890625|unsuper_loss: 0.0 +average reward score: 1.6962890625 +------------------------------------------------------------------------------------- +|E2E latency=3.46s |Gather latency=0.00s (0.00%) |Generate time=1.59s (46.04%) |Training time=0.48s (13.84%) |Others=1.39 (40.12%)|CurSamplesPerSec=9.25 |AvgSamplesPerSec=13.93 +epoch: 0|step: 18|ppo_ep: 1|act_loss: -0.06494140625|cri_loss: -0.02099609375|unsuper_loss: 0.0 +average reward score: 1.404296875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.66%) |Training time=0.45s (20.69%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=13.98 +[2023-04-14 08:48:07,384] [INFO] [logging.py:96:log_dist] [Rank 0] step=20, skipped=3, lr=[1.6405000000000002e-06, 1.6405000000000002e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 08:48:07,402] [INFO] [timer.py:199:stop] epoch=0/micro_step=20/global_step=20, RunningAvgSamplesPerSec=109.13579418396058, CurrSamplesPerSec=98.81244018293347, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 08:48:07,495] [INFO] [logging.py:96:log_dist] [Rank 0] step=20, skipped=4, lr=[8.000000000000001e-07, 8.000000000000001e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 19|ppo_ep: 1|act_loss: 0.1060791015625|cri_loss: 0.056182861328125|unsuper_loss: 0.0 +average reward score: 1.443359375 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.88%) |Training time=0.49s (20.82%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=13.96 +[2023-04-14 08:48:09,536] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192 +epoch: 0|step: 20|ppo_ep: 1|act_loss: 0.0240478515625|cri_loss: 0.0189361572265625|unsuper_loss: 0.0 +average reward score: 1.62109375 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.13%) |Training time=0.45s (21.13%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.01 +epoch: 0|step: 21|ppo_ep: 1|act_loss: -0.0139617919921875|cri_loss: 0.00112152099609375|unsuper_loss: 0.0 +average reward score: 1.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.64%) |Training time=0.48s (21.81%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.04 +epoch: 0|step: 22|ppo_ep: 1|act_loss: 0.06494140625|cri_loss: 0.0443115234375|unsuper_loss: 0.0 +average reward score: 1.67578125 +------------------------------------------------------------------------------------- +|E2E latency=2.87s |Gather latency=0.00s (0.00%) |Generate time=1.58s (55.15%) |Training time=0.47s (16.54%) |Others=0.81 (28.32%)|CurSamplesPerSec=11.15 |AvgSamplesPerSec=13.88 +epoch: 0|step: 23|ppo_ep: 1|act_loss: 0.06280517578125|cri_loss: 0.04296875|unsuper_loss: 0.0 +average reward score: 1.5107421875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.40%) |Training time=0.48s (22.04%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=13.91 +epoch: 0|step: 24|ppo_ep: 1|act_loss: 0.11566162109375|cri_loss: 0.0645751953125|unsuper_loss: 0.0 +average reward score: 1.412109375 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.68s (74.14%) |Training time=0.49s (21.43%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.13 |AvgSamplesPerSec=13.92 +epoch: 0|step: 25|ppo_ep: 1|act_loss: 0.04132080078125|cri_loss: 0.030853271484375|unsuper_loss: 0.0 +average reward score: 2.07421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.28%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=13.95 +epoch: 0|step: 26|ppo_ep: 1|act_loss: 0.0771484375|cri_loss: 0.043548583984375|unsuper_loss: 0.0 +average reward score: 2.001953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.28%) |Training time=0.48s (22.04%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=13.98 +epoch: 0|step: 27|ppo_ep: 1|act_loss: 0.052703857421875|cri_loss: 0.030303955078125|unsuper_loss: 0.0 +average reward score: 1.6484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.17%) |Training time=0.48s (22.14%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.01 +epoch: 0|step: 28|ppo_ep: 1|act_loss: -0.023284912109375|cri_loss: -0.00701904296875|unsuper_loss: 0.0 +average reward score: 1.5791015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.55%) |Training time=0.47s (21.82%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.04 +[2023-04-14 08:48:29,822] [INFO] [logging.py:96:log_dist] [Rank 0] step=30, skipped=4, lr=[2.5090000000000005e-06, 2.5090000000000005e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 08:48:29,840] [INFO] [timer.py:199:stop] epoch=0/micro_step=30/global_step=30, RunningAvgSamplesPerSec=106.47952620942749, CurrSamplesPerSec=99.61674735645987, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 08:48:29,933] [INFO] [logging.py:96:log_dist] [Rank 0] step=30, skipped=4, lr=[1.3e-06, 1.3e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 29|ppo_ep: 1|act_loss: -0.11663818359375|cri_loss: -0.05499267578125|unsuper_loss: 0.0 +average reward score: 1.8896484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.10%) |Training time=0.48s (22.37%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.06 +epoch: 0|step: 30|ppo_ep: 1|act_loss: -0.067626953125|cri_loss: -0.02947998046875|unsuper_loss: 0.0 +average reward score: 1.865234375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.19%) |Training time=0.48s (22.18%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.08 +epoch: 0|step: 31|ppo_ep: 1|act_loss: -0.127685546875|cri_loss: -0.048828125|unsuper_loss: 0.0 +average reward score: 1.484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.27%) |Training time=0.48s (22.18%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.11 +epoch: 0|step: 32|ppo_ep: 1|act_loss: 0.05084228515625|cri_loss: 0.0467529296875|unsuper_loss: 0.0 +average reward score: 1.5107421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.46s (21.43%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.13 +epoch: 0|step: 33|ppo_ep: 1|act_loss: -0.0036468505859375|cri_loss: 0.000675201416015625|unsuper_loss: 0.0 +average reward score: 1.853515625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.07%) |Training time=0.46s (21.35%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.15 +epoch: 0|step: 34|ppo_ep: 1|act_loss: 0.03912353515625|cri_loss: 0.0289306640625|unsuper_loss: 0.0 +average reward score: 2.162109375 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.04%) |Training time=0.48s (20.58%) |Others=0.10 (4.38%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.14 +epoch: 0|step: 35|ppo_ep: 1|act_loss: 0.00030517578125|cri_loss: 0.0166015625|unsuper_loss: 0.0 +average reward score: 2.197265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.23%) |Training time=0.48s (22.20%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.15 +epoch: 0|step: 36|ppo_ep: 1|act_loss: 0.02532958984375|cri_loss: 0.0178680419921875|unsuper_loss: 0.0 +average reward score: 2.248046875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.47s (21.90%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.17 +epoch: 0|step: 37|ppo_ep: 1|act_loss: 0.0909423828125|cri_loss: 0.1270751953125|unsuper_loss: 0.0 +average reward score: 2.359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.48s (22.07%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.19 +epoch: 0|step: 38|ppo_ep: 1|act_loss: 0.1304931640625|cri_loss: 0.0841064453125|unsuper_loss: 0.0 +average reward score: 2.462890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.79%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.20 +[2023-04-14 08:48:51,574] [INFO] [logging.py:96:log_dist] [Rank 0] step=40, skipped=4, lr=[3.474e-06, 3.474e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 08:48:51,593] [INFO] [timer.py:199:stop] epoch=0/micro_step=40/global_step=40, RunningAvgSamplesPerSec=105.52605461942416, CurrSamplesPerSec=102.21813606772618, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 08:48:51,686] [INFO] [logging.py:96:log_dist] [Rank 0] step=40, skipped=4, lr=[1.8000000000000001e-06, 1.8000000000000001e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 39|ppo_ep: 1|act_loss: 0.02679443359375|cri_loss: 0.0269775390625|unsuper_loss: 0.0 +average reward score: 2.24609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.30%) |Training time=0.48s (22.05%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.22 +epoch: 0|step: 40|ppo_ep: 1|act_loss: 0.11529541015625|cri_loss: 0.06890869140625|unsuper_loss: 0.0 +average reward score: 2.169921875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.29%) |Training time=0.48s (22.07%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.23 +epoch: 0|step: 41|ppo_ep: 1|act_loss: 0.003875732421875|cri_loss: 0.0196533203125|unsuper_loss: 0.0 +average reward score: 2.478515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.48s (22.01%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.25 +epoch: 0|step: 42|ppo_ep: 1|act_loss: 0.06878662109375|cri_loss: 0.043701171875|unsuper_loss: 0.0 +average reward score: 2.244140625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.45%) |Training time=0.47s (21.89%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.26 +epoch: 0|step: 43|ppo_ep: 1|act_loss: 0.039947509765625|cri_loss: 0.03765869140625|unsuper_loss: 0.0 +average reward score: 2.173828125 +------------------------------------------------------------------------------------- +|E2E latency=2.42s |Gather latency=0.00s (0.00%) |Generate time=1.58s (65.32%) |Training time=0.47s (19.58%) |Others=0.37 (15.10%)|CurSamplesPerSec=13.20 |AvgSamplesPerSec=14.23 +epoch: 0|step: 44|ppo_ep: 1|act_loss: -0.0477294921875|cri_loss: -0.0135498046875|unsuper_loss: 0.0 +average reward score: 2.2265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.42%) |Training time=0.47s (21.88%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.25 +epoch: 0|step: 45|ppo_ep: 1|act_loss: 0.006374359130859375|cri_loss: 0.0103607177734375|unsuper_loss: 0.0 +average reward score: 2.35546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.43%) |Training time=0.47s (22.02%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.26 +epoch: 0|step: 46|ppo_ep: 1|act_loss: -0.04522705078125|cri_loss: -0.01043701171875|unsuper_loss: 0.0 +average reward score: 2.5859375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.21%) |Training time=0.48s (22.17%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.27 +epoch: 0|step: 47|ppo_ep: 1|act_loss: -0.21044921875|cri_loss: -0.07177734375|unsuper_loss: 0.0 +average reward score: 1.8818359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.62%) |Training time=0.47s (21.80%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.28 +epoch: 0|step: 48|ppo_ep: 1|act_loss: -0.071044921875|cri_loss: -0.0281982421875|unsuper_loss: 0.0 +average reward score: 1.77734375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.25%) |Training time=0.44s (20.25%) |Others=0.12 (5.50%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.29 +[2023-04-14 08:49:13,586] [INFO] [logging.py:96:log_dist] [Rank 0] step=50, skipped=4, lr=[4.439e-06, 4.439e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 08:49:13,604] [INFO] [timer.py:199:stop] epoch=0/micro_step=50/global_step=50, RunningAvgSamplesPerSec=105.16244786155492, CurrSamplesPerSec=102.06640283983927, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 08:49:13,697] [INFO] [logging.py:96:log_dist] [Rank 0] step=50, skipped=4, lr=[2.3000000000000004e-06, 2.3000000000000004e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 49|ppo_ep: 1|act_loss: -0.0042724609375|cri_loss: 0.0188140869140625|unsuper_loss: 0.0 +average reward score: 2.279296875 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.25%) |Training time=0.48s (20.51%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.79 |AvgSamplesPerSec=14.28 +epoch: 0|step: 50|ppo_ep: 1|act_loss: 0.0303955078125|cri_loss: 0.0217437744140625|unsuper_loss: 0.0 +average reward score: 1.8505859375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.13%) |Training time=0.48s (22.24%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.29 +epoch: 0|step: 51|ppo_ep: 1|act_loss: 0.0499267578125|cri_loss: 0.031097412109375|unsuper_loss: 0.0 +average reward score: 1.7294921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.35%) |Training time=0.47s (21.99%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.30 +epoch: 0|step: 52|ppo_ep: 1|act_loss: 0.080322265625|cri_loss: 0.04913330078125|unsuper_loss: 0.0 +average reward score: 2.24609375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.98%) |Training time=0.49s (22.40%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.31 +epoch: 0|step: 53|ppo_ep: 1|act_loss: -0.0433349609375|cri_loss: -0.0122833251953125|unsuper_loss: 0.0 +average reward score: 2.27734375 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.68s (74.28%) |Training time=0.48s (21.29%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.18 |AvgSamplesPerSec=14.31 +epoch: 0|step: 54|ppo_ep: 1|act_loss: -0.0277557373046875|cri_loss: -0.0034637451171875|unsuper_loss: 0.0 +average reward score: 2.482421875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.48%) |Training time=0.47s (21.87%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.32 +epoch: 0|step: 55|ppo_ep: 1|act_loss: -0.006175994873046875|cri_loss: 0.000583648681640625|unsuper_loss: 0.0 +average reward score: 2.197265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.10%) |Training time=0.48s (22.33%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.33 +epoch: 0|step: 56|ppo_ep: 1|act_loss: 0.05126953125|cri_loss: 0.0340576171875|unsuper_loss: 0.0 +average reward score: 1.505859375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.53%) |Training time=0.47s (21.83%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.33 +epoch: 0|step: 57|ppo_ep: 1|act_loss: 0.0333251953125|cri_loss: 0.02850341796875|unsuper_loss: 0.0 +average reward score: 1.888671875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.16%) |Training time=0.48s (22.19%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.34 +epoch: 0|step: 58|ppo_ep: 1|act_loss: 0.0157318115234375|cri_loss: 0.017181396484375|unsuper_loss: 0.0 +average reward score: 2.6796875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.23%) |Training time=0.48s (22.10%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.35 +[2023-04-14 08:49:35,261] [INFO] [logging.py:96:log_dist] [Rank 0] step=60, skipped=4, lr=[5.404000000000001e-06, 5.404000000000001e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 08:49:35,280] [INFO] [timer.py:199:stop] epoch=0/micro_step=60/global_step=60, RunningAvgSamplesPerSec=104.51481421567632, CurrSamplesPerSec=102.19540958042981, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 08:49:35,373] [INFO] [logging.py:96:log_dist] [Rank 0] step=60, skipped=4, lr=[2.8000000000000003e-06, 2.8000000000000003e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 59|ppo_ep: 1|act_loss: 0.1995849609375|cri_loss: 0.1263427734375|unsuper_loss: 0.0 +average reward score: 2.279296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.29%) |Training time=0.48s (22.07%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.36 +epoch: 0|step: 60|ppo_ep: 1|act_loss: 0.0227508544921875|cri_loss: 0.0230255126953125|unsuper_loss: 0.0 +average reward score: 2.01953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.24%) |Training time=0.48s (22.09%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.37 +epoch: 0|step: 61|ppo_ep: 1|act_loss: -0.019378662109375|cri_loss: 0.0001220703125|unsuper_loss: 0.0 +average reward score: 2.421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.40%) |Training time=0.47s (21.88%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.37 +epoch: 0|step: 62|ppo_ep: 1|act_loss: 0.0308380126953125|cri_loss: 0.02142333984375|unsuper_loss: 0.0 +average reward score: 2.513671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.23%) |Training time=0.48s (22.13%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.38 +epoch: 0|step: 63|ppo_ep: 1|act_loss: 0.01335906982421875|cri_loss: 0.0110626220703125|unsuper_loss: 0.0 +average reward score: 2.2421875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.14%) |Training time=0.46s (21.16%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.39 +epoch: 0|step: 64|ppo_ep: 1|act_loss: 0.07464599609375|cri_loss: 0.04296875|unsuper_loss: 0.0 +average reward score: 2.30859375 +------------------------------------------------------------------------------------- +|E2E latency=2.76s |Gather latency=0.00s (0.00%) |Generate time=1.75s (63.68%) |Training time=0.46s (16.86%) |Others=0.54 (19.46%)|CurSamplesPerSec=11.61 |AvgSamplesPerSec=14.34 +epoch: 0|step: 65|ppo_ep: 1|act_loss: -0.02947998046875|cri_loss: 0.002777099609375|unsuper_loss: 0.0 +average reward score: 2.33984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.47s (21.80%) |Others=0.10 (4.82%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.34 +epoch: 0|step: 66|ppo_ep: 1|act_loss: 0.08624267578125|cri_loss: 0.05859375|unsuper_loss: 0.0 +average reward score: 2.13671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.07%) |Training time=0.48s (22.27%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.35 +epoch: 0|step: 67|ppo_ep: 1|act_loss: 0.03778076171875|cri_loss: 0.02117919921875|unsuper_loss: 0.0 +average reward score: 1.986328125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.43%) |Training time=0.47s (21.91%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.36 +epoch: 0|step: 68|ppo_ep: 1|act_loss: 0.0061492919921875|cri_loss: 0.01113128662109375|unsuper_loss: 0.0 +average reward score: 2.46875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.36%) |Training time=0.48s (22.02%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.36 +[2023-04-14 08:49:57,426] [INFO] [logging.py:96:log_dist] [Rank 0] step=70, skipped=4, lr=[6.369000000000001e-06, 6.369000000000001e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 08:49:57,445] [INFO] [timer.py:199:stop] epoch=0/micro_step=70/global_step=70, RunningAvgSamplesPerSec=104.34243082596494, CurrSamplesPerSec=101.61912549440034, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 08:49:57,538] [INFO] [logging.py:96:log_dist] [Rank 0] step=70, skipped=4, lr=[3.3000000000000006e-06, 3.3000000000000006e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 69|ppo_ep: 1|act_loss: 0.04266357421875|cri_loss: 0.057037353515625|unsuper_loss: 0.0 +average reward score: 2.65234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.48s (22.18%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.37 +epoch: 0|step: 70|ppo_ep: 1|act_loss: -0.05322265625|cri_loss: 0.0018310546875|unsuper_loss: 0.0 +average reward score: 2.640625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.09%) |Training time=0.48s (22.26%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.38 +epoch: 0|step: 71|ppo_ep: 1|act_loss: -0.072509765625|cri_loss: -0.027374267578125|unsuper_loss: 0.0 +average reward score: 2.64453125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.35%) |Training time=0.47s (22.01%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.38 +epoch: 0|step: 72|ppo_ep: 1|act_loss: 0.0211181640625|cri_loss: 0.0151519775390625|unsuper_loss: 0.0 +average reward score: 1.9931640625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.33%) |Training time=0.47s (22.03%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.39 +epoch: 0|step: 73|ppo_ep: 1|act_loss: -0.056396484375|cri_loss: -0.0245513916015625|unsuper_loss: 0.0 +average reward score: 2.205078125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.36%) |Training time=0.47s (22.00%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.39 +epoch: 0|step: 74|ppo_ep: 1|act_loss: -0.10888671875|cri_loss: -0.047149658203125|unsuper_loss: 0.0 +average reward score: 2.083984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.63%) |Training time=0.47s (21.71%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.40 +epoch: 0|step: 75|ppo_ep: 1|act_loss: -0.0101776123046875|cri_loss: 2.288818359375e-05|unsuper_loss: 0.0 +average reward score: 2.140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.23%) |Training time=0.48s (22.16%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41 +epoch: 0|step: 76|ppo_ep: 1|act_loss: 0.1962890625|cri_loss: 0.11920166015625|unsuper_loss: 0.0 +average reward score: 2.0234375 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.60s (69.30%) |Training time=0.47s (20.44%) |Others=0.24 (10.26%)|CurSamplesPerSec=13.87 |AvgSamplesPerSec=14.40 +epoch: 0|step: 77|ppo_ep: 1|act_loss: 0.2744140625|cri_loss: 0.161865234375|unsuper_loss: 0.0 +average reward score: 2.255859375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.50%) |Training time=0.47s (21.85%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.40 +epoch: 0|step: 78|ppo_ep: 1|act_loss: 0.05914306640625|cri_loss: 0.03167724609375|unsuper_loss: 0.0 +average reward score: 1.486328125 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.15%) |Training time=0.49s (22.20%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.41 +[2023-04-14 08:50:19,193] [INFO] [logging.py:96:log_dist] [Rank 0] step=80, skipped=4, lr=[7.3340000000000004e-06, 7.3340000000000004e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 08:50:19,212] [INFO] [timer.py:199:stop] epoch=0/micro_step=80/global_step=80, RunningAvgSamplesPerSec=104.23033362572767, CurrSamplesPerSec=106.49301658358083, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 08:50:19,304] [INFO] [logging.py:96:log_dist] [Rank 0] step=80, skipped=4, lr=[3.8000000000000005e-06, 3.8000000000000005e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 79|ppo_ep: 1|act_loss: 0.04840087890625|cri_loss: 0.0286712646484375|unsuper_loss: 0.0 +average reward score: 2.51953125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.46s (21.51%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.41 +epoch: 0|step: 80|ppo_ep: 1|act_loss: -0.1134033203125|cri_loss: -0.039398193359375|unsuper_loss: 0.0 +average reward score: 2.19140625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.75%) |Training time=0.46s (21.58%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.42 +epoch: 0|step: 81|ppo_ep: 1|act_loss: -0.018402099609375|cri_loss: -0.006984710693359375|unsuper_loss: 0.0 +average reward score: 1.7470703125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.72%) |Training time=0.47s (21.67%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42 +epoch: 0|step: 82|ppo_ep: 1|act_loss: 0.0022106170654296875|cri_loss: 0.00585174560546875|unsuper_loss: 0.0 +average reward score: 2.59375 +------------------------------------------------------------------------------------- +|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.61s (72.33%) |Training time=0.48s (21.58%) |Others=0.14 (6.08%)|CurSamplesPerSec=14.34 |AvgSamplesPerSec=14.42 +epoch: 0|step: 83|ppo_ep: 1|act_loss: -0.027191162109375|cri_loss: -0.01220703125|unsuper_loss: 0.0 +average reward score: 2.3984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.68%) |Training time=0.47s (21.72%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.43 +epoch: 0|step: 84|ppo_ep: 1|act_loss: 0.021575927734375|cri_loss: 0.0121307373046875|unsuper_loss: 0.0 +average reward score: 2.166015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.68%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43 +epoch: 0|step: 85|ppo_ep: 1|act_loss: -0.00457763671875|cri_loss: 0.0264892578125|unsuper_loss: 0.0 +average reward score: 2.62890625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.74%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.43 +epoch: 0|step: 86|ppo_ep: 1|act_loss: 0.05242919921875|cri_loss: 0.030517578125|unsuper_loss: 0.0 +average reward score: 2.1796875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.61%) |Training time=0.47s (21.71%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.44 +epoch: 0|step: 87|ppo_ep: 1|act_loss: -0.063720703125|cri_loss: -0.02862548828125|unsuper_loss: 0.0 +average reward score: 2.689453125 +------------------------------------------------------------------------------------- +|E2E latency=3.10s |Gather latency=0.00s (0.00%) |Generate time=1.59s (51.29%) |Training time=0.47s (15.14%) |Others=1.04 (33.57%)|CurSamplesPerSec=10.32 |AvgSamplesPerSec=14.37 +epoch: 0|step: 88|ppo_ep: 1|act_loss: 0.04443359375|cri_loss: 0.0270843505859375|unsuper_loss: 0.0 +average reward score: 2.146484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.49%) |Training time=0.47s (21.85%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.38 +[2023-04-14 08:50:41,764] [INFO] [logging.py:96:log_dist] [Rank 0] step=90, skipped=4, lr=[8.299000000000001e-06, 8.299000000000001e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 08:50:41,783] [INFO] [timer.py:199:stop] epoch=0/micro_step=90/global_step=90, RunningAvgSamplesPerSec=104.24126059063528, CurrSamplesPerSec=104.1532635552079, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 08:50:41,875] [INFO] [logging.py:96:log_dist] [Rank 0] step=90, skipped=4, lr=[4.3e-06, 4.3e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 89|ppo_ep: 1|act_loss: 0.021820068359375|cri_loss: 0.01277923583984375|unsuper_loss: 0.0 +average reward score: 2.080078125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.67%) |Training time=0.47s (21.79%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.38 +epoch: 0|step: 90|ppo_ep: 1|act_loss: 0.04132080078125|cri_loss: 0.025177001953125|unsuper_loss: 0.0 +average reward score: 2.509765625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.80%) |Training time=0.49s (22.61%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.39 +epoch: 0|step: 91|ppo_ep: 1|act_loss: -0.03411865234375|cri_loss: -0.014068603515625|unsuper_loss: 0.0 +average reward score: 2.0859375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.56%) |Training time=0.50s (22.97%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.39 +epoch: 0|step: 92|ppo_ep: 1|act_loss: 0.0838623046875|cri_loss: 0.04376220703125|unsuper_loss: 0.0 +average reward score: 1.935546875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.47%) |Training time=0.50s (22.94%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.39 +epoch: 0|step: 93|ppo_ep: 1|act_loss: 0.052032470703125|cri_loss: 0.028717041015625|unsuper_loss: 0.0 +average reward score: 1.8779296875 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.63s (69.96%) |Training time=0.59s (25.52%) |Others=0.11 (4.52%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.38 +epoch: 0|step: 94|ppo_ep: 1|act_loss: 0.02972412109375|cri_loss: 0.0212554931640625|unsuper_loss: 0.0 +average reward score: 2.41796875 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.25%) |Training time=0.51s (23.21%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.39 +epoch: 0|step: 95|ppo_ep: 1|act_loss: -0.09625244140625|cri_loss: -0.038818359375|unsuper_loss: 0.0 +average reward score: 2.189453125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.46%) |Training time=0.50s (23.04%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.39 +epoch: 0|step: 96|ppo_ep: 1|act_loss: -0.003040313720703125|cri_loss: 0.001705169677734375|unsuper_loss: 0.0 +average reward score: 2.337890625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.65%) |Training time=0.50s (22.75%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.39 +epoch: 0|step: 97|ppo_ep: 1|act_loss: 0.00817108154296875|cri_loss: 0.006500244140625|unsuper_loss: 0.0 +average reward score: 1.982421875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.65%) |Training time=0.50s (22.78%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.39 +epoch: 0|step: 98|ppo_ep: 1|act_loss: 0.0372314453125|cri_loss: 0.0206298828125|unsuper_loss: 0.0 +average reward score: 2.24609375 +------------------------------------------------------------------------------------- +|E2E latency=2.80s |Gather latency=0.00s (0.00%) |Generate time=1.58s (56.59%) |Training time=0.50s (17.98%) |Others=0.71 (25.43%)|CurSamplesPerSec=11.43 |AvgSamplesPerSec=14.36 +[2023-04-14 08:51:04,421] [INFO] [logging.py:96:log_dist] [Rank 0] step=100, skipped=4, lr=[9.264e-06, 9.264e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 08:51:04,439] [INFO] [timer.py:199:stop] epoch=0/micro_step=100/global_step=100, RunningAvgSamplesPerSec=102.92779009185206, CurrSamplesPerSec=92.65775833740642, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 08:51:04,532] [INFO] [logging.py:96:log_dist] [Rank 0] step=100, skipped=4, lr=[4.800000000000001e-06, 4.800000000000001e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 99|ppo_ep: 1|act_loss: 0.050079345703125|cri_loss: 0.0273284912109375|unsuper_loss: 0.0 +average reward score: 2.841796875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.36%) |Training time=0.51s (23.17%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.36 +epoch: 0|step: 100|ppo_ep: 1|act_loss: -0.035552978515625|cri_loss: -0.0124969482421875|unsuper_loss: 0.0 +average reward score: 2.423828125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.53%) |Training time=0.50s (22.88%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.36 +epoch: 0|step: 101|ppo_ep: 1|act_loss: 0.02716064453125|cri_loss: 0.019012451171875|unsuper_loss: 0.0 +average reward score: 2.265625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.32%) |Training time=0.51s (23.11%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.36 +epoch: 0|step: 102|ppo_ep: 1|act_loss: 0.03790283203125|cri_loss: 0.020172119140625|unsuper_loss: 0.0 +average reward score: 1.931640625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.43%) |Training time=0.50s (23.01%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.37 +epoch: 0|step: 103|ppo_ep: 1|act_loss: -0.005924224853515625|cri_loss: -0.000888824462890625|unsuper_loss: 0.0 +average reward score: 2.119140625 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.53%) |Training time=0.51s (23.04%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.37 +epoch: 0|step: 104|ppo_ep: 1|act_loss: -0.02667236328125|cri_loss: 0.0159912109375|unsuper_loss: 0.0 +average reward score: 2.685546875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.51%) |Training time=0.50s (22.91%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.37 +epoch: 0|step: 105|ppo_ep: 1|act_loss: 0.0069122314453125|cri_loss: 0.00909423828125|unsuper_loss: 0.0 +average reward score: 3.01171875 +------------------------------------------------------------------------------------- +|E2E latency=3.44s |Gather latency=0.00s (0.00%) |Generate time=1.58s (46.01%) |Training time=0.50s (14.67%) |Others=1.35 (39.32%)|CurSamplesPerSec=9.30 |AvgSamplesPerSec=14.30 +epoch: 0|step: 106|ppo_ep: 1|act_loss: -0.02008056640625|cri_loss: -0.00409698486328125|unsuper_loss: 0.0 +average reward score: 4.37109375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.17%) |Training time=0.51s (23.28%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.30 +epoch: 0|step: 107|ppo_ep: 1|act_loss: 0.0693359375|cri_loss: 0.03973388671875|unsuper_loss: 0.0 +average reward score: 2.705078125 +------------------------------------------------------------------------------------- +|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.77s (74.59%) |Training time=0.50s (21.21%) |Others=0.10 (4.20%)|CurSamplesPerSec=13.49 |AvgSamplesPerSec=14.29 +epoch: 0|step: 108|ppo_ep: 1|act_loss: 0.0755615234375|cri_loss: 0.046112060546875|unsuper_loss: 0.0 +average reward score: 3.279296875 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.24%) |Training time=0.51s (23.12%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.29 +[2023-04-14 08:51:27,825] [INFO] [logging.py:96:log_dist] [Rank 0] step=110, skipped=4, lr=[9.649987126724682e-06, 9.649987126724682e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 08:51:27,843] [INFO] [timer.py:199:stop] epoch=0/micro_step=110/global_step=110, RunningAvgSamplesPerSec=101.84287411563196, CurrSamplesPerSec=84.43894398574668, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 08:51:27,936] [INFO] [logging.py:96:log_dist] [Rank 0] step=110, skipped=4, lr=[4.999993329909162e-06, 4.999993329909162e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 109|ppo_ep: 1|act_loss: 0.0775146484375|cri_loss: 0.044769287109375|unsuper_loss: 0.0 +average reward score: 3.353515625 +------------------------------------------------------------------------------------- +|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.60s (71.41%) |Training time=0.54s (24.22%) |Others=0.10 (4.36%)|CurSamplesPerSec=14.31 |AvgSamplesPerSec=14.29 +epoch: 0|step: 110|ppo_ep: 1|act_loss: 0.1475830078125|cri_loss: 0.1015625|unsuper_loss: 0.0 +average reward score: 3.5390625 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.62s (71.42%) |Training time=0.53s (23.33%) |Others=0.12 (5.25%)|CurSamplesPerSec=14.11 |AvgSamplesPerSec=14.29 +epoch: 0|step: 111|ppo_ep: 1|act_loss: -0.0105133056640625|cri_loss: -0.00122833251953125|unsuper_loss: 0.0 +average reward score: 3.443359375 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.65s (72.47%) |Training time=0.53s (23.06%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.02 |AvgSamplesPerSec=14.29 +epoch: 0|step: 112|ppo_ep: 1|act_loss: -0.0516357421875|cri_loss: -0.0228424072265625|unsuper_loss: 0.0 +average reward score: 3.765625 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.88%) |Training time=0.52s (23.59%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.29 +epoch: 0|step: 113|ppo_ep: 1|act_loss: -0.0172882080078125|cri_loss: 0.0036468505859375|unsuper_loss: 0.0 +average reward score: 3.423828125 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.68%) |Training time=0.53s (23.89%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.46 |AvgSamplesPerSec=14.29 +epoch: 0|step: 114|ppo_ep: 1|act_loss: 0.12249755859375|cri_loss: 0.0792236328125|unsuper_loss: 0.0 +average reward score: 2.9453125 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.01%) |Training time=0.52s (23.45%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.29 +epoch: 0|step: 115|ppo_ep: 1|act_loss: 0.03033447265625|cri_loss: 0.023590087890625|unsuper_loss: 0.0 +average reward score: 3.22265625 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.05%) |Training time=0.52s (23.51%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.46 |AvgSamplesPerSec=14.30 +epoch: 0|step: 116|ppo_ep: 1|act_loss: 0.004199981689453125|cri_loss: 0.00466156005859375|unsuper_loss: 0.0 +average reward score: 3.578125 +------------------------------------------------------------------------------------- +|E2E latency=2.08s |Gather latency=0.00s (0.00%) |Generate time=1.59s (76.49%) |Training time=0.39s (18.63%) |Others=0.10 (4.88%)|CurSamplesPerSec=15.41 |AvgSamplesPerSec=14.30 +epoch: 0|step: 117|ppo_ep: 1|act_loss: 0.0142974853515625|cri_loss: 0.00982666015625|unsuper_loss: 0.0 +average reward score: 3.892578125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.63%) |Training time=0.50s (22.89%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.31 +epoch: 0|step: 118|ppo_ep: 1|act_loss: 0.09619140625|cri_loss: 0.05419921875|unsuper_loss: 0.0 +average reward score: 3.646484375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.78%) |Training time=0.50s (22.64%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.31 +[2023-04-14 08:51:50,104] [INFO] [logging.py:96:log_dist] [Rank 0] step=120, skipped=4, lr=[9.649908456957608e-06, 9.649908456957608e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 08:51:50,123] [INFO] [timer.py:199:stop] epoch=0/micro_step=120/global_step=120, RunningAvgSamplesPerSec=101.11741645963838, CurrSamplesPerSec=93.20669082868693, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 08:51:50,215] [INFO] [logging.py:96:log_dist] [Rank 0] step=120, skipped=4, lr=[4.999952568371817e-06, 4.999952568371817e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 119|ppo_ep: 1|act_loss: 0.18896484375|cri_loss: 0.116455078125|unsuper_loss: 0.0 +average reward score: 3.484375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.48%) |Training time=0.51s (23.06%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.31 +epoch: 0|step: 120|ppo_ep: 1|act_loss: -0.066162109375|cri_loss: -0.029693603515625|unsuper_loss: 0.0 +average reward score: 3.103515625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.44%) |Training time=0.50s (23.01%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.31 +epoch: 0|step: 121|ppo_ep: 1|act_loss: -0.02349853515625|cri_loss: -0.00815582275390625|unsuper_loss: 0.0 +average reward score: 3.55859375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.30%) |Training time=0.51s (23.15%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.32 +epoch: 0|step: 122|ppo_ep: 1|act_loss: 0.03350830078125|cri_loss: 0.0233154296875|unsuper_loss: 0.0 +average reward score: 3.19921875 +------------------------------------------------------------------------------------- +|E2E latency=3.18s |Gather latency=0.00s (0.00%) |Generate time=1.75s (55.13%) |Training time=0.52s (16.37%) |Others=0.91 (28.50%)|CurSamplesPerSec=10.07 |AvgSamplesPerSec=14.27 +epoch: 0|step: 123|ppo_ep: 1|act_loss: -0.057373046875|cri_loss: -0.022796630859375|unsuper_loss: 0.0 +average reward score: 3.578125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.49%) |Training time=0.50s (22.68%) |Others=0.11 (4.82%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.27 +epoch: 0|step: 124|ppo_ep: 1|act_loss: 0.162109375|cri_loss: 0.1400146484375|unsuper_loss: 0.0 +average reward score: 4.0703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.00%) |Training time=0.49s (22.40%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.27 +epoch: 0|step: 125|ppo_ep: 1|act_loss: 0.07275390625|cri_loss: 0.042236328125|unsuper_loss: 0.0 +average reward score: 4.015625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.89%) |Training time=0.49s (22.46%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.28 +epoch: 0|step: 126|ppo_ep: 1|act_loss: -0.01470184326171875|cri_loss: -0.003173828125|unsuper_loss: 0.0 +average reward score: 3.93359375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.01%) |Training time=0.49s (22.36%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.28 +epoch: 0|step: 127|ppo_ep: 1|act_loss: 0.09210205078125|cri_loss: 0.05419921875|unsuper_loss: 0.0 +average reward score: 3.919921875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.68%) |Training time=0.50s (22.76%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.28 +epoch: 0|step: 128|ppo_ep: 1|act_loss: 0.0026092529296875|cri_loss: 0.01064300537109375|unsuper_loss: 0.0 +average reward score: 3.953125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.71%) |Training time=0.49s (22.66%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.29 +[2023-04-14 08:52:12,913] [INFO] [logging.py:96:log_dist] [Rank 0] step=130, skipped=4, lr=[9.649758270407744e-06, 9.649758270407744e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 08:52:12,931] [INFO] [timer.py:199:stop] epoch=0/micro_step=130/global_step=130, RunningAvgSamplesPerSec=100.66527352211149, CurrSamplesPerSec=96.38733686515383, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 08:52:13,024] [INFO] [logging.py:96:log_dist] [Rank 0] step=130, skipped=4, lr=[4.999874751506603e-06, 4.999874751506603e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 129|ppo_ep: 1|act_loss: 0.0672607421875|cri_loss: 0.040771484375|unsuper_loss: 0.0 +average reward score: 4.2265625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.90%) |Training time=0.49s (22.53%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.29 +epoch: 0|step: 130|ppo_ep: 1|act_loss: -0.0712890625|cri_loss: -0.033447265625|unsuper_loss: 0.0 +average reward score: 4.2265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.99%) |Training time=0.49s (22.39%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.29 +epoch: 0|step: 131|ppo_ep: 1|act_loss: -0.0435791015625|cri_loss: -0.015869140625|unsuper_loss: 0.0 +average reward score: 4.5234375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.73%) |Training time=0.50s (22.74%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.30 +epoch: 0|step: 132|ppo_ep: 1|act_loss: 0.0189971923828125|cri_loss: 0.0142822265625|unsuper_loss: 0.0 +average reward score: 3.80859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.03%) |Training time=0.49s (22.35%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.30 +epoch: 0|step: 133|ppo_ep: 1|act_loss: 0.0258636474609375|cri_loss: 0.017120361328125|unsuper_loss: 0.0 +average reward score: 4.05078125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.54%) |Training time=0.50s (22.89%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.30 +epoch: 0|step: 134|ppo_ep: 1|act_loss: -0.128662109375|cri_loss: -0.00958251953125|unsuper_loss: 0.0 +average reward score: 4.80078125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.77%) |Training time=0.49s (22.54%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.30 +epoch: 0|step: 135|ppo_ep: 1|act_loss: -0.119384765625|cri_loss: -0.05499267578125|unsuper_loss: 0.0 +average reward score: 3.833984375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.76%) |Training time=0.49s (22.65%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.31 +epoch: 0|step: 136|ppo_ep: 1|act_loss: -0.12493896484375|cri_loss: -0.05194091796875|unsuper_loss: 0.0 +average reward score: 3.76171875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.74%) |Training time=0.49s (22.60%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.31 +epoch: 0|step: 137|ppo_ep: 1|act_loss: 0.04376220703125|cri_loss: 0.0254669189453125|unsuper_loss: 0.0 +average reward score: 4.1484375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.81%) |Training time=0.49s (22.38%) |Others=0.10 (4.81%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.31 +epoch: 0|step: 138|ppo_ep: 1|act_loss: -0.00145721435546875|cri_loss: 0.002796173095703125|unsuper_loss: 0.0 +average reward score: 4.17578125 +------------------------------------------------------------------------------------- +|E2E latency=2.38s |Gather latency=0.00s (0.00%) |Generate time=1.65s (69.33%) |Training time=0.61s (25.81%) |Others=0.12 (4.85%)|CurSamplesPerSec=13.46 |AvgSamplesPerSec=14.31 +[2023-04-14 08:52:35,041] [INFO] [logging.py:96:log_dist] [Rank 0] step=140, skipped=4, lr=[9.649536569301218e-06, 9.649536569301218e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 08:52:35,060] [INFO] [timer.py:199:stop] epoch=0/micro_step=140/global_step=140, RunningAvgSamplesPerSec=99.93906583355329, CurrSamplesPerSec=69.75915377003102, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 08:52:35,152] [INFO] [logging.py:96:log_dist] [Rank 0] step=140, skipped=4, lr=[4.9997598804669524e-06, 4.9997598804669524e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 139|ppo_ep: 1|act_loss: -0.0291748046875|cri_loss: -0.00913238525390625|unsuper_loss: 0.0 +average reward score: 4.02734375 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.60s (68.45%) |Training time=0.64s (27.28%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.30 +epoch: 0|step: 140|ppo_ep: 1|act_loss: 0.0887451171875|cri_loss: 0.0537109375|unsuper_loss: 0.0 +average reward score: 4.5234375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.62%) |Training time=0.50s (22.80%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.30 +epoch: 0|step: 141|ppo_ep: 1|act_loss: -0.0107574462890625|cri_loss: -0.003376007080078125|unsuper_loss: 0.0 +average reward score: 4.21484375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.87%) |Training time=0.49s (22.62%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.31 +epoch: 0|step: 142|ppo_ep: 1|act_loss: -0.044647216796875|cri_loss: -0.01788330078125|unsuper_loss: 0.0 +average reward score: 5.0234375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.71%) |Training time=0.49s (22.69%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.31 +epoch: 0|step: 143|ppo_ep: 1|act_loss: 0.0205841064453125|cri_loss: 0.013397216796875|unsuper_loss: 0.0 +average reward score: 3.9609375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.21%) |Training time=0.51s (23.10%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.31 +epoch: 0|step: 144|ppo_ep: 1|act_loss: -0.0972900390625|cri_loss: -0.045196533203125|unsuper_loss: 0.0 +average reward score: 4.68359375 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.70%) |Training time=0.52s (23.78%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.31 +epoch: 0|step: 145|ppo_ep: 1|act_loss: -0.054412841796875|cri_loss: -0.023834228515625|unsuper_loss: 0.0 +average reward score: 5.06640625 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.83%) |Training time=0.52s (23.63%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.54 |AvgSamplesPerSec=14.31 +epoch: 0|step: 146|ppo_ep: 1|act_loss: 0.040130615234375|cri_loss: 0.0330810546875|unsuper_loss: 0.0 +average reward score: 4.31640625 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.13%) |Training time=0.51s (23.32%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.32 +epoch: 0|step: 147|ppo_ep: 1|act_loss: -0.01995849609375|cri_loss: -0.001068115234375|unsuper_loss: 0.0 +average reward score: 4.13671875 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.60%) |Training time=0.53s (23.86%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.32 +epoch: 0|step: 148|ppo_ep: 1|act_loss: 0.0291748046875|cri_loss: 0.0229949951171875|unsuper_loss: 0.0 +average reward score: 5.375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.05%) |Training time=0.51s (23.41%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.32 +[2023-04-14 08:52:56,981] [INFO] [logging.py:96:log_dist] [Rank 0] step=150, skipped=4, lr=[9.649243356924173e-06, 9.649243356924173e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 08:52:57,000] [INFO] [timer.py:199:stop] epoch=0/micro_step=150/global_step=150, RunningAvgSamplesPerSec=99.35793580311184, CurrSamplesPerSec=91.16621508445346, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 08:52:57,092] [INFO] [logging.py:96:log_dist] [Rank 0] step=150, skipped=4, lr=[4.99960795695553e-06, 4.99960795695553e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 149|ppo_ep: 1|act_loss: -0.01128387451171875|cri_loss: 0.0003204345703125|unsuper_loss: 0.0 +average reward score: 5.21484375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.14%) |Training time=0.51s (23.33%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.32 +epoch: 0|step: 150|ppo_ep: 1|act_loss: 0.0914306640625|cri_loss: 0.050262451171875|unsuper_loss: 0.0 +average reward score: 5.7265625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.35%) |Training time=0.51s (23.07%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.32 +epoch: 0|step: 151|ppo_ep: 1|act_loss: 0.03863525390625|cri_loss: 0.022003173828125|unsuper_loss: 0.0 +average reward score: 4.93359375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.25%) |Training time=0.51s (23.04%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.32 +epoch: 0|step: 152|ppo_ep: 1|act_loss: -0.0318603515625|cri_loss: -0.01226806640625|unsuper_loss: 0.0 +average reward score: 5.48828125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.73%) |Training time=0.50s (22.70%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.33 +epoch: 0|step: 153|ppo_ep: 1|act_loss: 0.026123046875|cri_loss: 0.0231781005859375|unsuper_loss: 0.0 +average reward score: 6.06640625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.58%) |Training time=0.50s (22.94%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.33 +epoch: 0|step: 154|ppo_ep: 1|act_loss: -0.1361083984375|cri_loss: -0.059722900390625|unsuper_loss: 0.0 +average reward score: 6.58984375 +------------------------------------------------------------------------------------- +|E2E latency=2.38s |Gather latency=0.00s (0.00%) |Generate time=1.77s (74.35%) |Training time=0.51s (21.45%) |Others=0.10 (4.20%)|CurSamplesPerSec=13.46 |AvgSamplesPerSec=14.32 +epoch: 0|step: 155|ppo_ep: 1|act_loss: 0.053131103515625|cri_loss: 0.0291748046875|unsuper_loss: 0.0 +average reward score: 6.1796875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.24%) |Training time=0.51s (23.20%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.32 +epoch: 0|step: 156|ppo_ep: 1|act_loss: 0.0271148681640625|cri_loss: 0.018280029296875|unsuper_loss: 0.0 +average reward score: 5.79296875 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.74%) |Training time=0.53s (23.75%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.32 +epoch: 0|step: 157|ppo_ep: 1|act_loss: 0.05975341796875|cri_loss: 0.036865234375|unsuper_loss: 0.0 +average reward score: 6.02734375 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.59s (68.33%) |Training time=0.50s (21.58%) |Others=0.23 (10.09%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.32 +epoch: 0|step: 158|ppo_ep: 1|act_loss: 0.025848388671875|cri_loss: 0.0191802978515625|unsuper_loss: 0.0 +average reward score: 5.90625 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.08%) |Training time=0.51s (23.36%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.32 +[2023-04-14 08:53:19,249] [INFO] [logging.py:96:log_dist] [Rank 0] step=160, skipped=4, lr=[9.648878637622726e-06, 9.648878637622726e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 08:53:19,268] [INFO] [timer.py:199:stop] epoch=0/micro_step=160/global_step=160, RunningAvgSamplesPerSec=98.90795954004966, CurrSamplesPerSec=93.22086815920744, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 08:53:19,360] [INFO] [logging.py:96:log_dist] [Rank 0] step=160, skipped=4, lr=[4.99941898322421e-06, 4.99941898322421e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 159|ppo_ep: 1|act_loss: 0.0384521484375|cri_loss: 0.0228424072265625|unsuper_loss: 0.0 +average reward score: 7.49609375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.33%) |Training time=0.51s (23.12%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.32 +epoch: 0|step: 160|ppo_ep: 1|act_loss: 0.0286102294921875|cri_loss: 0.020538330078125|unsuper_loss: 0.0 +average reward score: 5.2734375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.16%) |Training time=0.51s (23.28%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.32 +epoch: 0|step: 161|ppo_ep: 1|act_loss: -0.0103302001953125|cri_loss: -0.00080108642578125|unsuper_loss: 0.0 +average reward score: 5.40234375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.35%) |Training time=0.51s (23.17%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.33 +epoch: 0|step: 162|ppo_ep: 1|act_loss: 0.14404296875|cri_loss: 0.08203125|unsuper_loss: 0.0 +average reward score: 6.02734375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.39%) |Training time=0.50s (23.04%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.33 +epoch: 0|step: 163|ppo_ep: 1|act_loss: 0.0390625|cri_loss: 0.0228118896484375|unsuper_loss: 0.0 +average reward score: 5.38671875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.72%) |Training time=0.50s (22.77%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.33 +epoch: 0|step: 164|ppo_ep: 1|act_loss: -0.00506591796875|cri_loss: 0.0015716552734375|unsuper_loss: 0.0 +average reward score: 5.30859375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.39%) |Training time=0.50s (23.05%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.33 +epoch: 0|step: 165|ppo_ep: 1|act_loss: -0.0205078125|cri_loss: -0.007106781005859375|unsuper_loss: 0.0 +average reward score: 5.0546875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.27%) |Training time=0.51s (23.17%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.33 +epoch: 0|step: 166|ppo_ep: 1|act_loss: 0.03875732421875|cri_loss: 0.021942138671875|unsuper_loss: 0.0 +average reward score: 5.3671875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.58%) |Training time=0.50s (22.84%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.34 +epoch: 0|step: 167|ppo_ep: 1|act_loss: -0.048583984375|cri_loss: -0.0202789306640625|unsuper_loss: 0.0 +average reward score: 4.83984375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.05%) |Training time=0.49s (22.47%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.34 +epoch: 0|step: 168|ppo_ep: 1|act_loss: 0.0002899169921875|cri_loss: 0.0030384063720703125|unsuper_loss: 0.0 +average reward score: 5.38671875 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.61s (72.58%) |Training time=0.50s (22.31%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.39 |AvgSamplesPerSec=14.34 +[2023-04-14 08:53:41,327] [INFO] [logging.py:96:log_dist] [Rank 0] step=170, skipped=4, lr=[9.648442416802894e-06, 9.648442416802894e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 08:53:41,345] [INFO] [timer.py:199:stop] epoch=0/micro_step=170/global_step=170, RunningAvgSamplesPerSec=98.61371514796177, CurrSamplesPerSec=93.73256923212298, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 08:53:41,442] [INFO] [logging.py:96:log_dist] [Rank 0] step=170, skipped=4, lr=[4.999192962074038e-06, 4.999192962074038e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 169|ppo_ep: 1|act_loss: 0.0283203125|cri_loss: 0.018218994140625|unsuper_loss: 0.0 +average reward score: 5.25 +------------------------------------------------------------------------------------- +|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.29%) |Training time=0.50s (21.39%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.58 |AvgSamplesPerSec=14.33 +epoch: 0|step: 170|ppo_ep: 1|act_loss: -0.010101318359375|cri_loss: -0.0031566619873046875|unsuper_loss: 0.0 +average reward score: 5.140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.48s (22.03%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.34 +epoch: 0|step: 171|ppo_ep: 1|act_loss: -0.04736328125|cri_loss: -0.0209808349609375|unsuper_loss: 0.0 +average reward score: 5.2578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.48s (22.03%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.34 +epoch: 0|step: 172|ppo_ep: 1|act_loss: -0.06488037109375|cri_loss: -0.029296875|unsuper_loss: 0.0 +average reward score: 5.10546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.40%) |Training time=0.47s (21.95%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.34 +epoch: 0|step: 173|ppo_ep: 1|act_loss: -0.0254974365234375|cri_loss: -0.0096588134765625|unsuper_loss: 0.0 +average reward score: 5.57421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.91%) |Training time=0.49s (22.49%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34 +epoch: 0|step: 174|ppo_ep: 1|act_loss: -0.0384521484375|cri_loss: -0.0167694091796875|unsuper_loss: 0.0 +average reward score: 5.47265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.74%) |Training time=0.47s (21.59%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.35 +epoch: 0|step: 175|ppo_ep: 1|act_loss: -0.004512786865234375|cri_loss: -0.000179290771484375|unsuper_loss: 0.0 +average reward score: 5.0390625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.89%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.35 +epoch: 0|step: 176|ppo_ep: 1|act_loss: 0.031982421875|cri_loss: 0.019287109375|unsuper_loss: 0.0 +average reward score: 5.171875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.90%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.35 +epoch: 0|step: 177|ppo_ep: 1|act_loss: 0.0201873779296875|cri_loss: 0.01238250732421875|unsuper_loss: 0.0 +average reward score: 5.04296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.72%) |Training time=0.47s (21.62%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.36 +epoch: 0|step: 178|ppo_ep: 1|act_loss: 0.0927734375|cri_loss: 0.04998779296875|unsuper_loss: 0.0 +average reward score: 5.0625 +------------------------------------------------------------------------------------- +|E2E latency=2.40s |Gather latency=0.00s (0.00%) |Generate time=1.58s (66.01%) |Training time=0.49s (20.57%) |Others=0.32 (13.42%)|CurSamplesPerSec=13.36 |AvgSamplesPerSec=14.35 +[2023-04-14 08:54:03,202] [INFO] [logging.py:96:log_dist] [Rank 0] step=180, skipped=4, lr=[9.647934700930525e-06, 9.647934700930525e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 08:54:03,220] [INFO] [timer.py:199:stop] epoch=0/micro_step=180/global_step=180, RunningAvgSamplesPerSec=98.71604805374989, CurrSamplesPerSec=87.31243836243797, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 08:54:03,313] [INFO] [logging.py:96:log_dist] [Rank 0] step=180, skipped=4, lr=[4.998929896855195e-06, 4.998929896855195e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 179|ppo_ep: 1|act_loss: 0.01641845703125|cri_loss: 0.0104217529296875|unsuper_loss: 0.0 +average reward score: 5.8515625 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.49%) |Training time=0.53s (23.88%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.42 |AvgSamplesPerSec=14.35 +epoch: 0|step: 180|ppo_ep: 1|act_loss: -0.001728057861328125|cri_loss: 0.001316070556640625|unsuper_loss: 0.0 +average reward score: 5.23046875 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.03%) |Training time=0.50s (22.31%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.40 |AvgSamplesPerSec=14.35 +epoch: 0|step: 181|ppo_ep: 1|act_loss: -0.0086212158203125|cri_loss: -0.0017242431640625|unsuper_loss: 0.0 +average reward score: 5.5390625 +------------------------------------------------------------------------------------- +|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.65s (73.73%) |Training time=0.48s (21.66%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.32 |AvgSamplesPerSec=14.35 +epoch: 0|step: 182|ppo_ep: 1|act_loss: 0.02783203125|cri_loss: 0.0161590576171875|unsuper_loss: 0.0 +average reward score: 5.4609375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.40%) |Training time=0.44s (19.93%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.35 +epoch: 0|step: 183|ppo_ep: 1|act_loss: -0.01708984375|cri_loss: -0.005275726318359375|unsuper_loss: 0.0 +average reward score: 5.3515625 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.05%) |Training time=0.50s (22.47%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.51 |AvgSamplesPerSec=14.35 +epoch: 0|step: 184|ppo_ep: 1|act_loss: -0.0195159912109375|cri_loss: -0.008209228515625|unsuper_loss: 0.0 +average reward score: 4.60546875 +------------------------------------------------------------------------------------- +|E2E latency=2.38s |Gather latency=0.00s (0.00%) |Generate time=1.82s (76.48%) |Training time=0.46s (19.21%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.47 |AvgSamplesPerSec=14.35 +epoch: 0|step: 185|ppo_ep: 1|act_loss: -0.00017547607421875|cri_loss: 0.0018596649169921875|unsuper_loss: 0.0 +average reward score: 5.6640625 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.65s (74.95%) |Training time=0.45s (20.43%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.35 +[2023-04-14 08:54:18,825] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, but hysteresis is 2. Reducing hysteresis to 1 +[2023-04-14 08:54:18,911] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 186|ppo_ep: 1|act_loss: -0.01111602783203125|cri_loss: -0.004123687744140625|unsuper_loss: 0.0 +average reward score: 4.5703125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.23%) |Training time=0.42s (19.42%) |Others=0.09 (4.35%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.35 +epoch: 0|step: 187|ppo_ep: 1|act_loss: -0.0054168701171875|cri_loss: -0.000263214111328125|unsuper_loss: 0.0 +average reward score: 4.9609375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.84%) |Training time=0.47s (21.53%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.35 +epoch: 0|step: 188|ppo_ep: 1|act_loss: 0.01232147216796875|cri_loss: 0.01074981689453125|unsuper_loss: 0.0 +average reward score: 6.3125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.84%) |Training time=0.47s (21.45%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.35 +[2023-04-14 08:54:25,388] [INFO] [logging.py:96:log_dist] [Rank 0] step=190, skipped=5, lr=[9.647416634573466e-06, 9.647416634573466e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 08:54:25,406] [INFO] [timer.py:199:stop] epoch=0/micro_step=190/global_step=190, RunningAvgSamplesPerSec=99.07856090082578, CurrSamplesPerSec=102.86956528403809, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 08:54:25,499] [INFO] [logging.py:96:log_dist] [Rank 0] step=190, skipped=5, lr=[4.998661468690914e-06, 4.998661468690914e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 189|ppo_ep: 1|act_loss: -0.072509765625|cri_loss: -0.030487060546875|unsuper_loss: 0.0 +average reward score: 6.27734375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.93%) |Training time=0.47s (21.60%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.35 +epoch: 0|step: 190|ppo_ep: 1|act_loss: -0.04168701171875|cri_loss: -0.017730712890625|unsuper_loss: 0.0 +average reward score: 5.90625 +------------------------------------------------------------------------------------- +|E2E latency=2.84s |Gather latency=0.00s (0.00%) |Generate time=1.62s (57.15%) |Training time=0.47s (16.46%) |Others=0.75 (26.39%)|CurSamplesPerSec=11.28 |AvgSamplesPerSec=14.33 +epoch: 0|step: 191|ppo_ep: 1|act_loss: 0.0439453125|cri_loss: 0.028289794921875|unsuper_loss: 0.0 +average reward score: 6.421875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.23%) |Training time=0.46s (21.21%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.33 +epoch: 0|step: 192|ppo_ep: 1|act_loss: 0.000202178955078125|cri_loss: 0.003143310546875|unsuper_loss: 0.0 +average reward score: 5.27734375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.94%) |Training time=0.47s (21.45%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.34 +epoch: 0|step: 193|ppo_ep: 1|act_loss: 0.0016498565673828125|cri_loss: 0.00395965576171875|unsuper_loss: 0.0 +average reward score: 6.703125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.34%) |Training time=0.46s (21.02%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.34 +epoch: 0|step: 194|ppo_ep: 1|act_loss: -0.04425048828125|cri_loss: -0.016937255859375|unsuper_loss: 0.0 +average reward score: 6.85546875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.45%) |Training time=0.46s (20.94%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.34 +epoch: 0|step: 195|ppo_ep: 1|act_loss: -0.0312347412109375|cri_loss: -0.0128936767578125|unsuper_loss: 0.0 +average reward score: 6.02734375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.63%) |Training time=0.46s (20.86%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.34 +epoch: 0|step: 196|ppo_ep: 1|act_loss: 0.04901123046875|cri_loss: 0.030487060546875|unsuper_loss: 0.0 +average reward score: 6.328125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.66s (75.86%) |Training time=0.43s (19.50%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.34 +epoch: 0|step: 197|ppo_ep: 1|act_loss: 0.0048675537109375|cri_loss: 0.004283905029296875|unsuper_loss: 0.0 +average reward score: 7.109375 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.70s (73.37%) |Training time=0.51s (22.19%) |Others=0.10 (4.44%)|CurSamplesPerSec=13.83 |AvgSamplesPerSec=14.34 +epoch: 0|step: 198|ppo_ep: 1|act_loss: 0.027496337890625|cri_loss: 0.01763916015625|unsuper_loss: 0.0 +average reward score: 6.359375 +------------------------------------------------------------------------------------- +|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.62s (72.68%) |Training time=0.47s (20.95%) |Others=0.14 (6.38%)|CurSamplesPerSec=14.33 |AvgSamplesPerSec=14.34 +[2023-04-14 08:54:48,213] [INFO] [logging.py:96:log_dist] [Rank 0] step=200, skipped=5, lr=[9.646773099710006e-06, 9.646773099710006e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 08:54:48,231] [INFO] [timer.py:199:stop] epoch=0/micro_step=200/global_step=200, RunningAvgSamplesPerSec=99.47785048462806, CurrSamplesPerSec=113.295647388129, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 08:54:48,323] [INFO] [logging.py:96:log_dist] [Rank 0] step=200, skipped=5, lr=[4.998328030937827e-06, 4.998328030937827e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 199|ppo_ep: 1|act_loss: -0.10589599609375|cri_loss: -0.049896240234375|unsuper_loss: 0.0 +average reward score: 6.046875 +------------------------------------------------------------------------------------- +|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.76s (76.27%) |Training time=0.45s (19.35%) |Others=0.10 (4.38%)|CurSamplesPerSec=13.90 |AvgSamplesPerSec=14.34 +epoch: 0|step: 200|ppo_ep: 1|act_loss: 0.00897216796875|cri_loss: 0.0068206787109375|unsuper_loss: 0.0 +average reward score: 5.69140625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.10%) |Training time=0.47s (21.29%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.34 +epoch: 0|step: 201|ppo_ep: 1|act_loss: 9.5367431640625e-07|cri_loss: 0.0008797645568847656|unsuper_loss: 0.0 +average reward score: 5.99609375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.91%) |Training time=0.47s (21.50%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.34 +epoch: 0|step: 202|ppo_ep: 1|act_loss: -0.0102386474609375|cri_loss: -0.003673553466796875|unsuper_loss: 0.0 +average reward score: 5.32421875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.84%) |Training time=0.47s (21.48%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.34 +epoch: 0|step: 203|ppo_ep: 1|act_loss: -0.0355224609375|cri_loss: -0.014923095703125|unsuper_loss: 0.0 +average reward score: 4.9296875 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.86%) |Training time=0.48s (21.66%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.34 +epoch: 0|step: 204|ppo_ep: 1|act_loss: -0.04193115234375|cri_loss: -0.0184326171875|unsuper_loss: 0.0 +average reward score: 5.953125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.88%) |Training time=0.47s (21.52%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.34 +epoch: 0|step: 205|ppo_ep: 1|act_loss: 0.03240966796875|cri_loss: 0.0181121826171875|unsuper_loss: 0.0 +average reward score: 5.54296875 +------------------------------------------------------------------------------------- +|E2E latency=2.62s |Gather latency=0.00s (0.00%) |Generate time=1.63s (62.21%) |Training time=0.47s (17.92%) |Others=0.52 (19.87%)|CurSamplesPerSec=12.23 |AvgSamplesPerSec=14.33 +epoch: 0|step: 206|ppo_ep: 1|act_loss: 0.05206298828125|cri_loss: 0.0300445556640625|unsuper_loss: 0.0 +average reward score: 5.921875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.11%) |Training time=0.47s (21.30%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.33 +epoch: 0|step: 207|ppo_ep: 1|act_loss: -0.00235748291015625|cri_loss: 0.000972747802734375|unsuper_loss: 0.0 +average reward score: 5.6015625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.94%) |Training time=0.47s (21.55%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.33 +epoch: 0|step: 208|ppo_ep: 1|act_loss: 0.000396728515625|cri_loss: 0.006542205810546875|unsuper_loss: 0.0 +average reward score: 6.625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.83%) |Training time=0.47s (21.59%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.33 +[2023-04-14 08:55:10,546] [INFO] [logging.py:96:log_dist] [Rank 0] step=210, skipped=5, lr=[9.646058094537316e-06, 9.646058094537316e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 08:55:10,564] [INFO] [timer.py:199:stop] epoch=0/micro_step=210/global_step=210, RunningAvgSamplesPerSec=99.69265405081147, CurrSamplesPerSec=104.76069635587082, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 08:55:10,657] [INFO] [logging.py:96:log_dist] [Rank 0] step=210, skipped=5, lr=[4.997957561936433e-06, 4.997957561936433e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 209|ppo_ep: 1|act_loss: -0.060089111328125|cri_loss: -0.027008056640625|unsuper_loss: 0.0 +average reward score: 5.8125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.15%) |Training time=0.47s (21.36%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.34 +epoch: 0|step: 210|ppo_ep: 1|act_loss: -0.004329681396484375|cri_loss: 0.000911712646484375|unsuper_loss: 0.0 +average reward score: 6.90625 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.43%) |Training time=0.48s (21.94%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.51 |AvgSamplesPerSec=14.34 +epoch: 0|step: 211|ppo_ep: 1|act_loss: -0.03790283203125|cri_loss: -0.016845703125|unsuper_loss: 0.0 +average reward score: 7.890625 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.05%) |Training time=0.45s (20.46%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.34 +epoch: 0|step: 212|ppo_ep: 1|act_loss: -0.022186279296875|cri_loss: -0.0086212158203125|unsuper_loss: 0.0 +average reward score: 6.6640625 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.65s (74.46%) |Training time=0.46s (20.99%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.34 +epoch: 0|step: 213|ppo_ep: 1|act_loss: 0.007564544677734375|cri_loss: 0.007236480712890625|unsuper_loss: 0.0 +average reward score: 5.8203125 +------------------------------------------------------------------------------------- +|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.79s (76.07%) |Training time=0.47s (19.74%) |Others=0.10 (4.18%)|CurSamplesPerSec=13.58 |AvgSamplesPerSec=14.33 +epoch: 0|step: 214|ppo_ep: 1|act_loss: 0.022003173828125|cri_loss: 0.01690673828125|unsuper_loss: 0.0 +average reward score: 6.3515625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.98%) |Training time=0.47s (21.41%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.34 +epoch: 0|step: 215|ppo_ep: 1|act_loss: -0.10302734375|cri_loss: -0.040618896484375|unsuper_loss: 0.0 +average reward score: 5.890625 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.26%) |Training time=0.46s (21.16%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.34 +epoch: 0|step: 216|ppo_ep: 1|act_loss: -0.0308685302734375|cri_loss: -0.0083160400390625|unsuper_loss: 0.0 +average reward score: 5.8828125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.06%) |Training time=0.46s (21.18%) |Others=0.10 (4.76%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.34 +epoch: 0|step: 217|ppo_ep: 1|act_loss: 0.0677490234375|cri_loss: 0.042144775390625|unsuper_loss: 0.0 +average reward score: 5.3671875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.99%) |Training time=0.47s (21.39%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.34 +epoch: 0|step: 218|ppo_ep: 1|act_loss: 0.1029052734375|cri_loss: 0.0599365234375|unsuper_loss: 0.0 +average reward score: 6.484375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.53%) |Training time=0.48s (21.86%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.34 +[2023-04-14 08:55:32,679] [INFO] [logging.py:96:log_dist] [Rank 0] step=220, skipped=5, lr=[9.645271629653494e-06, 9.645271629653494e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 08:55:32,901] [INFO] [timer.py:199:stop] epoch=0/micro_step=220/global_step=220, RunningAvgSamplesPerSec=99.60717426750885, CurrSamplesPerSec=60.76466806591054, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 08:55:32,994] [INFO] [logging.py:96:log_dist] [Rank 0] step=220, skipped=5, lr=[4.9975500671779765e-06, 4.9975500671779765e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 219|ppo_ep: 1|act_loss: 0.13671875|cri_loss: 0.07745361328125|unsuper_loss: 0.0 +average reward score: 6.9453125 +------------------------------------------------------------------------------------- +|E2E latency=2.41s |Gather latency=0.00s (0.00%) |Generate time=1.62s (67.15%) |Training time=0.69s (28.68%) |Others=0.10 (4.17%)|CurSamplesPerSec=13.30 |AvgSamplesPerSec=14.34 +epoch: 0|step: 220|ppo_ep: 1|act_loss: 0.0733642578125|cri_loss: 0.04412841796875|unsuper_loss: 0.0 +average reward score: 7.16796875 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.43%) |Training time=0.46s (20.97%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.34 +[2023-04-14 08:55:37,263] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192 +epoch: 0|step: 221|ppo_ep: 1|act_loss: -0.236328125|cri_loss: -0.09283447265625|unsuper_loss: 0.0 +average reward score: 5.28125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.44%) |Training time=0.43s (19.93%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.34 +epoch: 0|step: 222|ppo_ep: 1|act_loss: -0.042388916015625|cri_loss: -0.016998291015625|unsuper_loss: 0.0 +average reward score: 6.92578125 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.33%) |Training time=0.46s (21.08%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.34 +epoch: 0|step: 223|ppo_ep: 1|act_loss: -0.0716552734375|cri_loss: -0.0293731689453125|unsuper_loss: 0.0 +average reward score: 5.84375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.29%) |Training time=0.47s (21.22%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.34 +epoch: 0|step: 224|ppo_ep: 1|act_loss: 0.0211181640625|cri_loss: 0.01495361328125|unsuper_loss: 0.0 +average reward score: 7.453125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.36%) |Training time=0.46s (21.05%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.34 +epoch: 0|step: 225|ppo_ep: 1|act_loss: 0.139404296875|cri_loss: 0.08172607421875|unsuper_loss: 0.0 +average reward score: 6.4296875 +------------------------------------------------------------------------------------- +|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.64s (73.61%) |Training time=0.47s (21.21%) |Others=0.12 (5.18%)|CurSamplesPerSec=14.36 |AvgSamplesPerSec=14.34 +epoch: 0|step: 226|ppo_ep: 1|act_loss: 0.04437255859375|cri_loss: 0.02532958984375|unsuper_loss: 0.0 +average reward score: 4.3671875 +------------------------------------------------------------------------------------- +|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.76s (76.61%) |Training time=0.44s (18.98%) |Others=0.10 (4.41%)|CurSamplesPerSec=13.94 |AvgSamplesPerSec=14.34 +epoch: 0|step: 227|ppo_ep: 1|act_loss: -0.06201171875|cri_loss: -0.0190582275390625|unsuper_loss: 0.0 +average reward score: 5.0546875 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.41%) |Training time=0.49s (22.11%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.34 +epoch: 0|step: 228|ppo_ep: 1|act_loss: -0.092041015625|cri_loss: -0.03985595703125|unsuper_loss: 0.0 +average reward score: 5.140625 +------------------------------------------------------------------------------------- +|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.62s (70.56%) |Training time=0.49s (21.35%) |Others=0.19 (8.08%)|CurSamplesPerSec=13.92 |AvgSamplesPerSec=14.34 +[2023-04-14 08:55:55,060] [INFO] [logging.py:96:log_dist] [Rank 0] step=230, skipped=6, lr=[9.64450272281792e-06, 9.64450272281792e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 08:55:55,078] [INFO] [timer.py:199:stop] epoch=0/micro_step=230/global_step=230, RunningAvgSamplesPerSec=99.88061376855845, CurrSamplesPerSec=101.70929986715883, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 08:55:55,171] [INFO] [logging.py:96:log_dist] [Rank 0] step=230, skipped=5, lr=[4.997105552702513e-06, 4.997105552702513e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 229|ppo_ep: 1|act_loss: 0.03997802734375|cri_loss: 0.023681640625|unsuper_loss: 0.0 +average reward score: 5.828125 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.66%) |Training time=0.48s (21.68%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.34 +epoch: 0|step: 230|ppo_ep: 1|act_loss: 0.036895751953125|cri_loss: 0.02520751953125|unsuper_loss: 0.0 +average reward score: 4.91015625 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.57%) |Training time=0.48s (21.84%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.51 |AvgSamplesPerSec=14.34 +epoch: 0|step: 231|ppo_ep: 1|act_loss: 0.041748046875|cri_loss: 0.02581787109375|unsuper_loss: 0.0 +average reward score: 4.92578125 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.91%) |Training time=0.47s (21.52%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.34 +epoch: 0|step: 232|ppo_ep: 1|act_loss: 0.1104736328125|cri_loss: 0.0606689453125|unsuper_loss: 0.0 +average reward score: 5.625 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.38%) |Training time=0.49s (22.09%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.43 |AvgSamplesPerSec=14.34 +epoch: 0|step: 233|ppo_ep: 1|act_loss: 0.022430419921875|cri_loss: 0.0141143798828125|unsuper_loss: 0.0 +average reward score: 4.78125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.80%) |Training time=0.48s (21.73%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.34 +epoch: 0|step: 234|ppo_ep: 1|act_loss: -0.097412109375|cri_loss: -0.044830322265625|unsuper_loss: 0.0 +average reward score: 5.31640625 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.62s (69.95%) |Training time=0.48s (20.82%) |Others=0.21 (9.22%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.34 +epoch: 0|step: 235|ppo_ep: 1|act_loss: 0.1146240234375|cri_loss: 0.08062744140625|unsuper_loss: 0.0 +average reward score: 4.8515625 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.78%) |Training time=0.48s (21.63%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.34 +epoch: 0|step: 236|ppo_ep: 1|act_loss: 0.098876953125|cri_loss: 0.0562744140625|unsuper_loss: 0.0 +average reward score: 4.9140625 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.00%) |Training time=0.47s (21.42%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.34 +epoch: 0|step: 237|ppo_ep: 1|act_loss: 0.246826171875|cri_loss: 0.1407470703125|unsuper_loss: 0.0 +average reward score: 4.5625 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.42%) |Training time=0.49s (22.03%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.34 +epoch: 0|step: 238|ppo_ep: 1|act_loss: -0.043609619140625|cri_loss: -0.010223388671875|unsuper_loss: 0.0 +average reward score: 4.8359375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.62%) |Training time=0.48s (21.80%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.34 +[2023-04-14 08:56:17,202] [INFO] [logging.py:96:log_dist] [Rank 0] step=240, skipped=6, lr=[9.643580517474126e-06, 9.643580517474126e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 08:56:17,221] [INFO] [timer.py:199:stop] epoch=0/micro_step=240/global_step=240, RunningAvgSamplesPerSec=99.92462431230227, CurrSamplesPerSec=100.35225418814385, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 08:56:17,314] [INFO] [logging.py:96:log_dist] [Rank 0] step=240, skipped=5, lr=[4.996624025098819e-06, 4.996624025098819e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 239|ppo_ep: 1|act_loss: -0.1341552734375|cri_loss: -0.050537109375|unsuper_loss: 0.0 +average reward score: 4.5546875 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.61%) |Training time=0.48s (21.84%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.34 +epoch: 0|step: 240|ppo_ep: 1|act_loss: -0.0472412109375|cri_loss: -0.0107421875|unsuper_loss: 0.0 +average reward score: 4.12109375 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.17%) |Training time=0.46s (20.69%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.46 |AvgSamplesPerSec=14.34 +epoch: 0|step: 241|ppo_ep: 1|act_loss: -0.02374267578125|cri_loss: -0.00562286376953125|unsuper_loss: 0.0 +average reward score: 3.810546875 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.20%) |Training time=0.47s (21.24%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.35 +epoch: 0|step: 242|ppo_ep: 1|act_loss: 0.135498046875|cri_loss: 0.0750732421875|unsuper_loss: 0.0 +average reward score: 3.974609375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.54%) |Training time=0.48s (21.88%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.35 +epoch: 0|step: 243|ppo_ep: 1|act_loss: 0.01165008544921875|cri_loss: 0.016204833984375|unsuper_loss: 0.0 +average reward score: 4.26953125 +------------------------------------------------------------------------------------- +|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.81s (76.71%) |Training time=0.45s (19.00%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.53 |AvgSamplesPerSec=14.34 +epoch: 0|step: 244|ppo_ep: 1|act_loss: 0.11224365234375|cri_loss: 0.061309814453125|unsuper_loss: 0.0 +average reward score: 4.0625 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.90%) |Training time=0.47s (21.52%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.34 +epoch: 0|step: 245|ppo_ep: 1|act_loss: 0.07666015625|cri_loss: 0.047027587890625|unsuper_loss: 0.0 +average reward score: 4.1640625 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.58%) |Training time=0.48s (21.85%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.34 +epoch: 0|step: 246|ppo_ep: 1|act_loss: -0.057037353515625|cri_loss: -0.0242919921875|unsuper_loss: 0.0 +average reward score: 4.9375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.49%) |Training time=0.48s (21.94%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.34 +epoch: 0|step: 247|ppo_ep: 1|act_loss: -0.036651611328125|cri_loss: -0.016265869140625|unsuper_loss: 0.0 +average reward score: 4.28125 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.86%) |Training time=0.48s (21.66%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.35 +epoch: 0|step: 248|ppo_ep: 1|act_loss: 0.031280517578125|cri_loss: 0.019378662109375|unsuper_loss: 0.0 +average reward score: 4.2421875 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.43%) |Training time=0.49s (22.01%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.35 +[2023-04-14 08:56:39,389] [INFO] [logging.py:96:log_dist] [Rank 0] step=250, skipped=6, lr=[9.64258688914287e-06, 9.64258688914287e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 08:56:39,927] [INFO] [timer.py:199:stop] epoch=0/micro_step=250/global_step=250, RunningAvgSamplesPerSec=99.39271434606478, CurrSamplesPerSec=38.278583366131066, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 08:56:40,020] [INFO] [logging.py:96:log_dist] [Rank 0] step=250, skipped=5, lr=[4.996105491504296e-06, 4.996105491504296e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 249|ppo_ep: 1|act_loss: 0.09033203125|cri_loss: 0.050537109375|unsuper_loss: 0.0 +average reward score: 4.4609375 +------------------------------------------------------------------------------------- +|E2E latency=2.72s |Gather latency=0.00s (0.00%) |Generate time=1.62s (59.51%) |Training time=1.00s (36.79%) |Others=0.10 (3.71%)|CurSamplesPerSec=11.78 |AvgSamplesPerSec=14.33 +epoch: 0|step: 250|ppo_ep: 1|act_loss: -0.1090087890625|cri_loss: -0.0499267578125|unsuper_loss: 0.0 +average reward score: 4.80078125 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.45%) |Training time=0.48s (21.98%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.33 +epoch: 0|step: 251|ppo_ep: 1|act_loss: -0.18017578125|cri_loss: -0.0765380859375|unsuper_loss: 0.0 +average reward score: 4.68359375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.78%) |Training time=0.45s (20.68%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.34 +epoch: 0|step: 252|ppo_ep: 1|act_loss: -0.17724609375|cri_loss: -0.08209228515625|unsuper_loss: 0.0 +average reward score: 4.9375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.97%) |Training time=0.47s (21.42%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.34 +epoch: 0|step: 253|ppo_ep: 1|act_loss: -0.12353515625|cri_loss: -0.0531005859375|unsuper_loss: 0.0 +average reward score: 5.26171875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.17%) |Training time=0.46s (21.22%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.34 +epoch: 0|step: 254|ppo_ep: 1|act_loss: 0.2373046875|cri_loss: 0.14013671875|unsuper_loss: 0.0 +average reward score: 4.77734375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.13%) |Training time=0.44s (20.23%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.34 +epoch: 0|step: 255|ppo_ep: 1|act_loss: -0.028411865234375|cri_loss: -0.00667572021484375|unsuper_loss: 0.0 +average reward score: 4.2421875 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.76s (77.44%) |Training time=0.41s (18.18%) |Others=0.10 (4.38%)|CurSamplesPerSec=14.11 |AvgSamplesPerSec=14.34 +epoch: 0|step: 256|ppo_ep: 1|act_loss: 0.0565185546875|cri_loss: 0.037017822265625|unsuper_loss: 0.0 +average reward score: 4.72265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.97%) |Training time=0.42s (19.35%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.34 +epoch: 0|step: 257|ppo_ep: 1|act_loss: 0.1236572265625|cri_loss: 0.067138671875|unsuper_loss: 0.0 +average reward score: 4.89453125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.08%) |Training time=0.42s (19.34%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.34 +epoch: 0|step: 258|ppo_ep: 1|act_loss: 0.1390380859375|cri_loss: 0.0894775390625|unsuper_loss: 0.0 +average reward score: 4.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.31%) |Training time=0.43s (20.04%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.34 +[2023-04-14 08:57:01,938] [INFO] [logging.py:96:log_dist] [Rank 0] step=260, skipped=6, lr=[9.64152185255212e-06, 9.64152185255212e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 08:57:01,956] [INFO] [timer.py:199:stop] epoch=0/micro_step=260/global_step=260, RunningAvgSamplesPerSec=99.89679140432641, CurrSamplesPerSec=114.06243881213967, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 08:57:02,050] [INFO] [logging.py:96:log_dist] [Rank 0] step=260, skipped=5, lr=[4.9955499596048615e-06, 4.9955499596048615e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 259|ppo_ep: 1|act_loss: 0.0908203125|cri_loss: 0.06024169921875|unsuper_loss: 0.0 +average reward score: 4.9296875 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.80s (76.75%) |Training time=0.45s (19.04%) |Others=0.10 (4.21%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.34 +epoch: 0|step: 260|ppo_ep: 1|act_loss: 0.124267578125|cri_loss: 0.065673828125|unsuper_loss: 0.0 +average reward score: 5.125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.42%) |Training time=0.46s (20.96%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.34 +epoch: 0|step: 261|ppo_ep: 1|act_loss: 0.03094482421875|cri_loss: 0.02093505859375|unsuper_loss: 0.0 +average reward score: 4.8515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.58%) |Training time=0.45s (20.79%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.34 +epoch: 0|step: 262|ppo_ep: 1|act_loss: -0.03643798828125|cri_loss: -0.0055389404296875|unsuper_loss: 0.0 +average reward score: 4.578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.85%) |Training time=0.44s (20.49%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.35 +epoch: 0|step: 263|ppo_ep: 1|act_loss: -0.098388671875|cri_loss: -0.0452880859375|unsuper_loss: 0.0 +average reward score: 4.0703125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.06%) |Training time=0.44s (20.24%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.35 +epoch: 0|step: 264|ppo_ep: 1|act_loss: -0.1561279296875|cri_loss: -0.04656982421875|unsuper_loss: 0.0 +average reward score: 4.61328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.59%) |Training time=0.45s (20.75%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.35 +epoch: 0|step: 265|ppo_ep: 1|act_loss: -0.072021484375|cri_loss: -0.0289306640625|unsuper_loss: 0.0 +average reward score: 4.859375 +------------------------------------------------------------------------------------- +|E2E latency=2.72s |Gather latency=0.00s (0.00%) |Generate time=1.63s (60.04%) |Training time=0.45s (16.66%) |Others=0.63 (23.30%)|CurSamplesPerSec=11.76 |AvgSamplesPerSec=14.34 +epoch: 0|step: 266|ppo_ep: 1|act_loss: 0.00478363037109375|cri_loss: 0.006679534912109375|unsuper_loss: 0.0 +average reward score: 4.609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.65%) |Training time=0.45s (20.69%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.34 +epoch: 0|step: 267|ppo_ep: 1|act_loss: 0.04937744140625|cri_loss: 0.029876708984375|unsuper_loss: 0.0 +average reward score: 4.15625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.83%) |Training time=0.45s (20.63%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.34 +epoch: 0|step: 268|ppo_ep: 1|act_loss: 0.050994873046875|cri_loss: 0.0362548828125|unsuper_loss: 0.0 +average reward score: 4.35546875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.38%) |Training time=0.43s (19.99%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.34 +[2023-04-14 08:57:24,192] [INFO] [logging.py:96:log_dist] [Rank 0] step=270, skipped=6, lr=[9.640385423488292e-06, 9.640385423488292e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 08:57:24,211] [INFO] [timer.py:199:stop] epoch=0/micro_step=270/global_step=270, RunningAvgSamplesPerSec=100.33253846211156, CurrSamplesPerSec=118.06418606287716, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 08:57:24,304] [INFO] [logging.py:96:log_dist] [Rank 0] step=270, skipped=5, lr=[4.99495743763484e-06, 4.99495743763484e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 269|ppo_ep: 1|act_loss: -0.050506591796875|cri_loss: -0.022216796875|unsuper_loss: 0.0 +average reward score: 5.25390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.39%) |Training time=0.43s (20.00%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34 +epoch: 0|step: 270|ppo_ep: 1|act_loss: 0.007083892822265625|cri_loss: 0.00634765625|unsuper_loss: 0.0 +average reward score: 5.26171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.63%) |Training time=0.43s (19.72%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34 +epoch: 0|step: 271|ppo_ep: 1|act_loss: -0.02874755859375|cri_loss: -0.01068115234375|unsuper_loss: 0.0 +average reward score: 5.3828125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.24%) |Training time=0.43s (19.78%) |Others=0.11 (4.98%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.35 +epoch: 0|step: 272|ppo_ep: 1|act_loss: 0.158203125|cri_loss: 0.0850830078125|unsuper_loss: 0.0 +average reward score: 4.30859375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.42%) |Training time=0.46s (20.89%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.35 +epoch: 0|step: 273|ppo_ep: 1|act_loss: 0.04718017578125|cri_loss: 0.0275421142578125|unsuper_loss: 0.0 +average reward score: 4.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.38s |Gather latency=0.00s (0.00%) |Generate time=1.83s (76.95%) |Training time=0.45s (18.83%) |Others=0.10 (4.22%)|CurSamplesPerSec=13.47 |AvgSamplesPerSec=14.34 +epoch: 0|step: 274|ppo_ep: 1|act_loss: -0.04803466796875|cri_loss: -0.0196685791015625|unsuper_loss: 0.0 +average reward score: 5.3671875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.56%) |Training time=0.45s (20.64%) |Others=0.11 (4.80%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.34 +epoch: 0|step: 275|ppo_ep: 1|act_loss: 0.01389312744140625|cri_loss: 0.0127716064453125|unsuper_loss: 0.0 +average reward score: 4.5859375 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.88%) |Training time=0.48s (21.61%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.34 +epoch: 0|step: 276|ppo_ep: 1|act_loss: -0.10394287109375|cri_loss: -0.044525146484375|unsuper_loss: 0.0 +average reward score: 4.4609375 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.63s (69.59%) |Training time=0.47s (20.00%) |Others=0.24 (10.40%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.34 +epoch: 0|step: 277|ppo_ep: 1|act_loss: -0.0570068359375|cri_loss: -0.0216827392578125|unsuper_loss: 0.0 +average reward score: 4.35546875 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.06%) |Training time=0.45s (20.43%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.34 +epoch: 0|step: 278|ppo_ep: 1|act_loss: -0.020050048828125|cri_loss: -0.0082244873046875|unsuper_loss: 0.0 +average reward score: 4.79296875 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.64s (73.66%) |Training time=0.48s (21.65%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.40 |AvgSamplesPerSec=14.34 +[2023-04-14 08:57:46,450] [INFO] [logging.py:96:log_dist] [Rank 0] step=280, skipped=6, lr=[9.639177618796e-06, 9.639177618796e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 08:57:46,468] [INFO] [timer.py:199:stop] epoch=0/micro_step=280/global_step=280, RunningAvgSamplesPerSec=100.64003665757109, CurrSamplesPerSec=108.39001360752977, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 08:57:46,561] [INFO] [logging.py:96:log_dist] [Rank 0] step=280, skipped=5, lr=[4.994327934376836e-06, 4.994327934376836e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 279|ppo_ep: 1|act_loss: 0.05682373046875|cri_loss: 0.0301666259765625|unsuper_loss: 0.0 +average reward score: 4.76953125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.56%) |Training time=0.46s (20.92%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.34 +epoch: 0|step: 280|ppo_ep: 1|act_loss: 0.1710205078125|cri_loss: 0.101806640625|unsuper_loss: 0.0 +average reward score: 5.171875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.41%) |Training time=0.46s (20.99%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.34 +epoch: 0|step: 281|ppo_ep: 1|act_loss: 0.1187744140625|cri_loss: 0.06683349609375|unsuper_loss: 0.0 +average reward score: 5.09765625 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.98%) |Training time=0.47s (21.51%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.35 +epoch: 0|step: 282|ppo_ep: 1|act_loss: 0.095703125|cri_loss: 0.05072021484375|unsuper_loss: 0.0 +average reward score: 5.14453125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.53%) |Training time=0.46s (20.85%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.35 +epoch: 0|step: 283|ppo_ep: 1|act_loss: -0.037811279296875|cri_loss: -0.017578125|unsuper_loss: 0.0 +average reward score: 4.9453125 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.13%) |Training time=0.47s (21.31%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.35 +epoch: 0|step: 284|ppo_ep: 1|act_loss: 0.009124755859375|cri_loss: 0.00556182861328125|unsuper_loss: 0.0 +average reward score: 5.03125 +------------------------------------------------------------------------------------- +|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.73s (75.48%) |Training time=0.46s (20.10%) |Others=0.10 (4.41%)|CurSamplesPerSec=13.97 |AvgSamplesPerSec=14.35 +epoch: 0|step: 285|ppo_ep: 1|act_loss: -0.05279541015625|cri_loss: -0.0239105224609375|unsuper_loss: 0.0 +average reward score: 5.234375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.66s (75.38%) |Training time=0.44s (20.00%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.35 +epoch: 0|step: 286|ppo_ep: 1|act_loss: -0.0899658203125|cri_loss: -0.0411376953125|unsuper_loss: 0.0 +average reward score: 5.1796875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.85%) |Training time=0.42s (19.49%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.35 +epoch: 0|step: 287|ppo_ep: 1|act_loss: -0.02752685546875|cri_loss: -0.01082611083984375|unsuper_loss: 0.0 +average reward score: 4.94140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.18%) |Training time=0.42s (19.26%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.35 +epoch: 0|step: 288|ppo_ep: 1|act_loss: -0.000614166259765625|cri_loss: 0.00179290771484375|unsuper_loss: 0.0 +average reward score: 4.75390625 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.21%) |Training time=0.43s (18.46%) |Others=0.10 (4.33%)|CurSamplesPerSec=13.70 |AvgSamplesPerSec=14.35 +[2023-04-14 08:58:08,558] [INFO] [logging.py:96:log_dist] [Rank 0] step=290, skipped=6, lr=[9.637898456377828e-06, 9.637898456377828e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 08:58:08,576] [INFO] [timer.py:199:stop] epoch=0/micro_step=290/global_step=290, RunningAvgSamplesPerSec=101.01194688929871, CurrSamplesPerSec=112.96647476685857, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 08:58:08,668] [INFO] [logging.py:96:log_dist] [Rank 0] step=290, skipped=5, lr=[4.993661459161605e-06, 4.993661459161605e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 289|ppo_ep: 1|act_loss: -0.08251953125|cri_loss: -0.0379638671875|unsuper_loss: 0.0 +average reward score: 4.84375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.91%) |Training time=0.45s (20.57%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.35 +epoch: 0|step: 290|ppo_ep: 1|act_loss: 0.0092010498046875|cri_loss: 0.008026123046875|unsuper_loss: 0.0 +average reward score: 5.48828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.73%) |Training time=0.45s (20.64%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.35 +epoch: 0|step: 291|ppo_ep: 1|act_loss: 0.030975341796875|cri_loss: 0.016845703125|unsuper_loss: 0.0 +average reward score: 5.55859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.66%) |Training time=0.45s (20.78%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.35 +epoch: 0|step: 292|ppo_ep: 1|act_loss: 0.0404052734375|cri_loss: 0.0223846435546875|unsuper_loss: 0.0 +average reward score: 5.3828125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.07%) |Training time=0.44s (20.30%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.35 +epoch: 0|step: 293|ppo_ep: 1|act_loss: -0.0026226043701171875|cri_loss: 0.0005626678466796875|unsuper_loss: 0.0 +average reward score: 4.88671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.89%) |Training time=0.45s (20.56%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.35 +epoch: 0|step: 294|ppo_ep: 1|act_loss: 0.1007080078125|cri_loss: 0.056976318359375|unsuper_loss: 0.0 +average reward score: 5.7421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.53%) |Training time=0.45s (20.84%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.35 +epoch: 0|step: 295|ppo_ep: 1|act_loss: -0.01436614990234375|cri_loss: -0.00609588623046875|unsuper_loss: 0.0 +average reward score: 5.13671875 +------------------------------------------------------------------------------------- +|E2E latency=3.40s |Gather latency=0.00s (0.00%) |Generate time=1.62s (47.50%) |Training time=0.45s (13.18%) |Others=1.34 (39.33%)|CurSamplesPerSec=9.40 |AvgSamplesPerSec=14.33 +epoch: 0|step: 296|ppo_ep: 1|act_loss: -0.000537872314453125|cri_loss: 0.003475189208984375|unsuper_loss: 0.0 +average reward score: 4.97265625 +------------------------------------------------------------------------------------- +|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.95%) |Training time=0.39s (18.28%) |Others=0.10 (4.77%)|CurSamplesPerSec=15.12 |AvgSamplesPerSec=14.33 +epoch: 0|step: 297|ppo_ep: 1|act_loss: -0.042449951171875|cri_loss: -0.017486572265625|unsuper_loss: 0.0 +average reward score: 5.71484375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.76%) |Training time=0.45s (20.70%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.33 +epoch: 0|step: 298|ppo_ep: 1|act_loss: -0.001461029052734375|cri_loss: 0.00197601318359375|unsuper_loss: 0.0 +average reward score: 6.12109375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.55%) |Training time=0.45s (20.81%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.33 +[2023-04-14 08:58:31,449] [INFO] [logging.py:96:log_dist] [Rank 0] step=300, skipped=6, lr=[9.636547955194047e-06, 9.636547955194047e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 08:58:31,467] [INFO] [timer.py:199:stop] epoch=0/micro_step=300/global_step=300, RunningAvgSamplesPerSec=101.42191353477254, CurrSamplesPerSec=116.11825887144846, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 08:58:31,561] [INFO] [logging.py:96:log_dist] [Rank 0] step=300, skipped=5, lr=[4.9929580218679195e-06, 4.9929580218679195e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 299|ppo_ep: 1|act_loss: -0.037841796875|cri_loss: -0.017974853515625|unsuper_loss: 0.0 +average reward score: 6.19140625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.10%) |Training time=0.44s (20.32%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.34 +epoch: 0|step: 300|ppo_ep: 1|act_loss: 0.1055908203125|cri_loss: 0.0635986328125|unsuper_loss: 0.0 +average reward score: 6.1328125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.65%) |Training time=0.43s (19.69%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.34 +epoch: 0|step: 301|ppo_ep: 1|act_loss: 0.01062774658203125|cri_loss: 0.00878143310546875|unsuper_loss: 0.0 +average reward score: 4.91796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.96%) |Training time=0.44s (20.39%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34 +epoch: 0|step: 302|ppo_ep: 1|act_loss: 0.02557373046875|cri_loss: 0.0182647705078125|unsuper_loss: 0.0 +average reward score: 5.859375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.99%) |Training time=0.44s (20.36%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.34 +epoch: 0|step: 303|ppo_ep: 1|act_loss: -0.0107421875|cri_loss: -0.004383087158203125|unsuper_loss: 0.0 +average reward score: 5.640625 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.17%) |Training time=0.43s (18.60%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.34 +epoch: 0|step: 304|ppo_ep: 1|act_loss: -0.0123443603515625|cri_loss: -0.00382232666015625|unsuper_loss: 0.0 +average reward score: 5.46875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.97%) |Training time=0.44s (20.37%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.34 +epoch: 0|step: 305|ppo_ep: 1|act_loss: -0.01494598388671875|cri_loss: -0.0056304931640625|unsuper_loss: 0.0 +average reward score: 5.9453125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.88%) |Training time=0.44s (20.49%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.34 +epoch: 0|step: 306|ppo_ep: 1|act_loss: -0.00437164306640625|cri_loss: -0.0005321502685546875|unsuper_loss: 0.0 +average reward score: 6.4765625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.67%) |Training time=0.45s (20.71%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.34 +epoch: 0|step: 307|ppo_ep: 1|act_loss: -0.009765625|cri_loss: -0.004009246826171875|unsuper_loss: 0.0 +average reward score: 5.921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.28%) |Training time=0.44s (20.17%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.34 +epoch: 0|step: 308|ppo_ep: 1|act_loss: 0.039825439453125|cri_loss: 0.025970458984375|unsuper_loss: 0.0 +average reward score: 5.578125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.18%) |Training time=0.43s (20.05%) |Others=0.10 (4.76%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.34 +[2023-04-14 08:58:53,301] [INFO] [logging.py:96:log_dist] [Rank 0] step=310, skipped=6, lr=[9.635126135262344e-06, 9.635126135262344e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 08:58:53,319] [INFO] [timer.py:199:stop] epoch=0/micro_step=310/global_step=310, RunningAvgSamplesPerSec=101.82524227057964, CurrSamplesPerSec=112.93795796099022, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 08:58:53,412] [INFO] [logging.py:96:log_dist] [Rank 0] step=310, skipped=5, lr=[4.9922176329224145e-06, 4.9922176329224145e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 309|ppo_ep: 1|act_loss: -0.0081024169921875|cri_loss: -0.00330352783203125|unsuper_loss: 0.0 +average reward score: 5.76953125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.77%) |Training time=0.45s (20.61%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.35 +[2023-04-14 08:58:55,462] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, reducing to 4096 +epoch: 0|step: 310|ppo_ep: 1|act_loss: 0.0209503173828125|cri_loss: 0.0167999267578125|unsuper_loss: 0.0 +average reward score: 5.25 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.57%) |Training time=0.42s (19.76%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.35 +epoch: 0|step: 311|ppo_ep: 1|act_loss: 0.05865478515625|cri_loss: 0.036773681640625|unsuper_loss: 0.0 +average reward score: 6.4453125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.01%) |Training time=0.44s (20.45%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.35 +epoch: 0|step: 312|ppo_ep: 1|act_loss: -0.0080413818359375|cri_loss: -0.001312255859375|unsuper_loss: 0.0 +average reward score: 6.2578125 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.67s (75.84%) |Training time=0.43s (19.60%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.35 +epoch: 0|step: 313|ppo_ep: 1|act_loss: 0.00653076171875|cri_loss: 0.0057525634765625|unsuper_loss: 0.0 +average reward score: 7.109375 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.73s (76.11%) |Training time=0.44s (19.49%) |Others=0.10 (4.41%)|CurSamplesPerSec=14.07 |AvgSamplesPerSec=14.35 +epoch: 0|step: 314|ppo_ep: 1|act_loss: 0.0157012939453125|cri_loss: 0.0095367431640625|unsuper_loss: 0.0 +average reward score: 6.78515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.07%) |Training time=0.44s (20.28%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.35 +epoch: 0|step: 315|ppo_ep: 1|act_loss: -0.037750244140625|cri_loss: -0.016204833984375|unsuper_loss: 0.0 +average reward score: 6.21875 +------------------------------------------------------------------------------------- +|E2E latency=2.49s |Gather latency=0.00s (0.00%) |Generate time=1.64s (66.05%) |Training time=0.43s (17.14%) |Others=0.42 (16.82%)|CurSamplesPerSec=12.85 |AvgSamplesPerSec=14.34 +epoch: 0|step: 316|ppo_ep: 1|act_loss: 0.03277587890625|cri_loss: 0.0176544189453125|unsuper_loss: 0.0 +average reward score: 5.11328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.60%) |Training time=0.43s (19.71%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.35 +epoch: 0|step: 317|ppo_ep: 1|act_loss: -0.0408935546875|cri_loss: -0.0183868408203125|unsuper_loss: 0.0 +average reward score: 5.609375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.78%) |Training time=0.45s (20.72%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.35 +epoch: 0|step: 318|ppo_ep: 1|act_loss: 0.0999755859375|cri_loss: 0.059661865234375|unsuper_loss: 0.0 +average reward score: 6.859375 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.09%) |Training time=0.46s (19.62%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.34 +[2023-04-14 08:59:15,615] [INFO] [logging.py:96:log_dist] [Rank 0] step=320, skipped=7, lr=[9.63378553719082e-06, 9.63378553719082e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 08:59:15,633] [INFO] [timer.py:199:stop] epoch=0/micro_step=320/global_step=320, RunningAvgSamplesPerSec=102.20252850967381, CurrSamplesPerSec=109.19005671926405, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 08:59:15,725] [INFO] [logging.py:96:log_dist] [Rank 0] step=320, skipped=5, lr=[4.991440303299444e-06, 4.991440303299444e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 319|ppo_ep: 1|act_loss: 0.01959228515625|cri_loss: 0.01177978515625|unsuper_loss: 0.0 +average reward score: 5.95703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.51%) |Training time=0.46s (20.98%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.35 +epoch: 0|step: 320|ppo_ep: 1|act_loss: -0.0213623046875|cri_loss: -0.009521484375|unsuper_loss: 0.0 +average reward score: 6.09765625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.15%) |Training time=0.46s (21.24%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.35 +epoch: 0|step: 321|ppo_ep: 1|act_loss: 0.0077362060546875|cri_loss: 0.006229400634765625|unsuper_loss: 0.0 +average reward score: 6.9609375 +------------------------------------------------------------------------------------- +|E2E latency=2.45s |Gather latency=0.00s (0.00%) |Generate time=1.62s (66.12%) |Training time=0.46s (18.88%) |Others=0.37 (15.00%)|CurSamplesPerSec=13.07 |AvgSamplesPerSec=14.34 +epoch: 0|step: 322|ppo_ep: 1|act_loss: 0.031463623046875|cri_loss: 0.0181884765625|unsuper_loss: 0.0 +average reward score: 5.71484375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.11%) |Training time=0.46s (21.28%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.34 +epoch: 0|step: 323|ppo_ep: 1|act_loss: -0.0440673828125|cri_loss: -0.02008056640625|unsuper_loss: 0.0 +average reward score: 6.68359375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.13%) |Training time=0.46s (21.25%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.34 +epoch: 0|step: 324|ppo_ep: 1|act_loss: -0.05035400390625|cri_loss: -0.023223876953125|unsuper_loss: 0.0 +average reward score: 5.9375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.15%) |Training time=0.46s (21.23%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.35 +epoch: 0|step: 325|ppo_ep: 1|act_loss: 0.00405120849609375|cri_loss: 0.004222869873046875|unsuper_loss: 0.0 +average reward score: 5.7734375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.21%) |Training time=0.46s (21.28%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.35 +epoch: 0|step: 326|ppo_ep: 1|act_loss: -0.024139404296875|cri_loss: -0.00982666015625|unsuper_loss: 0.0 +average reward score: 6.15234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.41%) |Training time=0.45s (20.94%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.35 +epoch: 0|step: 327|ppo_ep: 1|act_loss: 0.01953125|cri_loss: 0.0111846923828125|unsuper_loss: 0.0 +average reward score: 5.8515625 +------------------------------------------------------------------------------------- +|E2E latency=2.77s |Gather latency=0.00s (0.00%) |Generate time=1.62s (58.42%) |Training time=0.47s (16.78%) |Others=0.69 (24.80%)|CurSamplesPerSec=11.55 |AvgSamplesPerSec=14.34 +epoch: 0|step: 328|ppo_ep: 1|act_loss: 0.0179443359375|cri_loss: 0.0124053955078125|unsuper_loss: 0.0 +average reward score: 5.7734375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.81%) |Training time=0.45s (20.57%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.34 +[2023-04-14 08:59:38,260] [INFO] [logging.py:96:log_dist] [Rank 0] step=330, skipped=7, lr=[9.632228270572594e-06, 9.632228270572594e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 08:59:38,278] [INFO] [timer.py:199:stop] epoch=0/micro_step=330/global_step=330, RunningAvgSamplesPerSec=102.38817781321211, CurrSamplesPerSec=120.18967042650056, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 08:59:38,371] [INFO] [logging.py:96:log_dist] [Rank 0] step=330, skipped=5, lr=[4.990626044520905e-06, 4.990626044520905e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 329|ppo_ep: 1|act_loss: -0.00534820556640625|cri_loss: 0.001178741455078125|unsuper_loss: 0.0 +average reward score: 5.6953125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.66%) |Training time=0.43s (19.71%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.34 +epoch: 0|step: 330|ppo_ep: 1|act_loss: -0.015716552734375|cri_loss: -0.00428009033203125|unsuper_loss: 0.0 +average reward score: 6.42578125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.70%) |Training time=0.43s (19.65%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.34 +epoch: 0|step: 331|ppo_ep: 1|act_loss: -0.000396728515625|cri_loss: 0.0016536712646484375|unsuper_loss: 0.0 +average reward score: 6.6640625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.11%) |Training time=0.44s (20.35%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34 +epoch: 0|step: 332|ppo_ep: 1|act_loss: 0.01067352294921875|cri_loss: 0.01187896728515625|unsuper_loss: 0.0 +average reward score: 6.078125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.03%) |Training time=0.44s (20.34%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34 +epoch: 0|step: 333|ppo_ep: 1|act_loss: -0.0230712890625|cri_loss: -0.009796142578125|unsuper_loss: 0.0 +average reward score: 5.37109375 +------------------------------------------------------------------------------------- +|E2E latency=2.59s |Gather latency=0.00s (0.00%) |Generate time=1.80s (69.53%) |Training time=0.43s (16.75%) |Others=0.35 (13.72%)|CurSamplesPerSec=12.38 |AvgSamplesPerSec=14.34 +epoch: 0|step: 334|ppo_ep: 1|act_loss: 0.0168609619140625|cri_loss: 0.0110626220703125|unsuper_loss: 0.0 +average reward score: 6.1953125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.68%) |Training time=0.45s (20.69%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.34 +epoch: 0|step: 335|ppo_ep: 1|act_loss: 0.01114654541015625|cri_loss: 0.00699615478515625|unsuper_loss: 0.0 +average reward score: 5.6484375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.79%) |Training time=0.45s (20.67%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.34 +epoch: 0|step: 336|ppo_ep: 1|act_loss: 0.06365966796875|cri_loss: 0.03387451171875|unsuper_loss: 0.0 +average reward score: 5.921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.95%) |Training time=0.44s (20.42%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34 +epoch: 0|step: 337|ppo_ep: 1|act_loss: -0.011566162109375|cri_loss: -0.00455474853515625|unsuper_loss: 0.0 +average reward score: 6.4609375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.64%) |Training time=0.45s (20.70%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.34 +epoch: 0|step: 338|ppo_ep: 1|act_loss: 0.023193359375|cri_loss: 0.01505279541015625|unsuper_loss: 0.0 +average reward score: 6.578125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.61%) |Training time=0.45s (20.75%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34 +[2023-04-14 09:00:00,381] [INFO] [logging.py:96:log_dist] [Rank 0] step=340, skipped=7, lr=[9.630599749234592e-06, 9.630599749234592e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:00:01,026] [INFO] [timer.py:199:stop] epoch=0/micro_step=340/global_step=340, RunningAvgSamplesPerSec=102.09652405965743, CurrSamplesPerSec=35.20821465465094, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:00:01,119] [INFO] [logging.py:96:log_dist] [Rank 0] step=340, skipped=5, lr=[4.989774868656078e-06, 4.989774868656078e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 339|ppo_ep: 1|act_loss: -0.022247314453125|cri_loss: -0.0097503662109375|unsuper_loss: 0.0 +average reward score: 8.203125 +------------------------------------------------------------------------------------- +|E2E latency=2.80s |Gather latency=0.00s (0.00%) |Generate time=1.63s (58.17%) |Training time=1.07s (38.32%) |Others=0.10 (3.51%)|CurSamplesPerSec=11.44 |AvgSamplesPerSec=14.33 +epoch: 0|step: 340|ppo_ep: 1|act_loss: -0.02862548828125|cri_loss: -0.01319122314453125|unsuper_loss: 0.0 +average reward score: 6.9453125 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.07%) |Training time=0.45s (20.63%) |Others=0.12 (5.30%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.33 +epoch: 0|step: 341|ppo_ep: 1|act_loss: -0.003849029541015625|cri_loss: -0.000995635986328125|unsuper_loss: 0.0 +average reward score: 6.2421875 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.73s (76.32%) |Training time=0.44s (19.25%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.11 |AvgSamplesPerSec=14.33 +epoch: 0|step: 342|ppo_ep: 1|act_loss: 0.01129913330078125|cri_loss: 0.007045745849609375|unsuper_loss: 0.0 +average reward score: 6.6328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.58%) |Training time=0.45s (20.79%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.33 +epoch: 0|step: 343|ppo_ep: 1|act_loss: -0.0092620849609375|cri_loss: -0.00258636474609375|unsuper_loss: 0.0 +average reward score: 5.98046875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.92%) |Training time=0.45s (20.53%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.33 +epoch: 0|step: 344|ppo_ep: 1|act_loss: -0.00565338134765625|cri_loss: -0.0018253326416015625|unsuper_loss: 0.0 +average reward score: 5.8125 +------------------------------------------------------------------------------------- +|E2E latency=2.82s |Gather latency=0.00s (0.00%) |Generate time=1.64s (58.16%) |Training time=0.43s (15.11%) |Others=0.75 (26.73%)|CurSamplesPerSec=11.36 |AvgSamplesPerSec=14.32 +epoch: 0|step: 345|ppo_ep: 1|act_loss: -0.01235198974609375|cri_loss: -0.005222320556640625|unsuper_loss: 0.0 +average reward score: 5.75 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.88%) |Training time=0.45s (20.58%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.32 +epoch: 0|step: 346|ppo_ep: 1|act_loss: 0.03094482421875|cri_loss: 0.0184478759765625|unsuper_loss: 0.0 +average reward score: 6.375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.01%) |Training time=0.45s (20.39%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.32 +epoch: 0|step: 347|ppo_ep: 1|act_loss: 0.0733642578125|cri_loss: 0.044921875|unsuper_loss: 0.0 +average reward score: 5.9453125 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.79s (76.65%) |Training time=0.45s (19.14%) |Others=0.10 (4.22%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.32 +epoch: 0|step: 348|ppo_ep: 1|act_loss: 0.010406494140625|cri_loss: 0.00753021240234375|unsuper_loss: 0.0 +average reward score: 5.79296875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.73%) |Training time=0.45s (20.61%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.32 +[2023-04-14 09:00:23,695] [INFO] [logging.py:96:log_dist] [Rank 0] step=350, skipped=7, lr=[9.628899997315426e-06, 9.628899997315426e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:00:23,713] [INFO] [timer.py:199:stop] epoch=0/micro_step=350/global_step=350, RunningAvgSamplesPerSec=102.38370118482274, CurrSamplesPerSec=111.07989661499347, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:00:23,806] [INFO] [logging.py:96:log_dist] [Rank 0] step=350, skipped=5, lr=[4.988886788321443e-06, 4.988886788321443e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 349|ppo_ep: 1|act_loss: -0.00733184814453125|cri_loss: -0.0023441314697265625|unsuper_loss: 0.0 +average reward score: 6.18359375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.67%) |Training time=0.45s (20.80%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.32 +epoch: 0|step: 350|ppo_ep: 1|act_loss: 0.00539398193359375|cri_loss: 0.00363922119140625|unsuper_loss: 0.0 +average reward score: 6.51953125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.39%) |Training time=0.43s (19.99%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.33 +epoch: 0|step: 351|ppo_ep: 1|act_loss: 0.02569580078125|cri_loss: 0.01378631591796875|unsuper_loss: 0.0 +average reward score: 5.9296875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.58%) |Training time=0.43s (19.88%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.33 +epoch: 0|step: 352|ppo_ep: 1|act_loss: 0.0211639404296875|cri_loss: 0.0124969482421875|unsuper_loss: 0.0 +average reward score: 6.20703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.73%) |Training time=0.43s (19.62%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.33 +epoch: 0|step: 353|ppo_ep: 1|act_loss: 0.0112762451171875|cri_loss: 0.006603240966796875|unsuper_loss: 0.0 +average reward score: 6.05078125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.74%) |Training time=0.43s (19.70%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.33 +epoch: 0|step: 354|ppo_ep: 1|act_loss: -0.017547607421875|cri_loss: -0.008026123046875|unsuper_loss: 0.0 +average reward score: 6.3203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.14%) |Training time=0.44s (20.21%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.33 +epoch: 0|step: 355|ppo_ep: 1|act_loss: 0.001312255859375|cri_loss: 0.00237274169921875|unsuper_loss: 0.0 +average reward score: 5.6015625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.88%) |Training time=0.45s (20.58%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.33 +epoch: 0|step: 356|ppo_ep: 1|act_loss: -0.03271484375|cri_loss: -0.01531982421875|unsuper_loss: 0.0 +average reward score: 6.9765625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.00%) |Training time=0.44s (20.33%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.33 +epoch: 0|step: 357|ppo_ep: 1|act_loss: -0.007843017578125|cri_loss: -0.0031490325927734375|unsuper_loss: 0.0 +average reward score: 6.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.61%) |Training time=0.45s (20.70%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.33 +epoch: 0|step: 358|ppo_ep: 1|act_loss: -0.00775909423828125|cri_loss: -0.003040313720703125|unsuper_loss: 0.0 +average reward score: 5.66015625 +------------------------------------------------------------------------------------- +|E2E latency=2.67s |Gather latency=0.00s (0.00%) |Generate time=1.64s (61.37%) |Training time=0.43s (16.08%) |Others=0.60 (22.54%)|CurSamplesPerSec=11.98 |AvgSamplesPerSec=14.33 +[2023-04-14 09:00:45,855] [INFO] [logging.py:96:log_dist] [Rank 0] step=360, skipped=7, lr=[9.627129040009524e-06, 9.627129040009524e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:00:45,873] [INFO] [timer.py:199:stop] epoch=0/micro_step=360/global_step=360, RunningAvgSamplesPerSec=102.79024752173669, CurrSamplesPerSec=142.6891849079872, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:00:45,965] [INFO] [logging.py:96:log_dist] [Rank 0] step=360, skipped=5, lr=[4.987961816680493e-06, 4.987961816680493e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 359|ppo_ep: 1|act_loss: -0.0162811279296875|cri_loss: -0.0064697265625|unsuper_loss: 0.0 +average reward score: 6.65625 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.64s (77.15%) |Training time=0.39s (18.20%) |Others=0.10 (4.64%)|CurSamplesPerSec=15.03 |AvgSamplesPerSec=14.33 +epoch: 0|step: 360|ppo_ep: 1|act_loss: -0.03131103515625|cri_loss: -0.01505279541015625|unsuper_loss: 0.0 +average reward score: 6.44921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.68%) |Training time=0.45s (20.68%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.33 +epoch: 0|step: 361|ppo_ep: 1|act_loss: 0.02545166015625|cri_loss: 0.0144805908203125|unsuper_loss: 0.0 +average reward score: 5.66796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.23%) |Training time=0.44s (20.24%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.33 +epoch: 0|step: 362|ppo_ep: 1|act_loss: 0.07672119140625|cri_loss: 0.042572021484375|unsuper_loss: 0.0 +average reward score: 6.07421875 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.80s (76.93%) |Training time=0.44s (18.79%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.33 +epoch: 0|step: 363|ppo_ep: 1|act_loss: -0.13330078125|cri_loss: -0.06036376953125|unsuper_loss: 0.0 +average reward score: 6.20703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.54%) |Training time=0.43s (19.82%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.33 +epoch: 0|step: 364|ppo_ep: 1|act_loss: 0.0775146484375|cri_loss: 0.042633056640625|unsuper_loss: 0.0 +average reward score: 5.8046875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.44%) |Training time=0.43s (19.94%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.33 +epoch: 0|step: 365|ppo_ep: 1|act_loss: 0.0132904052734375|cri_loss: 0.00804901123046875|unsuper_loss: 0.0 +average reward score: 5.5703125 +------------------------------------------------------------------------------------- +|E2E latency=2.53s |Gather latency=0.00s (0.00%) |Generate time=1.63s (64.45%) |Training time=0.44s (17.53%) |Others=0.46 (18.02%)|CurSamplesPerSec=12.66 |AvgSamplesPerSec=14.32 +epoch: 0|step: 366|ppo_ep: 1|act_loss: -0.025360107421875|cri_loss: -0.01123809814453125|unsuper_loss: 0.0 +average reward score: 5.640625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.18%) |Training time=0.44s (20.19%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.33 +epoch: 0|step: 367|ppo_ep: 1|act_loss: -0.018341064453125|cri_loss: -0.00739288330078125|unsuper_loss: 0.0 +average reward score: 5.3359375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.64%) |Training time=0.43s (19.82%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.33 +epoch: 0|step: 368|ppo_ep: 1|act_loss: -0.03570556640625|cri_loss: -0.0162506103515625|unsuper_loss: 0.0 +average reward score: 4.9609375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.39%) |Training time=0.43s (19.88%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.33 +[2023-04-14 09:01:08,114] [INFO] [logging.py:96:log_dist] [Rank 0] step=370, skipped=7, lr=[9.625286903566743e-06, 9.625286903566743e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:01:08,133] [INFO] [timer.py:199:stop] epoch=0/micro_step=370/global_step=370, RunningAvgSamplesPerSec=103.09254097134992, CurrSamplesPerSec=103.87621324471226, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:01:08,227] [INFO] [logging.py:96:log_dist] [Rank 0] step=370, skipped=5, lr=[4.986999967443538e-06, 4.986999967443538e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 369|ppo_ep: 1|act_loss: 0.037139892578125|cri_loss: 0.02215576171875|unsuper_loss: 0.0 +average reward score: 5.98046875 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.98%) |Training time=0.47s (21.40%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.54 |AvgSamplesPerSec=14.33 +epoch: 0|step: 370|ppo_ep: 1|act_loss: -0.025787353515625|cri_loss: -0.0121307373046875|unsuper_loss: 0.0 +average reward score: 5.40625 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.72s (75.80%) |Training time=0.45s (19.78%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.10 |AvgSamplesPerSec=14.33 +epoch: 0|step: 371|ppo_ep: 1|act_loss: 0.15869140625|cri_loss: 0.1014404296875|unsuper_loss: 0.0 +average reward score: 6.26171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.71%) |Training time=0.45s (20.76%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.33 +epoch: 0|step: 372|ppo_ep: 1|act_loss: 0.028961181640625|cri_loss: 0.0159454345703125|unsuper_loss: 0.0 +average reward score: 5.92578125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.55%) |Training time=0.46s (20.89%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.33 +epoch: 0|step: 373|ppo_ep: 1|act_loss: -0.00873565673828125|cri_loss: -0.00383758544921875|unsuper_loss: 0.0 +average reward score: 5.73046875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.99%) |Training time=0.44s (20.43%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.33 +epoch: 0|step: 374|ppo_ep: 1|act_loss: 0.05352783203125|cri_loss: 0.029754638671875|unsuper_loss: 0.0 +average reward score: 6.60546875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.83%) |Training time=0.42s (19.45%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.33 +epoch: 0|step: 375|ppo_ep: 1|act_loss: -0.0106658935546875|cri_loss: -0.004302978515625|unsuper_loss: 0.0 +average reward score: 5.89453125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.77%) |Training time=0.45s (20.61%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.33 +epoch: 0|step: 376|ppo_ep: 1|act_loss: -0.08251953125|cri_loss: -0.03900146484375|unsuper_loss: 0.0 +average reward score: 5.85546875 +------------------------------------------------------------------------------------- +|E2E latency=2.46s |Gather latency=0.00s (0.00%) |Generate time=1.62s (65.79%) |Training time=0.46s (18.63%) |Others=0.38 (15.58%)|CurSamplesPerSec=13.01 |AvgSamplesPerSec=14.33 +epoch: 0|step: 377|ppo_ep: 1|act_loss: 0.0123748779296875|cri_loss: 0.0083465576171875|unsuper_loss: 0.0 +average reward score: 6.8984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.55%) |Training time=0.45s (20.76%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.33 +epoch: 0|step: 378|ppo_ep: 1|act_loss: -0.016845703125|cri_loss: -0.006771087646484375|unsuper_loss: 0.0 +average reward score: 6.5 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.54%) |Training time=0.45s (20.82%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.33 +[2023-04-14 09:01:30,242] [INFO] [logging.py:96:log_dist] [Rank 0] step=380, skipped=7, lr=[9.623373615291988e-06, 9.623373615291988e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:01:30,260] [INFO] [timer.py:199:stop] epoch=0/micro_step=380/global_step=380, RunningAvgSamplesPerSec=103.31409354931576, CurrSamplesPerSec=110.43399930555898, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:01:30,353] [INFO] [logging.py:96:log_dist] [Rank 0] step=380, skipped=5, lr=[4.986001254867505e-06, 4.986001254867505e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 379|ppo_ep: 1|act_loss: 0.00899505615234375|cri_loss: 0.004871368408203125|unsuper_loss: 0.0 +average reward score: 6.72265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.65%) |Training time=0.45s (20.85%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.33 +epoch: 0|step: 380|ppo_ep: 1|act_loss: -0.007282257080078125|cri_loss: -0.00290679931640625|unsuper_loss: 0.0 +average reward score: 6.97265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.43%) |Training time=0.46s (20.97%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.33 +epoch: 0|step: 381|ppo_ep: 1|act_loss: -0.0330810546875|cri_loss: -0.0158538818359375|unsuper_loss: 0.0 +average reward score: 7.83203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.60%) |Training time=0.45s (20.85%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.33 +epoch: 0|step: 382|ppo_ep: 1|act_loss: -0.0264892578125|cri_loss: -0.01214599609375|unsuper_loss: 0.0 +average reward score: 5.5390625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.75%) |Training time=0.45s (20.61%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.34 +epoch: 0|step: 383|ppo_ep: 1|act_loss: -0.029754638671875|cri_loss: -0.01397705078125|unsuper_loss: 0.0 +average reward score: 6.98046875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.42%) |Training time=0.43s (20.04%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34 +epoch: 0|step: 384|ppo_ep: 1|act_loss: 0.044189453125|cri_loss: 0.0272216796875|unsuper_loss: 0.0 +average reward score: 5.6953125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.78%) |Training time=0.45s (20.59%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.34 +epoch: 0|step: 385|ppo_ep: 1|act_loss: 0.024200439453125|cri_loss: 0.0135498046875|unsuper_loss: 0.0 +average reward score: 5.93359375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.43%) |Training time=0.45s (20.91%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.34 +epoch: 0|step: 386|ppo_ep: 1|act_loss: 0.044403076171875|cri_loss: 0.0243988037109375|unsuper_loss: 0.0 +average reward score: 6.16796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.73%) |Training time=0.45s (20.64%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34 +[2023-04-14 09:01:47,714] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 387|ppo_ep: 1|act_loss: 0.048492431640625|cri_loss: 0.026763916015625|unsuper_loss: 0.0 +average reward score: 5.87109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.20%) |Training time=0.45s (20.62%) |Others=0.09 (4.19%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.34 +[2023-04-14 09:01:49,872] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 388|ppo_ep: 1|act_loss: 0.0159149169921875|cri_loss: 0.00897979736328125|unsuper_loss: 0.0 +average reward score: 5.68359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.66%) |Training time=0.43s (20.09%) |Others=0.09 (4.25%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.34 +[2023-04-14 09:01:51,930] [INFO] [logging.py:96:log_dist] [Rank 0] step=390, skipped=7, lr=[9.62138920354481e-06, 9.62138920354481e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:01:51,948] [INFO] [timer.py:199:stop] epoch=0/micro_step=390/global_step=390, RunningAvgSamplesPerSec=103.55384594517648, CurrSamplesPerSec=120.64981208262432, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:01:52,041] [INFO] [logging.py:96:log_dist] [Rank 0] step=390, skipped=7, lr=[4.985175753132026e-06, 4.985175753132026e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 389|ppo_ep: 1|act_loss: -0.0217132568359375|cri_loss: -0.01003265380859375|unsuper_loss: 0.0 +average reward score: 6.18359375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.62%) |Training time=0.43s (19.72%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34 +epoch: 0|step: 390|ppo_ep: 1|act_loss: 0.026214599609375|cri_loss: 0.0140380859375|unsuper_loss: 0.0 +average reward score: 5.77734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.59%) |Training time=0.45s (20.80%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.34 +epoch: 0|step: 391|ppo_ep: 1|act_loss: 0.0062713623046875|cri_loss: 0.00415802001953125|unsuper_loss: 0.0 +average reward score: 5.71484375 +------------------------------------------------------------------------------------- +|E2E latency=2.42s |Gather latency=0.00s (0.00%) |Generate time=1.63s (67.10%) |Training time=0.44s (18.30%) |Others=0.35 (14.61%)|CurSamplesPerSec=13.21 |AvgSamplesPerSec=14.34 +epoch: 0|step: 392|ppo_ep: 1|act_loss: -0.021392822265625|cri_loss: -0.0094757080078125|unsuper_loss: 0.0 +average reward score: 5.67578125 +------------------------------------------------------------------------------------- +|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.80s (76.61%) |Training time=0.45s (19.10%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.64 |AvgSamplesPerSec=14.34 +epoch: 0|step: 393|ppo_ep: 1|act_loss: 0.0036029815673828125|cri_loss: 0.0069122314453125|unsuper_loss: 0.0 +average reward score: 5.3828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.56%) |Training time=0.43s (19.89%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.34 +epoch: 0|step: 394|ppo_ep: 1|act_loss: 0.027252197265625|cri_loss: 0.0158843994140625|unsuper_loss: 0.0 +average reward score: 6.08984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.04%) |Training time=0.44s (20.35%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.34 +epoch: 0|step: 395|ppo_ep: 1|act_loss: -0.0112457275390625|cri_loss: -0.00496673583984375|unsuper_loss: 0.0 +average reward score: 5.51953125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.13%) |Training time=0.44s (20.23%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.34 +epoch: 0|step: 396|ppo_ep: 1|act_loss: -0.03106689453125|cri_loss: -0.014862060546875|unsuper_loss: 0.0 +average reward score: 4.6015625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.42%) |Training time=0.43s (19.95%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.34 +epoch: 0|step: 397|ppo_ep: 1|act_loss: -0.04022216796875|cri_loss: -0.019287109375|unsuper_loss: 0.0 +average reward score: 5.375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.22%) |Training time=0.44s (20.05%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.34 +epoch: 0|step: 398|ppo_ep: 1|act_loss: -0.04376220703125|cri_loss: -0.0200653076171875|unsuper_loss: 0.0 +average reward score: 5.734375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.65s (74.98%) |Training time=0.44s (20.24%) |Others=0.11 (4.78%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.34 +[2023-04-14 09:02:14,207] [INFO] [logging.py:96:log_dist] [Rank 0] step=400, skipped=7, lr=[9.619333697738975e-06, 9.619333697738975e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:02:14,225] [INFO] [timer.py:199:stop] epoch=0/micro_step=400/global_step=400, RunningAvgSamplesPerSec=103.79039135515188, CurrSamplesPerSec=104.92432531312882, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:02:14,317] [INFO] [logging.py:96:log_dist] [Rank 0] step=400, skipped=7, lr=[4.984110724217086e-06, 4.984110724217086e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 399|ppo_ep: 1|act_loss: -0.012847900390625|cri_loss: -0.004749298095703125|unsuper_loss: 0.0 +average reward score: 5.34765625 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.72s (75.21%) |Training time=0.47s (20.49%) |Others=0.10 (4.30%)|CurSamplesPerSec=14.01 |AvgSamplesPerSec=14.34 +epoch: 0|step: 400|ppo_ep: 1|act_loss: 0.0682373046875|cri_loss: 0.0369873046875|unsuper_loss: 0.0 +average reward score: 5.3828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.58%) |Training time=0.45s (20.79%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.34 +epoch: 0|step: 401|ppo_ep: 1|act_loss: 0.024078369140625|cri_loss: 0.012664794921875|unsuper_loss: 0.0 +average reward score: 5.3203125 +------------------------------------------------------------------------------------- +|E2E latency=2.59s |Gather latency=0.00s (0.00%) |Generate time=1.62s (62.63%) |Training time=0.45s (17.50%) |Others=0.51 (19.86%)|CurSamplesPerSec=12.36 |AvgSamplesPerSec=14.34 +epoch: 0|step: 402|ppo_ep: 1|act_loss: 0.0159912109375|cri_loss: 0.0086822509765625|unsuper_loss: 0.0 +average reward score: 5.640625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.37%) |Training time=0.46s (21.03%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.34 +epoch: 0|step: 403|ppo_ep: 1|act_loss: 0.021636962890625|cri_loss: 0.0117950439453125|unsuper_loss: 0.0 +average reward score: 5.29296875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.52%) |Training time=0.43s (19.83%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.34 +epoch: 0|step: 404|ppo_ep: 1|act_loss: 0.058624267578125|cri_loss: 0.03216552734375|unsuper_loss: 0.0 +average reward score: 5.73828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.61%) |Training time=0.43s (19.75%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.34 +epoch: 0|step: 405|ppo_ep: 1|act_loss: 0.02593994140625|cri_loss: 0.0146026611328125|unsuper_loss: 0.0 +average reward score: 5.6875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.85%) |Training time=0.45s (20.62%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.34 +epoch: 0|step: 406|ppo_ep: 1|act_loss: 0.006076812744140625|cri_loss: 0.006511688232421875|unsuper_loss: 0.0 +average reward score: 6.0859375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.57%) |Training time=0.45s (20.82%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.34 +epoch: 0|step: 407|ppo_ep: 1|act_loss: -0.01947021484375|cri_loss: -0.00719451904296875|unsuper_loss: 0.0 +average reward score: 5.48046875 +------------------------------------------------------------------------------------- +|E2E latency=3.09s |Gather latency=0.00s (0.00%) |Generate time=1.78s (57.80%) |Training time=0.45s (14.56%) |Others=0.85 (27.64%)|CurSamplesPerSec=10.36 |AvgSamplesPerSec=14.33 +epoch: 0|step: 408|ppo_ep: 1|act_loss: 0.00424957275390625|cri_loss: 0.0063934326171875|unsuper_loss: 0.0 +average reward score: 5.99609375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.50%) |Training time=0.45s (20.87%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.33 +[2023-04-14 09:02:37,266] [INFO] [logging.py:96:log_dist] [Rank 0] step=410, skipped=7, lr=[9.617207128342042e-06, 9.617207128342042e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:02:37,284] [INFO] [timer.py:199:stop] epoch=0/micro_step=410/global_step=410, RunningAvgSamplesPerSec=103.98547538695068, CurrSamplesPerSec=111.4699448786658, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:02:37,377] [INFO] [logging.py:96:log_dist] [Rank 0] step=410, skipped=7, lr=[4.983008874788623e-06, 4.983008874788623e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 409|ppo_ep: 1|act_loss: -0.04351806640625|cri_loss: -0.019500732421875|unsuper_loss: 0.0 +average reward score: 5.24609375 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.51%) |Training time=0.45s (20.31%) |Others=0.14 (6.18%)|CurSamplesPerSec=14.43 |AvgSamplesPerSec=14.33 +epoch: 0|step: 410|ppo_ep: 1|act_loss: -0.032745361328125|cri_loss: -0.0156402587890625|unsuper_loss: 0.0 +average reward score: 6.37890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.57%) |Training time=0.45s (20.81%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.33 +epoch: 0|step: 411|ppo_ep: 1|act_loss: -0.0243377685546875|cri_loss: -0.0116424560546875|unsuper_loss: 0.0 +average reward score: 5.6015625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.64%) |Training time=0.45s (20.77%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.33 +epoch: 0|step: 412|ppo_ep: 1|act_loss: 0.04522705078125|cri_loss: 0.0237884521484375|unsuper_loss: 0.0 +average reward score: 5.68359375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.64%) |Training time=0.45s (20.70%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.33 +epoch: 0|step: 413|ppo_ep: 1|act_loss: -0.031341552734375|cri_loss: -0.01459503173828125|unsuper_loss: 0.0 +average reward score: 5.93359375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.86%) |Training time=0.45s (20.51%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.33 +epoch: 0|step: 414|ppo_ep: 1|act_loss: 0.01171875|cri_loss: 0.0071258544921875|unsuper_loss: 0.0 +average reward score: 5.69140625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.72%) |Training time=0.45s (20.67%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.34 +epoch: 0|step: 415|ppo_ep: 1|act_loss: -0.045379638671875|cri_loss: -0.0215301513671875|unsuper_loss: 0.0 +average reward score: 7.15234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.78%) |Training time=0.45s (20.70%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.34 +epoch: 0|step: 416|ppo_ep: 1|act_loss: -0.01280975341796875|cri_loss: -0.0058746337890625|unsuper_loss: 0.0 +average reward score: 6.015625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.71%) |Training time=0.45s (20.66%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.34 +epoch: 0|step: 417|ppo_ep: 1|act_loss: -0.0185699462890625|cri_loss: -0.00850677490234375|unsuper_loss: 0.0 +average reward score: 6.12890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.84%) |Training time=0.45s (20.52%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34 +epoch: 0|step: 418|ppo_ep: 1|act_loss: 0.0283050537109375|cri_loss: 0.0148162841796875|unsuper_loss: 0.0 +average reward score: 6.3359375 +------------------------------------------------------------------------------------- +|E2E latency=2.54s |Gather latency=0.00s (0.00%) |Generate time=1.63s (64.15%) |Training time=0.44s (17.27%) |Others=0.47 (18.57%)|CurSamplesPerSec=12.61 |AvgSamplesPerSec=14.33 +[2023-04-14 09:02:59,407] [INFO] [logging.py:96:log_dist] [Rank 0] step=420, skipped=7, lr=[9.615009526874895e-06, 9.615009526874895e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:02:59,425] [INFO] [timer.py:199:stop] epoch=0/micro_step=420/global_step=420, RunningAvgSamplesPerSec=104.16394471702999, CurrSamplesPerSec=110.71603520667838, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:02:59,518] [INFO] [logging.py:96:log_dist] [Rank 0] step=420, skipped=7, lr=[4.981870221178703e-06, 4.981870221178703e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 419|ppo_ep: 1|act_loss: 0.04669189453125|cri_loss: 0.025634765625|unsuper_loss: 0.0 +average reward score: 5.734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.55%) |Training time=0.45s (20.82%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.33 +epoch: 0|step: 420|ppo_ep: 1|act_loss: 0.01861572265625|cri_loss: 0.0119781494140625|unsuper_loss: 0.0 +average reward score: 5.6953125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.79%) |Training time=0.45s (20.54%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34 +epoch: 0|step: 421|ppo_ep: 1|act_loss: 0.04266357421875|cri_loss: 0.023651123046875|unsuper_loss: 0.0 +average reward score: 5.7734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.76%) |Training time=0.45s (20.71%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.34 +epoch: 0|step: 422|ppo_ep: 1|act_loss: -0.026763916015625|cri_loss: -0.01251983642578125|unsuper_loss: 0.0 +average reward score: 6.8046875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.66%) |Training time=0.45s (20.65%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.34 +epoch: 0|step: 423|ppo_ep: 1|act_loss: -0.02587890625|cri_loss: -0.011993408203125|unsuper_loss: 0.0 +average reward score: 5.5390625 +------------------------------------------------------------------------------------- +|E2E latency=3.01s |Gather latency=0.00s (0.00%) |Generate time=1.79s (59.41%) |Training time=0.45s (14.82%) |Others=0.78 (25.77%)|CurSamplesPerSec=10.61 |AvgSamplesPerSec=14.33 +epoch: 0|step: 424|ppo_ep: 1|act_loss: 0.0005359649658203125|cri_loss: 0.0005984306335449219|unsuper_loss: 0.0 +average reward score: 6.15234375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.89%) |Training time=0.45s (20.58%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.33 +epoch: 0|step: 425|ppo_ep: 1|act_loss: 0.05877685546875|cri_loss: 0.031890869140625|unsuper_loss: 0.0 +average reward score: 6.1171875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.81%) |Training time=0.45s (20.67%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.33 +epoch: 0|step: 426|ppo_ep: 1|act_loss: 0.00269317626953125|cri_loss: 0.0024261474609375|unsuper_loss: 0.0 +average reward score: 7.10546875 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.65s (74.81%) |Training time=0.46s (20.73%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.51 |AvgSamplesPerSec=14.33 +epoch: 0|step: 427|ppo_ep: 1|act_loss: -0.010650634765625|cri_loss: -0.0042266845703125|unsuper_loss: 0.0 +average reward score: 6.15625 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.66s (73.85%) |Training time=0.46s (20.32%) |Others=0.13 (5.83%)|CurSamplesPerSec=14.25 |AvgSamplesPerSec=14.33 +epoch: 0|step: 428|ppo_ep: 1|act_loss: -0.0057525634765625|cri_loss: -0.0024261474609375|unsuper_loss: 0.0 +average reward score: 7.1171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.80%) |Training time=0.45s (20.66%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.33 +[2023-04-14 09:03:22,100] [INFO] [logging.py:96:log_dist] [Rank 0] step=430, skipped=7, lr=[9.612740925911291e-06, 9.612740925911291e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:03:22,118] [INFO] [timer.py:199:stop] epoch=0/micro_step=430/global_step=430, RunningAvgSamplesPerSec=104.3303442174363, CurrSamplesPerSec=114.00963432544857, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:03:22,211] [INFO] [logging.py:96:log_dist] [Rank 0] step=430, skipped=7, lr=[4.980694780264918e-06, 4.980694780264918e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 429|ppo_ep: 1|act_loss: -0.003879547119140625|cri_loss: -0.0010061264038085938|unsuper_loss: 0.0 +average reward score: 6.046875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.17%) |Training time=0.44s (20.33%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.33 +epoch: 0|step: 430|ppo_ep: 1|act_loss: 0.00652313232421875|cri_loss: 0.00467681884765625|unsuper_loss: 0.0 +average reward score: 6.27734375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.60%) |Training time=0.45s (20.86%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.33 +epoch: 0|step: 431|ppo_ep: 1|act_loss: 0.0018978118896484375|cri_loss: 0.0019092559814453125|unsuper_loss: 0.0 +average reward score: 6.4453125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.75%) |Training time=0.45s (20.71%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.33 +epoch: 0|step: 432|ppo_ep: 1|act_loss: 0.007190704345703125|cri_loss: 0.00390625|unsuper_loss: 0.0 +average reward score: 6.2265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.80%) |Training time=0.45s (20.65%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.33 +epoch: 0|step: 433|ppo_ep: 1|act_loss: 0.003032684326171875|cri_loss: 0.002223968505859375|unsuper_loss: 0.0 +average reward score: 6.2265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.09%) |Training time=0.44s (20.35%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.33 +epoch: 0|step: 434|ppo_ep: 1|act_loss: -0.058868408203125|cri_loss: -0.0277099609375|unsuper_loss: 0.0 +average reward score: 5.90234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.79%) |Training time=0.45s (20.68%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.33 +epoch: 0|step: 435|ppo_ep: 1|act_loss: -0.022735595703125|cri_loss: -0.01020050048828125|unsuper_loss: 0.0 +average reward score: 6.27734375 +------------------------------------------------------------------------------------- +|E2E latency=2.41s |Gather latency=0.00s (0.00%) |Generate time=1.62s (67.27%) |Training time=0.45s (18.76%) |Others=0.34 (13.97%)|CurSamplesPerSec=13.27 |AvgSamplesPerSec=14.33 +epoch: 0|step: 436|ppo_ep: 1|act_loss: 0.053619384765625|cri_loss: 0.02984619140625|unsuper_loss: 0.0 +average reward score: 6.0390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.81%) |Training time=0.45s (20.67%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.33 +epoch: 0|step: 437|ppo_ep: 1|act_loss: 0.01629638671875|cri_loss: 0.01190948486328125|unsuper_loss: 0.0 +average reward score: 6.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.79s (76.74%) |Training time=0.44s (19.03%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.33 +epoch: 0|step: 438|ppo_ep: 1|act_loss: -0.07861328125|cri_loss: -0.0305023193359375|unsuper_loss: 0.0 +average reward score: 6.1796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.73%) |Training time=0.45s (20.73%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.33 +[2023-04-14 09:03:44,233] [INFO] [logging.py:96:log_dist] [Rank 0] step=440, skipped=7, lr=[9.61040135907737e-06, 9.61040135907737e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:03:44,251] [INFO] [timer.py:199:stop] epoch=0/micro_step=440/global_step=440, RunningAvgSamplesPerSec=104.48292126996193, CurrSamplesPerSec=108.55886356632661, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:03:44,344] [INFO] [logging.py:96:log_dist] [Rank 0] step=440, skipped=7, lr=[4.97948256947014e-06, 4.97948256947014e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 439|ppo_ep: 1|act_loss: -0.0447998046875|cri_loss: -0.0199737548828125|unsuper_loss: 0.0 +average reward score: 6.0390625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.47%) |Training time=0.46s (21.03%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.33 +epoch: 0|step: 440|ppo_ep: 1|act_loss: 0.052581787109375|cri_loss: 0.0293731689453125|unsuper_loss: 0.0 +average reward score: 5.6640625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.96%) |Training time=0.44s (20.49%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.33 +epoch: 0|step: 441|ppo_ep: 1|act_loss: 0.05633544921875|cri_loss: 0.03546142578125|unsuper_loss: 0.0 +average reward score: 5.51953125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.06%) |Training time=0.44s (20.41%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.33 +epoch: 0|step: 442|ppo_ep: 1|act_loss: -0.0230560302734375|cri_loss: 0.0008544921875|unsuper_loss: 0.0 +average reward score: 5.3671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.69%) |Training time=0.45s (20.77%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.33 +epoch: 0|step: 443|ppo_ep: 1|act_loss: 0.016937255859375|cri_loss: 0.011688232421875|unsuper_loss: 0.0 +average reward score: 6.484375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.60%) |Training time=0.45s (20.87%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.34 +epoch: 0|step: 444|ppo_ep: 1|act_loss: 0.0214691162109375|cri_loss: 0.01448822021484375|unsuper_loss: 0.0 +average reward score: 4.859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.07%) |Training time=0.44s (20.37%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.34 +epoch: 0|step: 445|ppo_ep: 1|act_loss: -0.0421142578125|cri_loss: -0.016326904296875|unsuper_loss: 0.0 +average reward score: 6.421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.77%) |Training time=0.45s (20.70%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.34 +epoch: 0|step: 446|ppo_ep: 1|act_loss: 0.120361328125|cri_loss: 0.064453125|unsuper_loss: 0.0 +average reward score: 5.7890625 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.28%) |Training time=0.45s (20.36%) |Others=0.14 (6.36%)|CurSamplesPerSec=14.44 |AvgSamplesPerSec=14.34 +epoch: 0|step: 447|ppo_ep: 1|act_loss: -0.0311126708984375|cri_loss: -0.0119476318359375|unsuper_loss: 0.0 +average reward score: 5.62109375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.79%) |Training time=0.45s (20.65%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.34 +epoch: 0|step: 448|ppo_ep: 1|act_loss: -0.024383544921875|cri_loss: -0.00986480712890625|unsuper_loss: 0.0 +average reward score: 5.66796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.41%) |Training time=0.43s (20.04%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34 +[2023-04-14 09:04:05,991] [INFO] [logging.py:96:log_dist] [Rank 0] step=450, skipped=7, lr=[9.607990861051154e-06, 9.607990861051154e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:04:06,009] [INFO] [timer.py:199:stop] epoch=0/micro_step=450/global_step=450, RunningAvgSamplesPerSec=104.64871045065954, CurrSamplesPerSec=110.18387111339149, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:04:06,104] [INFO] [logging.py:96:log_dist] [Rank 0] step=450, skipped=7, lr=[4.978233606762256e-06, 4.978233606762256e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 449|ppo_ep: 1|act_loss: -0.11602783203125|cri_loss: -0.0455322265625|unsuper_loss: 0.0 +average reward score: 5.38671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.53%) |Training time=0.45s (20.86%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.34 +epoch: 0|step: 450|ppo_ep: 1|act_loss: 0.019927978515625|cri_loss: 0.0127716064453125|unsuper_loss: 0.0 +average reward score: 6.1171875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.35%) |Training time=0.46s (21.11%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.34 +epoch: 0|step: 451|ppo_ep: 1|act_loss: 0.113037109375|cri_loss: 0.065673828125|unsuper_loss: 0.0 +average reward score: 5.57421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.80%) |Training time=0.45s (20.68%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.34 +epoch: 0|step: 452|ppo_ep: 1|act_loss: 0.041015625|cri_loss: 0.0259552001953125|unsuper_loss: 0.0 +average reward score: 6.6640625 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.80s (76.88%) |Training time=0.44s (18.90%) |Others=0.10 (4.21%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.34 +epoch: 0|step: 453|ppo_ep: 1|act_loss: -0.0408935546875|cri_loss: -0.0171661376953125|unsuper_loss: 0.0 +average reward score: 6.30859375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.65s (74.98%) |Training time=0.45s (20.53%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.34 +epoch: 0|step: 454|ppo_ep: 1|act_loss: -0.0245361328125|cri_loss: -0.01012420654296875|unsuper_loss: 0.0 +average reward score: 5.98046875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.83%) |Training time=0.45s (20.65%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.34 +epoch: 0|step: 455|ppo_ep: 1|act_loss: 0.012359619140625|cri_loss: 0.01100921630859375|unsuper_loss: 0.0 +average reward score: 6.6953125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.75%) |Training time=0.45s (20.72%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.34 +epoch: 0|step: 456|ppo_ep: 1|act_loss: 0.0537109375|cri_loss: 0.0309906005859375|unsuper_loss: 0.0 +average reward score: 6.28515625 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.66s (75.13%) |Training time=0.45s (20.41%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.51 |AvgSamplesPerSec=14.34 +epoch: 0|step: 457|ppo_ep: 1|act_loss: 0.038177490234375|cri_loss: 0.026824951171875|unsuper_loss: 0.0 +average reward score: 6.890625 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.73s (75.83%) |Training time=0.45s (19.71%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.04 |AvgSamplesPerSec=14.34 +epoch: 0|step: 458|ppo_ep: 1|act_loss: 0.062255859375|cri_loss: 0.0347900390625|unsuper_loss: 0.0 +average reward score: 6.5703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.63%) |Training time=0.45s (20.83%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.34 +[2023-04-14 09:04:28,063] [INFO] [logging.py:96:log_dist] [Rank 0] step=460, skipped=7, lr=[9.60550946756204e-06, 9.60550946756204e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:04:28,081] [INFO] [timer.py:199:stop] epoch=0/micro_step=460/global_step=460, RunningAvgSamplesPerSec=104.78282677177035, CurrSamplesPerSec=112.42672956817755, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:04:28,174] [INFO] [logging.py:96:log_dist] [Rank 0] step=460, skipped=7, lr=[4.976947910653907e-06, 4.976947910653907e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 459|ppo_ep: 1|act_loss: 0.0418701171875|cri_loss: 0.0247802734375|unsuper_loss: 0.0 +average reward score: 6.96875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.85%) |Training time=0.45s (20.62%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34 +epoch: 0|step: 460|ppo_ep: 1|act_loss: -0.0098114013671875|cri_loss: 0.0029449462890625|unsuper_loss: 0.0 +average reward score: 7.3515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.67%) |Training time=0.45s (20.78%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.34 +epoch: 0|step: 461|ppo_ep: 1|act_loss: 0.038818359375|cri_loss: 0.021240234375|unsuper_loss: 0.0 +average reward score: 6.46484375 +------------------------------------------------------------------------------------- +|E2E latency=3.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (50.91%) |Training time=0.45s (14.09%) |Others=1.12 (35.00%)|CurSamplesPerSec=10.02 |AvgSamplesPerSec=14.33 +epoch: 0|step: 462|ppo_ep: 1|act_loss: -0.0008087158203125|cri_loss: 0.004665374755859375|unsuper_loss: 0.0 +average reward score: 6.25 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.11%) |Training time=0.44s (20.37%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.33 +epoch: 0|step: 463|ppo_ep: 1|act_loss: -0.00408172607421875|cri_loss: 0.0038604736328125|unsuper_loss: 0.0 +average reward score: 6.27734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.30%) |Training time=0.44s (20.13%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.33 +epoch: 0|step: 464|ppo_ep: 1|act_loss: -0.07135009765625|cri_loss: -0.0275421142578125|unsuper_loss: 0.0 +average reward score: 6.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.95%) |Training time=0.45s (20.52%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.33 +epoch: 0|step: 465|ppo_ep: 1|act_loss: 0.051300048828125|cri_loss: 0.0270538330078125|unsuper_loss: 0.0 +average reward score: 5.9921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.74%) |Training time=0.45s (20.72%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.33 +epoch: 0|step: 466|ppo_ep: 1|act_loss: 0.06756591796875|cri_loss: 0.042694091796875|unsuper_loss: 0.0 +average reward score: 6.3125 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.79s (76.65%) |Training time=0.45s (19.12%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.33 +epoch: 0|step: 467|ppo_ep: 1|act_loss: -0.0213623046875|cri_loss: -0.00701141357421875|unsuper_loss: 0.0 +average reward score: 7.28515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.73%) |Training time=0.45s (20.73%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.33 +epoch: 0|step: 468|ppo_ep: 1|act_loss: 0.037811279296875|cri_loss: 0.0226287841796875|unsuper_loss: 0.0 +average reward score: 6.66015625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.99%) |Training time=0.44s (20.47%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.33 +[2023-04-14 09:04:50,963] [INFO] [logging.py:96:log_dist] [Rank 0] step=470, skipped=7, lr=[9.602957215390267e-06, 9.602957215390267e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:04:50,981] [INFO] [timer.py:199:stop] epoch=0/micro_step=470/global_step=470, RunningAvgSamplesPerSec=104.94226242948741, CurrSamplesPerSec=112.22189910894872, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:04:51,073] [INFO] [logging.py:96:log_dist] [Rank 0] step=470, skipped=7, lr=[4.9756255002022105e-06, 4.9756255002022105e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 469|ppo_ep: 1|act_loss: 0.03900146484375|cri_loss: 0.0225982666015625|unsuper_loss: 0.0 +average reward score: 6.65234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.87%) |Training time=0.45s (20.62%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.34 +epoch: 0|step: 470|ppo_ep: 1|act_loss: -0.027069091796875|cri_loss: -0.00945281982421875|unsuper_loss: 0.0 +average reward score: 5.79296875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.29%) |Training time=0.44s (20.18%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.34 +epoch: 0|step: 471|ppo_ep: 1|act_loss: -0.000213623046875|cri_loss: 0.00934600830078125|unsuper_loss: 0.0 +average reward score: 5.99609375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.66%) |Training time=0.45s (20.81%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.34 +epoch: 0|step: 472|ppo_ep: 1|act_loss: 0.05279541015625|cri_loss: 0.028594970703125|unsuper_loss: 0.0 +average reward score: 6.9375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.19%) |Training time=0.44s (20.25%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.34 +epoch: 0|step: 473|ppo_ep: 1|act_loss: -0.17431640625|cri_loss: -0.073974609375|unsuper_loss: 0.0 +average reward score: 5.41015625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.70%) |Training time=0.45s (20.79%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.34 +[2023-04-14 09:05:01,823] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, but hysteresis is 2. Reducing hysteresis to 1 +[2023-04-14 09:05:01,909] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384 +epoch: 0|step: 474|ppo_ep: 1|act_loss: -0.1414794921875|cri_loss: -0.056488037109375|unsuper_loss: 0.0 +average reward score: 5.6796875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.93%) |Training time=0.43s (19.89%) |Others=0.09 (4.18%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.34 +epoch: 0|step: 475|ppo_ep: 1|act_loss: 0.017120361328125|cri_loss: 0.01279449462890625|unsuper_loss: 0.0 +average reward score: 6.078125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.76%) |Training time=0.45s (20.71%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.34 +[2023-04-14 09:05:06,139] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, reducing to 4096 +epoch: 0|step: 476|ppo_ep: 1|act_loss: -0.20263671875|cri_loss: -0.08056640625|unsuper_loss: 0.0 +average reward score: 4.42578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.71%) |Training time=0.42s (19.72%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.34 +epoch: 0|step: 477|ppo_ep: 1|act_loss: -0.1700439453125|cri_loss: -0.07275390625|unsuper_loss: 0.0 +average reward score: 5.1328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.82%) |Training time=0.45s (20.64%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.34 +epoch: 0|step: 478|ppo_ep: 1|act_loss: 0.039215087890625|cri_loss: 0.0303955078125|unsuper_loss: 0.0 +average reward score: 3.51953125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.19%) |Training time=0.44s (20.26%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.34 +[2023-04-14 09:05:12,638] [INFO] [logging.py:96:log_dist] [Rank 0] step=480, skipped=9, lr=[9.600864420788175e-06, 9.600864420788175e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:05:12,656] [INFO] [timer.py:199:stop] epoch=0/micro_step=480/global_step=480, RunningAvgSamplesPerSec=105.12317312034696, CurrSamplesPerSec=110.69512469701006, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:05:12,749] [INFO] [logging.py:96:log_dist] [Rank 0] step=480, skipped=8, lr=[4.9744039562213675e-06, 4.9744039562213675e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 479|ppo_ep: 1|act_loss: 0.248291015625|cri_loss: 0.168701171875|unsuper_loss: 0.0 +average reward score: 2.625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.72%) |Training time=0.45s (20.78%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.34 +epoch: 0|step: 480|ppo_ep: 1|act_loss: 0.0872802734375|cri_loss: 0.067626953125|unsuper_loss: 0.0 +average reward score: 2.62109375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.40%) |Training time=0.46s (21.10%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.34 +epoch: 0|step: 481|ppo_ep: 1|act_loss: -0.034820556640625|cri_loss: -0.014739990234375|unsuper_loss: 0.0 +average reward score: 3.9765625 +------------------------------------------------------------------------------------- +|E2E latency=3.11s |Gather latency=0.00s (0.00%) |Generate time=1.80s (57.93%) |Training time=0.45s (14.32%) |Others=0.86 (27.75%)|CurSamplesPerSec=10.28 |AvgSamplesPerSec=14.33 +epoch: 0|step: 482|ppo_ep: 1|act_loss: -0.1807861328125|cri_loss: -0.0804443359375|unsuper_loss: 0.0 +average reward score: 3.55859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.33%) |Training time=0.44s (20.12%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.33 +epoch: 0|step: 483|ppo_ep: 1|act_loss: 0.0008697509765625|cri_loss: 0.007511138916015625|unsuper_loss: 0.0 +average reward score: 4.078125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.25%) |Training time=0.44s (20.22%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.33 +epoch: 0|step: 484|ppo_ep: 1|act_loss: 0.034332275390625|cri_loss: 0.033843994140625|unsuper_loss: 0.0 +average reward score: 4.1328125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.97%) |Training time=0.44s (20.25%) |Others=0.10 (4.78%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.34 +epoch: 0|step: 485|ppo_ep: 1|act_loss: 0.0885009765625|cri_loss: 0.05035400390625|unsuper_loss: 0.0 +average reward score: 4.46484375 +------------------------------------------------------------------------------------- +|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.75s (76.50%) |Training time=0.44s (19.20%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.99 |AvgSamplesPerSec=14.33 +epoch: 0|step: 486|ppo_ep: 1|act_loss: 0.040283203125|cri_loss: 0.024200439453125|unsuper_loss: 0.0 +average reward score: 4.3359375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.34%) |Training time=0.44s (20.12%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.34 +epoch: 0|step: 487|ppo_ep: 1|act_loss: -0.05267333984375|cri_loss: -0.022125244140625|unsuper_loss: 0.0 +average reward score: 4.96484375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.34%) |Training time=0.44s (20.12%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.34 +epoch: 0|step: 488|ppo_ep: 1|act_loss: 0.0249786376953125|cri_loss: 0.03216552734375|unsuper_loss: 0.0 +average reward score: 4.9453125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.36%) |Training time=0.44s (20.09%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.34 +[2023-04-14 09:05:35,437] [INFO] [logging.py:96:log_dist] [Rank 0] step=490, skipped=9, lr=[9.598184719026e-06, 9.598184719026e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:05:35,455] [INFO] [timer.py:199:stop] epoch=0/micro_step=490/global_step=490, RunningAvgSamplesPerSec=105.31313206740428, CurrSamplesPerSec=116.99643739730386, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:05:35,547] [INFO] [logging.py:96:log_dist] [Rank 0] step=490, skipped=8, lr=[4.973011842968471e-06, 4.973011842968471e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 489|ppo_ep: 1|act_loss: -0.0692138671875|cri_loss: -0.027069091796875|unsuper_loss: 0.0 +average reward score: 4.6328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.38%) |Training time=0.44s (20.10%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.34 +epoch: 0|step: 490|ppo_ep: 1|act_loss: 0.021575927734375|cri_loss: 0.01507568359375|unsuper_loss: 0.0 +average reward score: 5.07421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.37%) |Training time=0.44s (20.09%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.34 +epoch: 0|step: 491|ppo_ep: 1|act_loss: 0.0997314453125|cri_loss: 0.055267333984375|unsuper_loss: 0.0 +average reward score: 6.04296875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.50%) |Training time=0.43s (19.98%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.34 +epoch: 0|step: 492|ppo_ep: 1|act_loss: 0.054840087890625|cri_loss: 0.03228759765625|unsuper_loss: 0.0 +average reward score: 5.1484375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.90%) |Training time=0.42s (19.56%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34 +epoch: 0|step: 493|ppo_ep: 1|act_loss: 0.181884765625|cri_loss: 0.099365234375|unsuper_loss: 0.0 +average reward score: 4.8984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.31%) |Training time=0.44s (20.14%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34 +epoch: 0|step: 494|ppo_ep: 1|act_loss: 0.259765625|cri_loss: 0.1429443359375|unsuper_loss: 0.0 +average reward score: 4.8203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.40%) |Training time=0.44s (20.06%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.34 +epoch: 0|step: 495|ppo_ep: 1|act_loss: -0.0028533935546875|cri_loss: 0.007110595703125|unsuper_loss: 0.0 +average reward score: 5.30859375 +------------------------------------------------------------------------------------- +|E2E latency=3.01s |Gather latency=0.00s (0.00%) |Generate time=1.65s (54.93%) |Training time=0.53s (17.57%) |Others=0.83 (27.49%)|CurSamplesPerSec=10.64 |AvgSamplesPerSec=14.33 +epoch: 0|step: 496|ppo_ep: 1|act_loss: 0.019744873046875|cri_loss: 0.023468017578125|unsuper_loss: 0.0 +average reward score: 4.9453125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.93%) |Training time=0.45s (20.52%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.33 +epoch: 0|step: 497|ppo_ep: 1|act_loss: -0.0919189453125|cri_loss: -0.03338623046875|unsuper_loss: 0.0 +average reward score: 6.00390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.66%) |Training time=0.45s (20.80%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.33 +epoch: 0|step: 498|ppo_ep: 1|act_loss: -0.146728515625|cri_loss: -0.05950927734375|unsuper_loss: 0.0 +average reward score: 4.94921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.63%) |Training time=0.45s (20.84%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.33 +[2023-04-14 09:05:57,994] [INFO] [logging.py:96:log_dist] [Rank 0] step=500, skipped=9, lr=[9.595434267151607e-06, 9.595434267151607e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:05:58,012] [INFO] [timer.py:199:stop] epoch=0/micro_step=500/global_step=500, RunningAvgSamplesPerSec=105.46009089853679, CurrSamplesPerSec=114.55113153375825, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:05:58,105] [INFO] [logging.py:96:log_dist] [Rank 0] step=500, skipped=8, lr=[4.971583073714247e-06, 4.971583073714247e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 499|ppo_ep: 1|act_loss: -0.0750732421875|cri_loss: -0.0311279296875|unsuper_loss: 0.0 +average reward score: 5.1328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.10%) |Training time=0.44s (20.35%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.33 +epoch: 0|step: 500|ppo_ep: 1|act_loss: -0.033935546875|cri_loss: -0.014617919921875|unsuper_loss: 0.0 +average reward score: 4.71875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.77%) |Training time=0.45s (20.69%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.34 +epoch: 0|step: 501|ppo_ep: 1|act_loss: 0.173583984375|cri_loss: 0.0955810546875|unsuper_loss: 0.0 +average reward score: 3.822265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.75%) |Training time=0.45s (20.70%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34 +epoch: 0|step: 502|ppo_ep: 1|act_loss: 0.10595703125|cri_loss: 0.0550537109375|unsuper_loss: 0.0 +average reward score: 3.6640625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.89%) |Training time=0.45s (20.58%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.34 +epoch: 0|step: 503|ppo_ep: 1|act_loss: 0.12017822265625|cri_loss: 0.0670166015625|unsuper_loss: 0.0 +average reward score: 4.44140625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.75%) |Training time=0.45s (20.71%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.34 +epoch: 0|step: 504|ppo_ep: 1|act_loss: -0.024993896484375|cri_loss: -0.0099639892578125|unsuper_loss: 0.0 +average reward score: 6.3515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.05%) |Training time=0.44s (20.41%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.34 +epoch: 0|step: 505|ppo_ep: 1|act_loss: -0.0570068359375|cri_loss: -0.0201263427734375|unsuper_loss: 0.0 +average reward score: 5.26171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.78%) |Training time=0.45s (20.66%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34 +epoch: 0|step: 506|ppo_ep: 1|act_loss: 0.01320648193359375|cri_loss: 0.00848388671875|unsuper_loss: 0.0 +average reward score: 5.80859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.74%) |Training time=0.45s (20.70%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34 +epoch: 0|step: 507|ppo_ep: 1|act_loss: 0.11767578125|cri_loss: 0.06378173828125|unsuper_loss: 0.0 +average reward score: 5.80078125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.16%) |Training time=0.42s (19.30%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.34 +epoch: 0|step: 508|ppo_ep: 1|act_loss: 0.20751953125|cri_loss: 0.11810302734375|unsuper_loss: 0.0 +average reward score: 6.640625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.82%) |Training time=0.45s (20.63%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34 +[2023-04-14 09:06:19,713] [INFO] [logging.py:96:log_dist] [Rank 0] step=510, skipped=9, lr=[9.592613105933331e-06, 9.592613105933331e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:06:19,731] [INFO] [timer.py:199:stop] epoch=0/micro_step=510/global_step=510, RunningAvgSamplesPerSec=105.59803688950436, CurrSamplesPerSec=110.03175743087647, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:06:19,824] [INFO] [logging.py:96:log_dist] [Rank 0] step=510, skipped=8, lr=[4.970117669636501e-06, 4.970117669636501e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 509|ppo_ep: 1|act_loss: 0.1334228515625|cri_loss: 0.0731201171875|unsuper_loss: 0.0 +average reward score: 5.1796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.56%) |Training time=0.45s (20.92%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34 +epoch: 0|step: 510|ppo_ep: 1|act_loss: 0.051361083984375|cri_loss: 0.03314208984375|unsuper_loss: 0.0 +average reward score: 6.3984375 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.79s (76.64%) |Training time=0.45s (19.15%) |Others=0.10 (4.22%)|CurSamplesPerSec=13.68 |AvgSamplesPerSec=14.34 +epoch: 0|step: 511|ppo_ep: 1|act_loss: 0.033843994140625|cri_loss: 0.02349853515625|unsuper_loss: 0.0 +average reward score: 6.17578125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.99%) |Training time=0.44s (20.46%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.34 +epoch: 0|step: 512|ppo_ep: 1|act_loss: -0.001708984375|cri_loss: 0.003505706787109375|unsuper_loss: 0.0 +average reward score: 5.96484375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.53%) |Training time=0.45s (20.92%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.34 +epoch: 0|step: 513|ppo_ep: 1|act_loss: 0.069091796875|cri_loss: 0.04083251953125|unsuper_loss: 0.0 +average reward score: 5.02734375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.86%) |Training time=0.45s (20.61%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.34 +epoch: 0|step: 514|ppo_ep: 1|act_loss: 0.059661865234375|cri_loss: 0.032440185546875|unsuper_loss: 0.0 +average reward score: 4.9375 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.66s (73.99%) |Training time=0.49s (21.61%) |Others=0.10 (4.40%)|CurSamplesPerSec=14.25 |AvgSamplesPerSec=14.34 +epoch: 0|step: 515|ppo_ep: 1|act_loss: 0.04168701171875|cri_loss: 0.021697998046875|unsuper_loss: 0.0 +average reward score: 4.65234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.28%) |Training time=0.44s (20.17%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.34 +epoch: 0|step: 516|ppo_ep: 1|act_loss: -0.0787353515625|cri_loss: -0.03155517578125|unsuper_loss: 0.0 +average reward score: 4.28125 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.63s (70.07%) |Training time=0.44s (18.84%) |Others=0.26 (11.09%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.34 +epoch: 0|step: 517|ppo_ep: 1|act_loss: -0.0179443359375|cri_loss: -0.00739288330078125|unsuper_loss: 0.0 +average reward score: 4.2734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.09%) |Training time=0.44s (20.33%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.34 +epoch: 0|step: 518|ppo_ep: 1|act_loss: -0.05718994140625|cri_loss: -0.0237884521484375|unsuper_loss: 0.0 +average reward score: 4.625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.80%) |Training time=0.45s (20.66%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.34 +[2023-04-14 09:06:41,836] [INFO] [logging.py:96:log_dist] [Rank 0] step=520, skipped=9, lr=[9.589721277187583e-06, 9.589721277187583e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:06:41,854] [INFO] [timer.py:199:stop] epoch=0/micro_step=520/global_step=520, RunningAvgSamplesPerSec=105.71696360506587, CurrSamplesPerSec=113.11680590894035, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:06:41,947] [INFO] [logging.py:96:log_dist] [Rank 0] step=520, skipped=8, lr=[4.968615652456056e-06, 4.968615652456056e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 519|ppo_ep: 1|act_loss: -0.0887451171875|cri_loss: -0.041229248046875|unsuper_loss: 0.0 +average reward score: 4.7265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.98%) |Training time=0.45s (20.50%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.34 +epoch: 0|step: 520|ppo_ep: 1|act_loss: -0.027496337890625|cri_loss: -0.00603485107421875|unsuper_loss: 0.0 +average reward score: 4.46875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.76%) |Training time=0.45s (20.69%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.35 +epoch: 0|step: 521|ppo_ep: 1|act_loss: -0.048919677734375|cri_loss: -0.023101806640625|unsuper_loss: 0.0 +average reward score: 4.20703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.02%) |Training time=0.44s (20.44%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.35 +epoch: 0|step: 522|ppo_ep: 1|act_loss: 0.08306884765625|cri_loss: 0.04388427734375|unsuper_loss: 0.0 +average reward score: 4.59375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.75%) |Training time=0.43s (19.69%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.35 +epoch: 0|step: 523|ppo_ep: 1|act_loss: -0.028228759765625|cri_loss: -0.0107574462890625|unsuper_loss: 0.0 +average reward score: 5.5625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.22%) |Training time=0.44s (20.22%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.35 +epoch: 0|step: 524|ppo_ep: 1|act_loss: -0.03082275390625|cri_loss: -0.01323699951171875|unsuper_loss: 0.0 +average reward score: 5.1328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.44%) |Training time=0.46s (21.03%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.35 +epoch: 0|step: 525|ppo_ep: 1|act_loss: -0.05889892578125|cri_loss: -0.026611328125|unsuper_loss: 0.0 +average reward score: 5.53515625 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.79s (76.84%) |Training time=0.44s (18.93%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.70 |AvgSamplesPerSec=14.35 +epoch: 0|step: 526|ppo_ep: 1|act_loss: 0.016693115234375|cri_loss: 0.01453399658203125|unsuper_loss: 0.0 +average reward score: 5.62890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.88%) |Training time=0.45s (20.57%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.35 +epoch: 0|step: 527|ppo_ep: 1|act_loss: 0.04449462890625|cri_loss: 0.025054931640625|unsuper_loss: 0.0 +average reward score: 6.07421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.64%) |Training time=0.45s (20.80%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.35 +epoch: 0|step: 528|ppo_ep: 1|act_loss: 0.0195465087890625|cri_loss: 0.01230621337890625|unsuper_loss: 0.0 +average reward score: 7.73046875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.85%) |Training time=0.45s (20.59%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.35 +[2023-04-14 09:07:03,705] [INFO] [logging.py:96:log_dist] [Rank 0] step=530, skipped=9, lr=[9.586758823778245e-06, 9.586758823778245e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:07:03,723] [INFO] [timer.py:199:stop] epoch=0/micro_step=530/global_step=530, RunningAvgSamplesPerSec=105.85254516677392, CurrSamplesPerSec=112.33056644956756, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:07:03,816] [INFO] [logging.py:96:log_dist] [Rank 0] step=530, skipped=8, lr=[4.96707704443643e-06, 4.96707704443643e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 529|ppo_ep: 1|act_loss: 0.120849609375|cri_loss: 0.06866455078125|unsuper_loss: 0.0 +average reward score: 6.0390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.83%) |Training time=0.45s (20.64%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.35 +epoch: 0|step: 530|ppo_ep: 1|act_loss: 0.026214599609375|cri_loss: 0.016265869140625|unsuper_loss: 0.0 +average reward score: 5.6171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.73%) |Training time=0.45s (20.65%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.35 +epoch: 0|step: 531|ppo_ep: 1|act_loss: -0.0165557861328125|cri_loss: -0.006198883056640625|unsuper_loss: 0.0 +average reward score: 5.94921875 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.62s (71.93%) |Training time=0.46s (20.20%) |Others=0.18 (7.87%)|CurSamplesPerSec=14.20 |AvgSamplesPerSec=14.35 +epoch: 0|step: 532|ppo_ep: 1|act_loss: -0.03509521484375|cri_loss: -0.012542724609375|unsuper_loss: 0.0 +average reward score: 5.18359375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.56%) |Training time=0.45s (20.89%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.35 +epoch: 0|step: 533|ppo_ep: 1|act_loss: 0.018951416015625|cri_loss: 0.0110321044921875|unsuper_loss: 0.0 +average reward score: 6.78125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.88%) |Training time=0.44s (20.55%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.35 +epoch: 0|step: 534|ppo_ep: 1|act_loss: 0.0035495758056640625|cri_loss: 0.0029582977294921875|unsuper_loss: 0.0 +average reward score: 5.83984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.72%) |Training time=0.45s (20.71%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.35 +epoch: 0|step: 535|ppo_ep: 1|act_loss: 0.0374755859375|cri_loss: 0.02105712890625|unsuper_loss: 0.0 +average reward score: 6.734375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.86%) |Training time=0.45s (20.63%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.35 +epoch: 0|step: 536|ppo_ep: 1|act_loss: -0.029327392578125|cri_loss: -0.01258087158203125|unsuper_loss: 0.0 +average reward score: 6.7578125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.68%) |Training time=0.45s (20.76%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.35 +epoch: 0|step: 537|ppo_ep: 1|act_loss: 0.035675048828125|cri_loss: 0.0200042724609375|unsuper_loss: 0.0 +average reward score: 6.4296875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.73%) |Training time=0.43s (19.70%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.35 +epoch: 0|step: 538|ppo_ep: 1|act_loss: 0.0178375244140625|cri_loss: 0.010833740234375|unsuper_loss: 0.0 +average reward score: 5.0 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.46%) |Training time=0.44s (20.02%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.36 +[2023-04-14 09:07:25,507] [INFO] [logging.py:96:log_dist] [Rank 0] step=540, skipped=9, lr=[9.583725789616017e-06, 9.583725789616017e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:07:25,525] [INFO] [timer.py:199:stop] epoch=0/micro_step=540/global_step=540, RunningAvgSamplesPerSec=105.97276576876709, CurrSamplesPerSec=111.52968940341991, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:07:25,618] [INFO] [logging.py:96:log_dist] [Rank 0] step=540, skipped=8, lr=[4.965501868383507e-06, 4.965501868383507e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 539|ppo_ep: 1|act_loss: 0.04998779296875|cri_loss: 0.0268096923828125|unsuper_loss: 0.0 +average reward score: 5.23046875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.81%) |Training time=0.45s (20.68%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.36 +epoch: 0|step: 540|ppo_ep: 1|act_loss: 0.0016565322875976562|cri_loss: 0.00145721435546875|unsuper_loss: 0.0 +average reward score: 5.8125 +------------------------------------------------------------------------------------- +|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.69s (73.78%) |Training time=0.50s (21.90%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.98 |AvgSamplesPerSec=14.36 +epoch: 0|step: 541|ppo_ep: 1|act_loss: -0.017913818359375|cri_loss: -0.0053253173828125|unsuper_loss: 0.0 +average reward score: 5.453125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.86%) |Training time=0.45s (20.61%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.36 +epoch: 0|step: 542|ppo_ep: 1|act_loss: -0.090087890625|cri_loss: -0.04132080078125|unsuper_loss: 0.0 +average reward score: 6.328125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.99%) |Training time=0.45s (20.44%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.36 +epoch: 0|step: 543|ppo_ep: 1|act_loss: -0.0291748046875|cri_loss: -0.0136871337890625|unsuper_loss: 0.0 +average reward score: 5.1484375 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.66s (75.48%) |Training time=0.44s (20.04%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.51 |AvgSamplesPerSec=14.36 +epoch: 0|step: 544|ppo_ep: 1|act_loss: -0.0049285888671875|cri_loss: -0.0016317367553710938|unsuper_loss: 0.0 +average reward score: 5.3046875 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.73s (75.89%) |Training time=0.45s (19.61%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.07 |AvgSamplesPerSec=14.36 +epoch: 0|step: 545|ppo_ep: 1|act_loss: 0.0345458984375|cri_loss: 0.01959228515625|unsuper_loss: 0.0 +average reward score: 5.33984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.14%) |Training time=0.44s (20.33%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.36 +epoch: 0|step: 546|ppo_ep: 1|act_loss: 0.030914306640625|cri_loss: 0.016998291015625|unsuper_loss: 0.0 +average reward score: 4.8125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.85%) |Training time=0.45s (20.61%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.36 +epoch: 0|step: 547|ppo_ep: 1|act_loss: -0.01776123046875|cri_loss: -0.00774383544921875|unsuper_loss: 0.0 +average reward score: 5.34375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.54%) |Training time=0.45s (20.92%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.36 +epoch: 0|step: 548|ppo_ep: 1|act_loss: -0.08251953125|cri_loss: -0.03662109375|unsuper_loss: 0.0 +average reward score: 5.484375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.67%) |Training time=0.45s (20.79%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.36 +[2023-04-14 09:07:47,483] [INFO] [logging.py:96:log_dist] [Rank 0] step=550, skipped=9, lr=[9.58062221965779e-06, 9.58062221965779e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:07:47,501] [INFO] [timer.py:199:stop] epoch=0/micro_step=550/global_step=550, RunningAvgSamplesPerSec=106.10481150577755, CurrSamplesPerSec=113.16315685191665, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:07:47,594] [INFO] [logging.py:96:log_dist] [Rank 0] step=550, skipped=8, lr=[4.963890147645195e-06, 4.963890147645195e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 549|ppo_ep: 1|act_loss: 0.0102081298828125|cri_loss: 0.0094757080078125|unsuper_loss: 0.0 +average reward score: 5.16796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.92%) |Training time=0.45s (20.56%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.36 +epoch: 0|step: 550|ppo_ep: 1|act_loss: 0.00433349609375|cri_loss: 0.0032825469970703125|unsuper_loss: 0.0 +average reward score: 5.84375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.92%) |Training time=0.45s (20.55%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.36 +epoch: 0|step: 551|ppo_ep: 1|act_loss: 0.11962890625|cri_loss: 0.0660400390625|unsuper_loss: 0.0 +average reward score: 5.328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.50%) |Training time=0.46s (20.97%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.36 +epoch: 0|step: 552|ppo_ep: 1|act_loss: 0.033538818359375|cri_loss: 0.019012451171875|unsuper_loss: 0.0 +average reward score: 5.9921875 +------------------------------------------------------------------------------------- +|E2E latency=2.53s |Gather latency=0.00s (0.00%) |Generate time=1.64s (64.93%) |Training time=0.43s (16.84%) |Others=0.46 (18.23%)|CurSamplesPerSec=12.66 |AvgSamplesPerSec=14.36 +epoch: 0|step: 553|ppo_ep: 1|act_loss: -0.00921630859375|cri_loss: -0.0028324127197265625|unsuper_loss: 0.0 +average reward score: 5.51171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.33%) |Training time=0.44s (20.09%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.36 +epoch: 0|step: 554|ppo_ep: 1|act_loss: -0.01349639892578125|cri_loss: -0.005279541015625|unsuper_loss: 0.0 +average reward score: 6.2578125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.38%) |Training time=0.46s (21.06%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.36 +epoch: 0|step: 555|ppo_ep: 1|act_loss: -0.00868988037109375|cri_loss: -0.0027103424072265625|unsuper_loss: 0.0 +average reward score: 5.6484375 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.32%) |Training time=0.45s (19.43%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.74 |AvgSamplesPerSec=14.36 +epoch: 0|step: 556|ppo_ep: 1|act_loss: -0.038116455078125|cri_loss: -0.01690673828125|unsuper_loss: 0.0 +average reward score: 5.9375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.64%) |Training time=0.45s (20.81%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.36 +epoch: 0|step: 557|ppo_ep: 1|act_loss: -0.014892578125|cri_loss: -0.0054931640625|unsuper_loss: 0.0 +average reward score: 5.88671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.69%) |Training time=0.45s (20.75%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.36 +epoch: 0|step: 558|ppo_ep: 1|act_loss: 0.01316070556640625|cri_loss: 0.0116729736328125|unsuper_loss: 0.0 +average reward score: 5.16796875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.86%) |Training time=0.46s (20.84%) |Others=0.12 (5.31%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.36 +[2023-04-14 09:08:09,697] [INFO] [logging.py:96:log_dist] [Rank 0] step=560, skipped=9, lr=[9.577448159905952e-06, 9.577448159905952e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:08:09,715] [INFO] [timer.py:199:stop] epoch=0/micro_step=560/global_step=560, RunningAvgSamplesPerSec=106.20737357610386, CurrSamplesPerSec=110.12176447228941, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:08:09,810] [INFO] [logging.py:96:log_dist] [Rank 0] step=560, skipped=8, lr=[4.962241906111083e-06, 4.962241906111083e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 559|ppo_ep: 1|act_loss: -0.028106689453125|cri_loss: -0.01242828369140625|unsuper_loss: 0.0 +average reward score: 6.11328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.50%) |Training time=0.45s (20.90%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.36 +epoch: 0|step: 560|ppo_ep: 1|act_loss: -0.00113677978515625|cri_loss: -1.621246337890625e-05|unsuper_loss: 0.0 +average reward score: 6.0859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.40%) |Training time=0.46s (21.07%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.36 +epoch: 0|step: 561|ppo_ep: 1|act_loss: 0.0175323486328125|cri_loss: 0.0101165771484375|unsuper_loss: 0.0 +average reward score: 5.63671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.52%) |Training time=0.45s (20.94%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.36 +epoch: 0|step: 562|ppo_ep: 1|act_loss: 0.1058349609375|cri_loss: 0.055267333984375|unsuper_loss: 0.0 +average reward score: 5.10546875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.54%) |Training time=0.46s (20.97%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.36 +epoch: 0|step: 563|ppo_ep: 1|act_loss: 0.050018310546875|cri_loss: 0.0263671875|unsuper_loss: 0.0 +average reward score: 5.50390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.36%) |Training time=0.46s (21.10%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.36 +epoch: 0|step: 564|ppo_ep: 1|act_loss: -0.010040283203125|cri_loss: -0.003032684326171875|unsuper_loss: 0.0 +average reward score: 6.1328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.51%) |Training time=0.45s (20.85%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.36 +epoch: 0|step: 565|ppo_ep: 1|act_loss: -0.00855255126953125|cri_loss: -0.003559112548828125|unsuper_loss: 0.0 +average reward score: 5.15625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.33%) |Training time=0.46s (21.13%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.36 +epoch: 0|step: 566|ppo_ep: 1|act_loss: -0.0287017822265625|cri_loss: -0.0123443603515625|unsuper_loss: 0.0 +average reward score: 5.140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.45%) |Training time=0.45s (21.00%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.36 +epoch: 0|step: 567|ppo_ep: 1|act_loss: 0.047515869140625|cri_loss: 0.027313232421875|unsuper_loss: 0.0 +average reward score: 5.0 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.33%) |Training time=0.44s (20.13%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.37 +epoch: 0|step: 568|ppo_ep: 1|act_loss: 0.07122802734375|cri_loss: 0.04351806640625|unsuper_loss: 0.0 +average reward score: 5.375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.59%) |Training time=0.43s (19.79%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.37 +[2023-04-14 09:08:31,392] [INFO] [logging.py:96:log_dist] [Rank 0] step=570, skipped=9, lr=[9.574203657407728e-06, 9.574203657407728e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:08:31,410] [INFO] [timer.py:199:stop] epoch=0/micro_step=570/global_step=570, RunningAvgSamplesPerSec=106.29109456024048, CurrSamplesPerSec=111.84186216269592, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:08:31,502] [INFO] [logging.py:96:log_dist] [Rank 0] step=570, skipped=8, lr=[4.960557168212088e-06, 4.960557168212088e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 569|ppo_ep: 1|act_loss: -0.04010009765625|cri_loss: -0.0181732177734375|unsuper_loss: 0.0 +average reward score: 4.74609375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.77%) |Training time=0.45s (20.71%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.37 +epoch: 0|step: 570|ppo_ep: 1|act_loss: -0.067138671875|cri_loss: -0.0316162109375|unsuper_loss: 0.0 +average reward score: 5.09765625 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.81s (77.41%) |Training time=0.43s (18.38%) |Others=0.10 (4.21%)|CurSamplesPerSec=13.68 |AvgSamplesPerSec=14.37 +epoch: 0|step: 571|ppo_ep: 1|act_loss: -0.029998779296875|cri_loss: -0.01392364501953125|unsuper_loss: 0.0 +average reward score: 5.21875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.59%) |Training time=0.45s (20.86%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.37 +epoch: 0|step: 572|ppo_ep: 1|act_loss: -0.065673828125|cri_loss: -0.0294036865234375|unsuper_loss: 0.0 +average reward score: 4.5234375 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.65s (74.51%) |Training time=0.46s (21.02%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.49 |AvgSamplesPerSec=14.37 +epoch: 0|step: 573|ppo_ep: 1|act_loss: 0.06640625|cri_loss: 0.0472412109375|unsuper_loss: 0.0 +average reward score: 4.609375 +------------------------------------------------------------------------------------- +|E2E latency=2.78s |Gather latency=0.00s (0.00%) |Generate time=1.75s (62.78%) |Training time=0.43s (15.34%) |Others=0.61 (21.88%)|CurSamplesPerSec=11.50 |AvgSamplesPerSec=14.36 +[2023-04-14 09:08:43,056] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 4096, reducing to 2048 +epoch: 0|step: 574|ppo_ep: 1|act_loss: 0.0679931640625|cri_loss: 0.039154052734375|unsuper_loss: 0.0 +average reward score: 4.84375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.72%) |Training time=0.42s (19.69%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.36 +epoch: 0|step: 575|ppo_ep: 1|act_loss: 0.12646484375|cri_loss: 0.0697021484375|unsuper_loss: 0.0 +average reward score: 4.4765625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.58%) |Training time=0.45s (20.90%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.36 +[2023-04-14 09:08:47,490] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 576|ppo_ep: 1|act_loss: 0.1339111328125|cri_loss: 0.0732421875|unsuper_loss: 0.0 +average reward score: 4.7421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.93%) |Training time=0.45s (20.92%) |Others=0.09 (4.15%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.36 +[2023-04-14 09:08:49,655] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384 +epoch: 0|step: 577|ppo_ep: 1|act_loss: 0.02734375|cri_loss: 0.025177001953125|unsuper_loss: 0.0 +average reward score: 4.15234375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.05%) |Training time=0.45s (20.79%) |Others=0.09 (4.16%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.36 +epoch: 0|step: 578|ppo_ep: 1|act_loss: 0.216064453125|cri_loss: 0.143310546875|unsuper_loss: 0.0 +average reward score: 4.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.40%) |Training time=0.46s (21.08%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.36 +[2023-04-14 09:08:53,886] [INFO] [logging.py:96:log_dist] [Rank 0] step=580, skipped=10, lr=[9.571223416337106e-06, 9.571223416337106e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:08:53,904] [INFO] [timer.py:199:stop] epoch=0/micro_step=580/global_step=580, RunningAvgSamplesPerSec=106.40314362946779, CurrSamplesPerSec=112.75446463825047, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:08:53,997] [INFO] [logging.py:96:log_dist] [Rank 0] step=580, skipped=10, lr=[4.959183117273112e-06, 4.959183117273112e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 579|ppo_ep: 1|act_loss: 0.2337646484375|cri_loss: 0.13671875|unsuper_loss: 0.0 +average reward score: 4.58984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.84%) |Training time=0.45s (20.62%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.36 +epoch: 0|step: 580|ppo_ep: 1|act_loss: 0.0645751953125|cri_loss: 0.03631591796875|unsuper_loss: 0.0 +average reward score: 5.4609375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.62%) |Training time=0.45s (20.82%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.37 +epoch: 0|step: 581|ppo_ep: 1|act_loss: 0.08306884765625|cri_loss: 0.04730224609375|unsuper_loss: 0.0 +average reward score: 4.984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.60%) |Training time=0.45s (20.86%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.37 +epoch: 0|step: 582|ppo_ep: 1|act_loss: 0.236083984375|cri_loss: 0.1676025390625|unsuper_loss: 0.0 +average reward score: 3.515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.22%) |Training time=0.44s (20.20%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.37 +epoch: 0|step: 583|ppo_ep: 1|act_loss: 0.007049560546875|cri_loss: 0.005939483642578125|unsuper_loss: 0.0 +average reward score: 4.2734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.27%) |Training time=0.44s (20.18%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.37 +epoch: 0|step: 584|ppo_ep: 1|act_loss: -0.004924774169921875|cri_loss: 0.00067138671875|unsuper_loss: 0.0 +average reward score: 4.3828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.90%) |Training time=0.45s (20.56%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.37 +epoch: 0|step: 585|ppo_ep: 1|act_loss: -0.0716552734375|cri_loss: -0.0330810546875|unsuper_loss: 0.0 +average reward score: 4.59375 +------------------------------------------------------------------------------------- +|E2E latency=2.41s |Gather latency=0.00s (0.00%) |Generate time=1.80s (74.76%) |Training time=0.45s (18.58%) |Others=0.16 (6.65%)|CurSamplesPerSec=13.30 |AvgSamplesPerSec=14.37 +epoch: 0|step: 586|ppo_ep: 1|act_loss: -0.10107421875|cri_loss: -0.04376220703125|unsuper_loss: 0.0 +average reward score: 3.1640625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.72%) |Training time=0.45s (20.75%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.37 +epoch: 0|step: 587|ppo_ep: 1|act_loss: -0.05377197265625|cri_loss: -0.02410888671875|unsuper_loss: 0.0 +average reward score: 4.57421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.75%) |Training time=0.45s (20.71%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.37 +epoch: 0|step: 588|ppo_ep: 1|act_loss: -0.0753173828125|cri_loss: -0.03302001953125|unsuper_loss: 0.0 +average reward score: 5.07421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.77%) |Training time=0.45s (20.69%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.37 +[2023-04-14 09:09:15,849] [INFO] [logging.py:96:log_dist] [Rank 0] step=590, skipped=10, lr=[9.567845205974828e-06, 9.567845205974828e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:09:15,867] [INFO] [timer.py:199:stop] epoch=0/micro_step=590/global_step=590, RunningAvgSamplesPerSec=106.49800390590352, CurrSamplesPerSec=110.78347744838766, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:09:15,960] [INFO] [logging.py:96:log_dist] [Rank 0] step=590, skipped=10, lr=[4.957432749209755e-06, 4.957432749209755e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 589|ppo_ep: 1|act_loss: -0.0306396484375|cri_loss: -0.0132293701171875|unsuper_loss: 0.0 +average reward score: 5.65625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.88%) |Training time=0.45s (20.64%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.37 +epoch: 0|step: 590|ppo_ep: 1|act_loss: -0.05267333984375|cri_loss: -0.025299072265625|unsuper_loss: 0.0 +average reward score: 5.5859375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.66%) |Training time=0.45s (20.81%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.37 +epoch: 0|step: 591|ppo_ep: 1|act_loss: -0.007110595703125|cri_loss: -0.0025691986083984375|unsuper_loss: 0.0 +average reward score: 5.14453125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.71%) |Training time=0.45s (20.74%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.37 +epoch: 0|step: 592|ppo_ep: 1|act_loss: -0.014495849609375|cri_loss: -0.005519866943359375|unsuper_loss: 0.0 +average reward score: 4.89453125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.50%) |Training time=0.46s (20.98%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.37 +epoch: 0|step: 593|ppo_ep: 1|act_loss: 0.04583740234375|cri_loss: 0.031585693359375|unsuper_loss: 0.0 +average reward score: 5.0546875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.75%) |Training time=0.45s (20.72%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.37 +epoch: 0|step: 594|ppo_ep: 1|act_loss: -0.0069427490234375|cri_loss: -0.00215911865234375|unsuper_loss: 0.0 +average reward score: 5.5859375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.60%) |Training time=0.45s (20.86%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.37 +epoch: 0|step: 595|ppo_ep: 1|act_loss: 0.0129241943359375|cri_loss: 0.0084228515625|unsuper_loss: 0.0 +average reward score: 5.734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.90%) |Training time=0.45s (20.55%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.37 +epoch: 0|step: 596|ppo_ep: 1|act_loss: 0.01959228515625|cri_loss: 0.01129913330078125|unsuper_loss: 0.0 +average reward score: 5.81640625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.08%) |Training time=0.44s (20.35%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.37 +epoch: 0|step: 597|ppo_ep: 1|act_loss: 0.0300140380859375|cri_loss: 0.0162200927734375|unsuper_loss: 0.0 +average reward score: 6.23046875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.08%) |Training time=0.42s (19.33%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.37 +epoch: 0|step: 598|ppo_ep: 1|act_loss: 0.0751953125|cri_loss: 0.041015625|unsuper_loss: 0.0 +average reward score: 5.95703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.68%) |Training time=0.43s (19.76%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.37 +[2023-04-14 09:09:37,614] [INFO] [logging.py:96:log_dist] [Rank 0] step=600, skipped=10, lr=[9.564396695205104e-06, 9.564396695205104e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:09:37,618] [INFO] [timer.py:199:stop] epoch=0/micro_step=600/global_step=600, RunningAvgSamplesPerSec=106.58806837441567, CurrSamplesPerSec=100.7356997842199, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:09:37,737] [INFO] [logging.py:96:log_dist] [Rank 0] step=600, skipped=10, lr=[4.955645956064821e-06, 4.955645956064821e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 599|ppo_ep: 1|act_loss: 0.011077880859375|cri_loss: 0.00719451904296875|unsuper_loss: 0.0 +average reward score: 5.87890625 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.62s (72.81%) |Training time=0.48s (21.61%) |Others=0.12 (5.58%)|CurSamplesPerSec=14.39 |AvgSamplesPerSec=14.37 +epoch: 0|step: 600|ppo_ep: 1|act_loss: 0.03582763671875|cri_loss: 0.0189361572265625|unsuper_loss: 0.0 +average reward score: 6.08984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.91%) |Training time=0.45s (20.53%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.37 +epoch: 0|step: 601|ppo_ep: 1|act_loss: -0.0229949951171875|cri_loss: -0.0109100341796875|unsuper_loss: 0.0 +average reward score: 5.7734375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.70%) |Training time=0.46s (20.80%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.37 +epoch: 0|step: 602|ppo_ep: 1|act_loss: -0.032379150390625|cri_loss: -0.0155487060546875|unsuper_loss: 0.0 +average reward score: 5.8984375 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.73s (76.26%) |Training time=0.44s (19.28%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.07 |AvgSamplesPerSec=14.37 +epoch: 0|step: 603|ppo_ep: 1|act_loss: -0.0089874267578125|cri_loss: -0.00399017333984375|unsuper_loss: 0.0 +average reward score: 5.703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.18%) |Training time=0.44s (20.28%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.37 +epoch: 0|step: 604|ppo_ep: 1|act_loss: -0.056182861328125|cri_loss: -0.0275115966796875|unsuper_loss: 0.0 +average reward score: 5.8984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.25%) |Training time=0.44s (20.19%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.37 +epoch: 0|step: 605|ppo_ep: 1|act_loss: -0.0023899078369140625|cri_loss: -0.0002040863037109375|unsuper_loss: 0.0 +average reward score: 5.5 +------------------------------------------------------------------------------------- +|E2E latency=2.52s |Gather latency=0.00s (0.00%) |Generate time=1.63s (64.46%) |Training time=0.44s (17.39%) |Others=0.46 (18.15%)|CurSamplesPerSec=12.69 |AvgSamplesPerSec=14.37 +epoch: 0|step: 606|ppo_ep: 1|act_loss: 0.0224609375|cri_loss: 0.012969970703125|unsuper_loss: 0.0 +average reward score: 5.8828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.77%) |Training time=0.44s (20.46%) |Others=0.10 (4.77%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.37 +epoch: 0|step: 607|ppo_ep: 1|act_loss: 0.005245208740234375|cri_loss: 0.00289154052734375|unsuper_loss: 0.0 +average reward score: 5.80859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.24%) |Training time=0.44s (20.18%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.37 +epoch: 0|step: 608|ppo_ep: 1|act_loss: 0.064208984375|cri_loss: 0.033111572265625|unsuper_loss: 0.0 +average reward score: 6.0 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.11%) |Training time=0.44s (20.30%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.37 +[2023-04-14 09:09:59,804] [INFO] [logging.py:96:log_dist] [Rank 0] step=610, skipped=10, lr=[9.560877935143189e-06, 9.560877935143189e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:09:59,822] [INFO] [timer.py:199:stop] epoch=0/micro_step=610/global_step=610, RunningAvgSamplesPerSec=106.7184158486569, CurrSamplesPerSec=117.44905641885447, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:09:59,917] [INFO] [logging.py:96:log_dist] [Rank 0] step=610, skipped=10, lr=[4.953822764322896e-06, 4.953822764322896e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 609|ppo_ep: 1|act_loss: -0.0032367706298828125|cri_loss: -0.001232147216796875|unsuper_loss: 0.0 +average reward score: 5.40625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.27%) |Training time=0.44s (20.09%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.37 +epoch: 0|step: 610|ppo_ep: 1|act_loss: -0.024261474609375|cri_loss: -0.011322021484375|unsuper_loss: 0.0 +average reward score: 6.328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.11%) |Training time=0.44s (20.32%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.37 +epoch: 0|step: 611|ppo_ep: 1|act_loss: -0.024688720703125|cri_loss: -0.0111083984375|unsuper_loss: 0.0 +average reward score: 5.48828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.31%) |Training time=0.44s (20.12%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.38 +epoch: 0|step: 612|ppo_ep: 1|act_loss: -0.08856201171875|cri_loss: -0.041107177734375|unsuper_loss: 0.0 +average reward score: 5.3046875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.50%) |Training time=0.41s (18.88%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.38 +epoch: 0|step: 613|ppo_ep: 1|act_loss: -0.018341064453125|cri_loss: -0.0086517333984375|unsuper_loss: 0.0 +average reward score: 5.0078125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.25%) |Training time=0.42s (19.09%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.38 +epoch: 0|step: 614|ppo_ep: 1|act_loss: -0.032562255859375|cri_loss: -0.014404296875|unsuper_loss: 0.0 +average reward score: 6.59375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.67s (76.73%) |Training time=0.41s (18.59%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.38 +epoch: 0|step: 615|ppo_ep: 1|act_loss: 0.0167388916015625|cri_loss: 0.0095367431640625|unsuper_loss: 0.0 +average reward score: 6.2578125 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.84s (78.72%) |Training time=0.40s (17.01%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.38 +epoch: 0|step: 616|ppo_ep: 1|act_loss: 0.0194091796875|cri_loss: 0.01035308837890625|unsuper_loss: 0.0 +average reward score: 5.375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.16%) |Training time=0.44s (20.27%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.38 +epoch: 0|step: 617|ppo_ep: 1|act_loss: -0.0017223358154296875|cri_loss: -0.0002288818359375|unsuper_loss: 0.0 +average reward score: 5.72265625 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.67s (75.95%) |Training time=0.43s (19.48%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.38 +epoch: 0|step: 618|ppo_ep: 1|act_loss: 0.0160675048828125|cri_loss: 0.00909423828125|unsuper_loss: 0.0 +average reward score: 5.78125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.10%) |Training time=0.44s (20.35%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.38 +[2023-04-14 09:10:21,685] [INFO] [logging.py:96:log_dist] [Rank 0] step=620, skipped=10, lr=[9.557288977945587e-06, 9.557288977945587e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:10:21,703] [INFO] [timer.py:199:stop] epoch=0/micro_step=620/global_step=620, RunningAvgSamplesPerSec=106.9398379077226, CurrSamplesPerSec=119.05743097000362, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:10:21,797] [INFO] [logging.py:96:log_dist] [Rank 0] step=620, skipped=10, lr=[4.9519632010080765e-06, 4.9519632010080765e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 619|ppo_ep: 1|act_loss: 0.015380859375|cri_loss: 0.0081634521484375|unsuper_loss: 0.0 +average reward score: 6.078125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.44%) |Training time=0.43s (20.00%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.38 +epoch: 0|step: 620|ppo_ep: 1|act_loss: -0.004367828369140625|cri_loss: -0.001682281494140625|unsuper_loss: 0.0 +average reward score: 5.515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.97%) |Training time=0.42s (19.48%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.38 +epoch: 0|step: 621|ppo_ep: 1|act_loss: -0.037322998046875|cri_loss: -0.0178985595703125|unsuper_loss: 0.0 +average reward score: 6.125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.80%) |Training time=0.45s (20.65%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.38 +epoch: 0|step: 622|ppo_ep: 1|act_loss: -0.0214996337890625|cri_loss: -0.0097198486328125|unsuper_loss: 0.0 +average reward score: 6.15625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.67%) |Training time=0.45s (20.79%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.38 +epoch: 0|step: 623|ppo_ep: 1|act_loss: -0.051300048828125|cri_loss: -0.024322509765625|unsuper_loss: 0.0 +average reward score: 6.875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.46%) |Training time=0.45s (20.79%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.38 +epoch: 0|step: 624|ppo_ep: 1|act_loss: -0.03497314453125|cri_loss: -0.016021728515625|unsuper_loss: 0.0 +average reward score: 5.1328125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.20%) |Training time=0.44s (20.26%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.38 +epoch: 0|step: 625|ppo_ep: 1|act_loss: 0.016265869140625|cri_loss: 0.00904083251953125|unsuper_loss: 0.0 +average reward score: 6.7734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.57%) |Training time=0.45s (20.88%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.38 +epoch: 0|step: 626|ppo_ep: 1|act_loss: 0.03240966796875|cri_loss: 0.0170135498046875|unsuper_loss: 0.0 +average reward score: 5.78125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.40%) |Training time=0.46s (21.08%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.38 +epoch: 0|step: 627|ppo_ep: 1|act_loss: 0.0118408203125|cri_loss: 0.006252288818359375|unsuper_loss: 0.0 +average reward score: 5.59375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.45%) |Training time=0.43s (19.66%) |Others=0.11 (4.89%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.38 +epoch: 0|step: 628|ppo_ep: 1|act_loss: 0.0095062255859375|cri_loss: 0.005832672119140625|unsuper_loss: 0.0 +average reward score: 5.171875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.35%) |Training time=0.42s (19.12%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.38 +[2023-04-14 09:10:43,620] [INFO] [logging.py:96:log_dist] [Rank 0] step=630, skipped=10, lr=[9.55362987680931e-06, 9.55362987680931e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:10:44,139] [INFO] [timer.py:199:stop] epoch=0/micro_step=630/global_step=630, RunningAvgSamplesPerSec=106.77900790800318, CurrSamplesPerSec=42.10884357156303, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:10:44,235] [INFO] [logging.py:96:log_dist] [Rank 0] step=630, skipped=10, lr=[4.95006729368358e-06, 4.95006729368358e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 629|ppo_ep: 1|act_loss: 0.048309326171875|cri_loss: 0.027130126953125|unsuper_loss: 0.0 +average reward score: 5.5546875 +------------------------------------------------------------------------------------- +|E2E latency=2.84s |Gather latency=0.00s (0.00%) |Generate time=1.82s (63.97%) |Training time=0.92s (32.46%) |Others=0.10 (3.57%)|CurSamplesPerSec=11.25 |AvgSamplesPerSec=14.38 +epoch: 0|step: 630|ppo_ep: 1|act_loss: 0.08837890625|cri_loss: 0.0487060546875|unsuper_loss: 0.0 +average reward score: 4.65234375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.67%) |Training time=0.46s (20.86%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.38 +epoch: 0|step: 631|ppo_ep: 1|act_loss: 0.0255584716796875|cri_loss: 0.0142059326171875|unsuper_loss: 0.0 +average reward score: 4.984375 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.72s (75.66%) |Training time=0.46s (20.00%) |Others=0.10 (4.35%)|CurSamplesPerSec=14.05 |AvgSamplesPerSec=14.38 +epoch: 0|step: 632|ppo_ep: 1|act_loss: 0.001983642578125|cri_loss: 0.00154876708984375|unsuper_loss: 0.0 +average reward score: 5.5625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.44%) |Training time=0.46s (21.01%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.38 +epoch: 0|step: 633|ppo_ep: 1|act_loss: -0.04437255859375|cri_loss: -0.02142333984375|unsuper_loss: 0.0 +average reward score: 5.98828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.39%) |Training time=0.46s (21.04%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.38 +epoch: 0|step: 634|ppo_ep: 1|act_loss: -0.04608154296875|cri_loss: -0.0216827392578125|unsuper_loss: 0.0 +average reward score: 5.75 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.35%) |Training time=0.46s (21.03%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.38 +epoch: 0|step: 635|ppo_ep: 1|act_loss: -0.0011425018310546875|cri_loss: 0.0001544952392578125|unsuper_loss: 0.0 +average reward score: 5.62890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.55%) |Training time=0.45s (20.89%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.38 +epoch: 0|step: 636|ppo_ep: 1|act_loss: 0.02423095703125|cri_loss: 0.01284027099609375|unsuper_loss: 0.0 +average reward score: 5.94921875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.34%) |Training time=0.46s (21.04%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.38 +epoch: 0|step: 637|ppo_ep: 1|act_loss: 0.058624267578125|cri_loss: 0.031890869140625|unsuper_loss: 0.0 +average reward score: 5.92578125 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.46%) |Training time=0.48s (21.83%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.38 +epoch: 0|step: 638|ppo_ep: 1|act_loss: 0.01776123046875|cri_loss: 0.00981903076171875|unsuper_loss: 0.0 +average reward score: 5.73046875 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.52%) |Training time=0.49s (22.02%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.38 +[2023-04-14 09:11:06,101] [INFO] [logging.py:96:log_dist] [Rank 0] step=640, skipped=10, lr=[9.549900685971059e-06, 9.549900685971059e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:11:06,119] [INFO] [timer.py:199:stop] epoch=0/micro_step=640/global_step=640, RunningAvgSamplesPerSec=106.75939140991701, CurrSamplesPerSec=95.74492697381844, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:11:06,212] [INFO] [logging.py:96:log_dist] [Rank 0] step=640, skipped=10, lr=[4.948135070451325e-06, 4.948135070451325e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 639|ppo_ep: 1|act_loss: 0.023193359375|cri_loss: 0.0130615234375|unsuper_loss: 0.0 +average reward score: 5.796875 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.14%) |Training time=0.50s (22.43%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.44 |AvgSamplesPerSec=14.38 +epoch: 0|step: 640|ppo_ep: 1|act_loss: -0.017852783203125|cri_loss: -0.00658416748046875|unsuper_loss: 0.0 +average reward score: 5.8828125 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.58%) |Training time=0.48s (21.95%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.38 +epoch: 0|step: 641|ppo_ep: 1|act_loss: -0.00551605224609375|cri_loss: -0.0018453598022460938|unsuper_loss: 0.0 +average reward score: 6.1875 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.28%) |Training time=0.48s (21.89%) |Others=0.11 (4.83%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.38 +epoch: 0|step: 642|ppo_ep: 1|act_loss: -0.0426025390625|cri_loss: -0.01947021484375|unsuper_loss: 0.0 +average reward score: 6.01171875 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.63%) |Training time=0.46s (20.89%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.54 |AvgSamplesPerSec=14.38 +epoch: 0|step: 643|ppo_ep: 1|act_loss: -0.005496978759765625|cri_loss: -0.0017728805541992188|unsuper_loss: 0.0 +average reward score: 5.9375 +------------------------------------------------------------------------------------- +|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.72s (73.10%) |Training time=0.53s (22.70%) |Others=0.10 (4.20%)|CurSamplesPerSec=13.62 |AvgSamplesPerSec=14.38 +epoch: 0|step: 644|ppo_ep: 1|act_loss: -0.01617431640625|cri_loss: -0.007472991943359375|unsuper_loss: 0.0 +average reward score: 6.1484375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.85%) |Training time=0.45s (20.63%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.38 +epoch: 0|step: 645|ppo_ep: 1|act_loss: -0.02459716796875|cri_loss: -0.00748443603515625|unsuper_loss: 0.0 +average reward score: 5.640625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.72%) |Training time=0.45s (20.76%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.38 +epoch: 0|step: 646|ppo_ep: 1|act_loss: 0.0555419921875|cri_loss: 0.028778076171875|unsuper_loss: 0.0 +average reward score: 5.328125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.55%) |Training time=0.45s (20.84%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.38 +epoch: 0|step: 647|ppo_ep: 1|act_loss: 0.06146240234375|cri_loss: 0.03546142578125|unsuper_loss: 0.0 +average reward score: 4.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.79%) |Training time=0.45s (20.67%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.38 +epoch: 0|step: 648|ppo_ep: 1|act_loss: 0.021026611328125|cri_loss: 0.01177215576171875|unsuper_loss: 0.0 +average reward score: 4.6796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.76%) |Training time=0.45s (20.71%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.38 +[2023-04-14 09:11:28,136] [INFO] [logging.py:96:log_dist] [Rank 0] step=650, skipped=10, lr=[9.546101460706439e-06, 9.546101460706439e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:11:28,154] [INFO] [timer.py:199:stop] epoch=0/micro_step=650/global_step=650, RunningAvgSamplesPerSec=106.73659438423795, CurrSamplesPerSec=108.43134463767952, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:11:28,247] [INFO] [logging.py:96:log_dist] [Rank 0] step=650, skipped=10, lr=[4.946166559951523e-06, 4.946166559951523e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 649|ppo_ep: 1|act_loss: 0.06475830078125|cri_loss: 0.035064697265625|unsuper_loss: 0.0 +average reward score: 4.3125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.49%) |Training time=0.46s (21.01%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.38 +epoch: 0|step: 650|ppo_ep: 1|act_loss: 0.03387451171875|cri_loss: 0.018646240234375|unsuper_loss: 0.0 +average reward score: 5.7890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.62%) |Training time=0.45s (20.83%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.38 +epoch: 0|step: 651|ppo_ep: 1|act_loss: -0.01080322265625|cri_loss: -0.00469207763671875|unsuper_loss: 0.0 +average reward score: 6.0390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.84%) |Training time=0.45s (20.62%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.38 +epoch: 0|step: 652|ppo_ep: 1|act_loss: 0.023223876953125|cri_loss: 0.0124664306640625|unsuper_loss: 0.0 +average reward score: 6.0 +------------------------------------------------------------------------------------- +|E2E latency=2.83s |Gather latency=0.00s (0.00%) |Generate time=1.63s (57.58%) |Training time=0.45s (15.96%) |Others=0.75 (26.45%)|CurSamplesPerSec=11.32 |AvgSamplesPerSec=14.38 +epoch: 0|step: 653|ppo_ep: 1|act_loss: -0.010345458984375|cri_loss: -0.00450897216796875|unsuper_loss: 0.0 +average reward score: 5.46875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.61%) |Training time=0.45s (20.86%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.38 +epoch: 0|step: 654|ppo_ep: 1|act_loss: 0.023651123046875|cri_loss: 0.0128631591796875|unsuper_loss: 0.0 +average reward score: 5.9453125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.40%) |Training time=0.46s (21.06%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.38 +epoch: 0|step: 655|ppo_ep: 1|act_loss: -0.0174407958984375|cri_loss: -0.0077667236328125|unsuper_loss: 0.0 +average reward score: 6.359375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.62%) |Training time=0.45s (20.84%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.38 +epoch: 0|step: 656|ppo_ep: 1|act_loss: -0.03515625|cri_loss: -0.015533447265625|unsuper_loss: 0.0 +average reward score: 5.6796875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.62%) |Training time=0.46s (20.85%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.38 +epoch: 0|step: 657|ppo_ep: 1|act_loss: 0.01385498046875|cri_loss: 0.007476806640625|unsuper_loss: 0.0 +average reward score: 5.5859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.67%) |Training time=0.43s (19.79%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.38 +epoch: 0|step: 658|ppo_ep: 1|act_loss: -0.00970458984375|cri_loss: -0.00411224365234375|unsuper_loss: 0.0 +average reward score: 6.0078125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.41%) |Training time=0.46s (21.06%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.38 +[2023-04-14 09:11:50,718] [INFO] [logging.py:96:log_dist] [Rank 0] step=660, skipped=10, lr=[9.542232257329135e-06, 9.542232257329135e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:11:50,736] [INFO] [timer.py:199:stop] epoch=0/micro_step=660/global_step=660, RunningAvgSamplesPerSec=106.79015890097004, CurrSamplesPerSec=105.78220256428848, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:11:50,845] [INFO] [logging.py:96:log_dist] [Rank 0] step=660, skipped=10, lr=[4.944161791362246e-06, 4.944161791362246e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 659|ppo_ep: 1|act_loss: 0.037841796875|cri_loss: 0.023101806640625|unsuper_loss: 0.0 +average reward score: 6.63671875 +------------------------------------------------------------------------------------- +|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.79s (75.59%) |Training time=0.47s (19.60%) |Others=0.11 (4.81%)|CurSamplesPerSec=13.48 |AvgSamplesPerSec=14.38 +epoch: 0|step: 660|ppo_ep: 1|act_loss: -0.0282745361328125|cri_loss: -0.013214111328125|unsuper_loss: 0.0 +average reward score: 6.0625 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.72s (75.78%) |Training time=0.45s (19.86%) |Others=0.10 (4.35%)|CurSamplesPerSec=14.09 |AvgSamplesPerSec=14.38 +epoch: 0|step: 661|ppo_ep: 1|act_loss: -0.025054931640625|cri_loss: -0.012237548828125|unsuper_loss: 0.0 +average reward score: 5.82421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.50%) |Training time=0.46s (20.97%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.38 +epoch: 0|step: 662|ppo_ep: 1|act_loss: -0.02947998046875|cri_loss: -0.012115478515625|unsuper_loss: 0.0 +average reward score: 6.34375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.23%) |Training time=0.46s (21.24%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.38 +epoch: 0|step: 663|ppo_ep: 1|act_loss: 0.00266265869140625|cri_loss: 0.00197601318359375|unsuper_loss: 0.0 +average reward score: 6.06640625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.53%) |Training time=0.45s (20.94%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.38 +epoch: 0|step: 664|ppo_ep: 1|act_loss: 0.026275634765625|cri_loss: 0.01369476318359375|unsuper_loss: 0.0 +average reward score: 5.7421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.68%) |Training time=0.45s (20.79%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.38 +epoch: 0|step: 665|ppo_ep: 1|act_loss: -0.0006084442138671875|cri_loss: 0.00041294097900390625|unsuper_loss: 0.0 +average reward score: 6.203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.46%) |Training time=0.46s (20.99%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.38 +epoch: 0|step: 666|ppo_ep: 1|act_loss: -0.00737762451171875|cri_loss: -0.00324249267578125|unsuper_loss: 0.0 +average reward score: 6.11328125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.27%) |Training time=0.46s (21.21%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.38 +epoch: 0|step: 667|ppo_ep: 1|act_loss: 0.00258636474609375|cri_loss: 0.0026836395263671875|unsuper_loss: 0.0 +average reward score: 6.2265625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.51%) |Training time=0.46s (20.95%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.38 +epoch: 0|step: 668|ppo_ep: 1|act_loss: 0.0150604248046875|cri_loss: 0.0088348388671875|unsuper_loss: 0.0 +average reward score: 6.2734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.63%) |Training time=0.45s (20.83%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.38 +[2023-04-14 09:12:12,578] [INFO] [logging.py:96:log_dist] [Rank 0] step=670, skipped=10, lr=[9.538293133190075e-06, 9.538293133190075e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:12:12,596] [INFO] [timer.py:199:stop] epoch=0/micro_step=670/global_step=670, RunningAvgSamplesPerSec=106.82658739397799, CurrSamplesPerSec=110.64967844108436, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:12:12,689] [INFO] [logging.py:96:log_dist] [Rank 0] step=670, skipped=10, lr=[4.942120794399002e-06, 4.942120794399002e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 669|ppo_ep: 1|act_loss: 0.037322998046875|cri_loss: 0.022125244140625|unsuper_loss: 0.0 +average reward score: 5.73828125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.71%) |Training time=0.45s (20.78%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.38 +epoch: 0|step: 670|ppo_ep: 1|act_loss: -0.06207275390625|cri_loss: -0.0292816162109375|unsuper_loss: 0.0 +average reward score: 6.35546875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.67%) |Training time=0.45s (20.80%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.38 +epoch: 0|step: 671|ppo_ep: 1|act_loss: 0.051605224609375|cri_loss: 0.0302734375|unsuper_loss: 0.0 +average reward score: 6.1015625 +------------------------------------------------------------------------------------- +|E2E latency=2.61s |Gather latency=0.00s (0.00%) |Generate time=1.64s (63.04%) |Training time=0.45s (17.15%) |Others=0.52 (19.81%)|CurSamplesPerSec=12.27 |AvgSamplesPerSec=14.38 +epoch: 0|step: 672|ppo_ep: 1|act_loss: -0.0310516357421875|cri_loss: -0.0143280029296875|unsuper_loss: 0.0 +average reward score: 6.0390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.67%) |Training time=0.43s (19.75%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.38 +epoch: 0|step: 673|ppo_ep: 1|act_loss: 0.03326416015625|cri_loss: 0.017730712890625|unsuper_loss: 0.0 +average reward score: 5.83203125 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.62s (71.00%) |Training time=0.56s (24.67%) |Others=0.10 (4.32%)|CurSamplesPerSec=14.01 |AvgSamplesPerSec=14.38 +epoch: 0|step: 674|ppo_ep: 1|act_loss: 0.06341552734375|cri_loss: 0.033233642578125|unsuper_loss: 0.0 +average reward score: 5.9140625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.38%) |Training time=0.44s (20.08%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.38 +epoch: 0|step: 675|ppo_ep: 1|act_loss: 0.003040313720703125|cri_loss: 0.0020275115966796875|unsuper_loss: 0.0 +average reward score: 5.77734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.85%) |Training time=0.45s (20.59%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.38 +epoch: 0|step: 676|ppo_ep: 1|act_loss: -0.002918243408203125|cri_loss: -0.0004673004150390625|unsuper_loss: 0.0 +average reward score: 6.171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.62%) |Training time=0.45s (20.83%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.38 +epoch: 0|step: 677|ppo_ep: 1|act_loss: -0.061248779296875|cri_loss: -0.025970458984375|unsuper_loss: 0.0 +average reward score: 5.7109375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.63%) |Training time=0.45s (20.81%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.38 +epoch: 0|step: 678|ppo_ep: 1|act_loss: -0.04840087890625|cri_loss: -0.019805908203125|unsuper_loss: 0.0 +average reward score: 6.37109375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.63%) |Training time=0.45s (20.82%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.38 +[2023-04-14 09:12:34,852] [INFO] [logging.py:96:log_dist] [Rank 0] step=680, skipped=10, lr=[9.534284146676578e-06, 9.534284146676578e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:12:34,870] [INFO] [timer.py:199:stop] epoch=0/micro_step=680/global_step=680, RunningAvgSamplesPerSec=106.85888050857336, CurrSamplesPerSec=109.72457047932349, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:12:34,963] [INFO] [logging.py:96:log_dist] [Rank 0] step=680, skipped=10, lr=[4.9400435993142895e-06, 4.9400435993142895e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 679|ppo_ep: 1|act_loss: -0.08935546875|cri_loss: -0.039794921875|unsuper_loss: 0.0 +average reward score: 6.80078125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.63%) |Training time=0.45s (20.86%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.38 +epoch: 0|step: 680|ppo_ep: 1|act_loss: -0.05218505859375|cri_loss: -0.02313232421875|unsuper_loss: 0.0 +average reward score: 6.2578125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.67%) |Training time=0.45s (20.79%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.38 +epoch: 0|step: 681|ppo_ep: 1|act_loss: -0.0291748046875|cri_loss: -0.01338958740234375|unsuper_loss: 0.0 +average reward score: 6.03125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.67%) |Training time=0.45s (20.78%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.38 +epoch: 0|step: 682|ppo_ep: 1|act_loss: -0.0007305145263671875|cri_loss: 0.0006284713745117188|unsuper_loss: 0.0 +average reward score: 5.96875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.50%) |Training time=0.46s (20.96%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.38 +epoch: 0|step: 683|ppo_ep: 1|act_loss: 0.072021484375|cri_loss: 0.04058837890625|unsuper_loss: 0.0 +average reward score: 6.0703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.44%) |Training time=0.46s (21.04%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.38 +epoch: 0|step: 684|ppo_ep: 1|act_loss: 0.017791748046875|cri_loss: 0.0110321044921875|unsuper_loss: 0.0 +average reward score: 5.87890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.54%) |Training time=0.45s (20.91%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.38 +epoch: 0|step: 685|ppo_ep: 1|act_loss: 0.0330810546875|cri_loss: 0.018218994140625|unsuper_loss: 0.0 +average reward score: 5.90625 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.66%) |Training time=0.45s (20.36%) |Others=0.13 (5.98%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.38 +epoch: 0|step: 686|ppo_ep: 1|act_loss: -0.005847930908203125|cri_loss: 8.392333984375e-05|unsuper_loss: 0.0 +average reward score: 7.765625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.57%) |Training time=0.46s (20.89%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.38 +epoch: 0|step: 687|ppo_ep: 1|act_loss: -0.0004673004150390625|cri_loss: 0.0002837181091308594|unsuper_loss: 0.0 +average reward score: 7.359375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.28%) |Training time=0.44s (20.15%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.39 +epoch: 0|step: 688|ppo_ep: 1|act_loss: -0.022308349609375|cri_loss: -0.01081085205078125|unsuper_loss: 0.0 +average reward score: 7.1875 +------------------------------------------------------------------------------------- +|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.63s (70.81%) |Training time=0.57s (24.90%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.91 |AvgSamplesPerSec=14.38 +[2023-04-14 09:12:56,792] [INFO] [logging.py:96:log_dist] [Rank 0] step=690, skipped=10, lr=[9.5302053572115e-06, 9.5302053572115e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:12:56,809] [INFO] [timer.py:199:stop] epoch=0/micro_step=690/global_step=690, RunningAvgSamplesPerSec=106.85876245451874, CurrSamplesPerSec=107.36396135087939, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:12:56,902] [INFO] [logging.py:96:log_dist] [Rank 0] step=690, skipped=10, lr=[4.937930236897151e-06, 4.937930236897151e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 689|ppo_ep: 1|act_loss: -0.044921875|cri_loss: -0.0218658447265625|unsuper_loss: 0.0 +average reward score: 6.12109375 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.65s (74.47%) |Training time=0.47s (21.09%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.41 |AvgSamplesPerSec=14.38 +epoch: 0|step: 690|ppo_ep: 1|act_loss: 0.017822265625|cri_loss: 0.00949859619140625|unsuper_loss: 0.0 +average reward score: 6.8515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.48%) |Training time=0.46s (20.98%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39 +epoch: 0|step: 691|ppo_ep: 1|act_loss: -0.010772705078125|cri_loss: -0.0048675537109375|unsuper_loss: 0.0 +average reward score: 6.66796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.49%) |Training time=0.46s (20.97%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.39 +epoch: 0|step: 692|ppo_ep: 1|act_loss: -0.02557373046875|cri_loss: -0.011932373046875|unsuper_loss: 0.0 +average reward score: 6.3984375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.49%) |Training time=0.46s (20.99%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.39 +epoch: 0|step: 693|ppo_ep: 1|act_loss: -0.012969970703125|cri_loss: -0.0058441162109375|unsuper_loss: 0.0 +average reward score: 6.5859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.10%) |Training time=0.44s (20.36%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.39 +epoch: 0|step: 694|ppo_ep: 1|act_loss: -0.0142974853515625|cri_loss: -0.006664276123046875|unsuper_loss: 0.0 +average reward score: 6.52734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.05%) |Training time=0.44s (20.41%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39 +epoch: 0|step: 695|ppo_ep: 1|act_loss: -0.031768798828125|cri_loss: -0.01519012451171875|unsuper_loss: 0.0 +average reward score: 6.88671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.43%) |Training time=0.46s (21.02%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39 +epoch: 0|step: 696|ppo_ep: 1|act_loss: 0.026397705078125|cri_loss: 0.01434326171875|unsuper_loss: 0.0 +average reward score: 7.0703125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.52%) |Training time=0.46s (20.94%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.39 +epoch: 0|step: 697|ppo_ep: 1|act_loss: 0.00650787353515625|cri_loss: 0.003849029541015625|unsuper_loss: 0.0 +average reward score: 5.8046875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.76%) |Training time=0.45s (20.70%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.39 +epoch: 0|step: 698|ppo_ep: 1|act_loss: 0.00577545166015625|cri_loss: 0.0033206939697265625|unsuper_loss: 0.0 +average reward score: 6.82421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.57%) |Training time=0.45s (20.89%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.39 +[2023-04-14 09:13:18,531] [INFO] [logging.py:96:log_dist] [Rank 0] step=700, skipped=10, lr=[9.526056825252338e-06, 9.526056825252338e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:13:18,548] [INFO] [timer.py:199:stop] epoch=0/micro_step=700/global_step=700, RunningAvgSamplesPerSec=106.90781997916815, CurrSamplesPerSec=108.76533453483435, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:13:18,641] [INFO] [logging.py:96:log_dist] [Rank 0] step=700, skipped=10, lr=[4.935780738472714e-06, 4.935780738472714e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 699|ppo_ep: 1|act_loss: 0.04449462890625|cri_loss: 0.0231781005859375|unsuper_loss: 0.0 +average reward score: 6.2578125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.49%) |Training time=0.46s (21.00%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.39 +epoch: 0|step: 700|ppo_ep: 1|act_loss: 0.03704833984375|cri_loss: 0.01910400390625|unsuper_loss: 0.0 +average reward score: 6.64453125 +------------------------------------------------------------------------------------- +|E2E latency=2.41s |Gather latency=0.00s (0.00%) |Generate time=1.62s (67.26%) |Training time=0.46s (19.00%) |Others=0.33 (13.75%)|CurSamplesPerSec=13.30 |AvgSamplesPerSec=14.39 +epoch: 0|step: 701|ppo_ep: 1|act_loss: -0.01427459716796875|cri_loss: -0.00638580322265625|unsuper_loss: 0.0 +average reward score: 6.8671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.99%) |Training time=0.44s (20.43%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.39 +epoch: 0|step: 702|ppo_ep: 1|act_loss: 0.043365478515625|cri_loss: 0.02227783203125|unsuper_loss: 0.0 +average reward score: 6.65625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.76%) |Training time=0.43s (19.66%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.39 +epoch: 0|step: 703|ppo_ep: 1|act_loss: -0.0113067626953125|cri_loss: -0.0049591064453125|unsuper_loss: 0.0 +average reward score: 7.98828125 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.64s (72.31%) |Training time=0.53s (23.32%) |Others=0.10 (4.37%)|CurSamplesPerSec=14.12 |AvgSamplesPerSec=14.39 +epoch: 0|step: 704|ppo_ep: 1|act_loss: -0.017974853515625|cri_loss: -0.00820159912109375|unsuper_loss: 0.0 +average reward score: 5.9609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.18%) |Training time=0.44s (20.26%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.39 +epoch: 0|step: 705|ppo_ep: 1|act_loss: 0.0028667449951171875|cri_loss: 0.0019893646240234375|unsuper_loss: 0.0 +average reward score: 6.1796875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.01%) |Training time=0.44s (20.35%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.39 +epoch: 0|step: 706|ppo_ep: 1|act_loss: 0.0090789794921875|cri_loss: 0.0047760009765625|unsuper_loss: 0.0 +average reward score: 6.68359375 +------------------------------------------------------------------------------------- +|E2E latency=2.45s |Gather latency=0.00s (0.00%) |Generate time=1.62s (66.09%) |Training time=0.45s (18.32%) |Others=0.38 (15.59%)|CurSamplesPerSec=13.08 |AvgSamplesPerSec=14.39 +epoch: 0|step: 707|ppo_ep: 1|act_loss: -0.019134521484375|cri_loss: -0.00870513916015625|unsuper_loss: 0.0 +average reward score: 5.890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.67%) |Training time=0.45s (20.77%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.39 +epoch: 0|step: 708|ppo_ep: 1|act_loss: -0.0189361572265625|cri_loss: -0.00836181640625|unsuper_loss: 0.0 +average reward score: 6.6953125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.09%) |Training time=0.44s (20.36%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.39 +[2023-04-14 09:13:40,822] [INFO] [logging.py:96:log_dist] [Rank 0] step=710, skipped=10, lr=[9.521838612290344e-06, 9.521838612290344e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:13:40,840] [INFO] [timer.py:199:stop] epoch=0/micro_step=710/global_step=710, RunningAvgSamplesPerSec=106.9759180678844, CurrSamplesPerSec=109.9996541451361, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:13:40,933] [INFO] [logging.py:96:log_dist] [Rank 0] step=710, skipped=10, lr=[4.933595135901733e-06, 4.933595135901733e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 709|ppo_ep: 1|act_loss: -0.016876220703125|cri_loss: -0.008026123046875|unsuper_loss: 0.0 +average reward score: 6.3203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.54%) |Training time=0.45s (20.92%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.39 +epoch: 0|step: 710|ppo_ep: 1|act_loss: -0.021514892578125|cri_loss: -0.01042938232421875|unsuper_loss: 0.0 +average reward score: 6.97265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.74%) |Training time=0.45s (20.70%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.39 +epoch: 0|step: 711|ppo_ep: 1|act_loss: -0.0162353515625|cri_loss: -0.00786590576171875|unsuper_loss: 0.0 +average reward score: 6.328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.71%) |Training time=0.45s (20.74%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.39 +epoch: 0|step: 712|ppo_ep: 1|act_loss: -0.00830078125|cri_loss: -0.0038661956787109375|unsuper_loss: 0.0 +average reward score: 6.0 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.55%) |Training time=0.45s (20.91%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.39 +epoch: 0|step: 713|ppo_ep: 1|act_loss: 0.0106964111328125|cri_loss: 0.005687713623046875|unsuper_loss: 0.0 +average reward score: 7.04296875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.67%) |Training time=0.45s (20.78%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.39 +epoch: 0|step: 714|ppo_ep: 1|act_loss: -0.017486572265625|cri_loss: -0.00839996337890625|unsuper_loss: 0.0 +average reward score: 6.53515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.67%) |Training time=0.45s (20.78%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.39 +epoch: 0|step: 715|ppo_ep: 1|act_loss: -0.013763427734375|cri_loss: -0.006649017333984375|unsuper_loss: 0.0 +average reward score: 5.91796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.78%) |Training time=0.45s (20.66%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.39 +epoch: 0|step: 716|ppo_ep: 1|act_loss: -0.0003304481506347656|cri_loss: 0.0001533031463623047|unsuper_loss: 0.0 +average reward score: 5.84375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.76%) |Training time=0.45s (20.69%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39 +epoch: 0|step: 717|ppo_ep: 1|act_loss: 0.0340576171875|cri_loss: 0.02008056640625|unsuper_loss: 0.0 +average reward score: 5.8125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.94%) |Training time=0.42s (19.52%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39 +epoch: 0|step: 718|ppo_ep: 1|act_loss: -0.03125|cri_loss: -0.0149383544921875|unsuper_loss: 0.0 +average reward score: 6.3828125 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.66s (75.47%) |Training time=0.44s (20.04%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.39 +[2023-04-14 09:14:02,796] [INFO] [logging.py:96:log_dist] [Rank 0] step=720, skipped=10, lr=[9.517550780849608e-06, 9.517550780849608e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:14:02,814] [INFO] [timer.py:199:stop] epoch=0/micro_step=720/global_step=720, RunningAvgSamplesPerSec=107.05598671959284, CurrSamplesPerSec=114.59396915763213, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:14:02,907] [INFO] [logging.py:96:log_dist] [Rank 0] step=720, skipped=10, lr=[4.9313734615801076e-06, 4.9313734615801076e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 719|ppo_ep: 1|act_loss: -0.0001888275146484375|cri_loss: 0.0005574226379394531|unsuper_loss: 0.0 +average reward score: 6.26953125 +------------------------------------------------------------------------------------- +|E2E latency=2.43s |Gather latency=0.00s (0.00%) |Generate time=1.89s (77.74%) |Training time=0.44s (18.20%) |Others=0.10 (4.06%)|CurSamplesPerSec=13.19 |AvgSamplesPerSec=14.39 +epoch: 0|step: 720|ppo_ep: 1|act_loss: 0.01340484619140625|cri_loss: 0.00720977783203125|unsuper_loss: 0.0 +average reward score: 6.65625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.14%) |Training time=0.44s (20.29%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.39 +epoch: 0|step: 721|ppo_ep: 1|act_loss: -0.0146636962890625|cri_loss: -0.006771087646484375|unsuper_loss: 0.0 +average reward score: 5.671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.15%) |Training time=0.44s (20.27%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.39 +epoch: 0|step: 722|ppo_ep: 1|act_loss: 0.025054931640625|cri_loss: 0.013336181640625|unsuper_loss: 0.0 +average reward score: 5.57421875 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.62s (69.13%) |Training time=0.44s (18.96%) |Others=0.28 (11.91%)|CurSamplesPerSec=13.68 |AvgSamplesPerSec=14.39 +epoch: 0|step: 723|ppo_ep: 1|act_loss: 0.007061004638671875|cri_loss: 0.00450897216796875|unsuper_loss: 0.0 +average reward score: 5.5703125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.96%) |Training time=0.44s (20.45%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.39 +epoch: 0|step: 724|ppo_ep: 1|act_loss: 0.06036376953125|cri_loss: 0.037261962890625|unsuper_loss: 0.0 +average reward score: 5.515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.16%) |Training time=0.44s (20.27%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.39 +epoch: 0|step: 725|ppo_ep: 1|act_loss: 0.0367431640625|cri_loss: 0.0194091796875|unsuper_loss: 0.0 +average reward score: 5.88671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.08%) |Training time=0.44s (20.34%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.39 +epoch: 0|step: 726|ppo_ep: 1|act_loss: 0.0280914306640625|cri_loss: 0.0167236328125|unsuper_loss: 0.0 +average reward score: 5.984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.25%) |Training time=0.44s (20.20%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.39 +epoch: 0|step: 727|ppo_ep: 1|act_loss: 0.0149993896484375|cri_loss: 0.009033203125|unsuper_loss: 0.0 +average reward score: 5.49609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.96%) |Training time=0.42s (19.47%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.39 +epoch: 0|step: 728|ppo_ep: 1|act_loss: 0.01483154296875|cri_loss: 0.0087432861328125|unsuper_loss: 0.0 +average reward score: 5.83203125 +------------------------------------------------------------------------------------- +|E2E latency=2.57s |Gather latency=0.00s (0.00%) |Generate time=1.62s (63.00%) |Training time=0.44s (16.91%) |Others=0.52 (20.09%)|CurSamplesPerSec=12.43 |AvgSamplesPerSec=14.39 +[2023-04-14 09:14:24,985] [INFO] [logging.py:96:log_dist] [Rank 0] step=730, skipped=10, lr=[9.51319339448614e-06, 9.51319339448614e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:14:25,003] [INFO] [timer.py:199:stop] epoch=0/micro_step=730/global_step=730, RunningAvgSamplesPerSec=107.17587828789893, CurrSamplesPerSec=115.30782176899385, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:14:25,096] [INFO] [logging.py:96:log_dist] [Rank 0] step=730, skipped=10, lr=[4.929115748438415e-06, 4.929115748438415e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 729|ppo_ep: 1|act_loss: 0.02777099609375|cri_loss: 0.01476287841796875|unsuper_loss: 0.0 +average reward score: 5.5546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.04%) |Training time=0.44s (20.40%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.39 +epoch: 0|step: 730|ppo_ep: 1|act_loss: -0.037384033203125|cri_loss: -0.0158538818359375|unsuper_loss: 0.0 +average reward score: 5.4453125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.07%) |Training time=0.44s (20.34%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.39 +epoch: 0|step: 731|ppo_ep: 1|act_loss: 0.03265380859375|cri_loss: 0.019927978515625|unsuper_loss: 0.0 +average reward score: 6.0625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.51%) |Training time=0.43s (19.91%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.39 +epoch: 0|step: 732|ppo_ep: 1|act_loss: 0.0188751220703125|cri_loss: 0.0112762451171875|unsuper_loss: 0.0 +average reward score: 5.9921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.61%) |Training time=0.43s (19.85%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.39 +epoch: 0|step: 733|ppo_ep: 1|act_loss: 0.0577392578125|cri_loss: 0.034088134765625|unsuper_loss: 0.0 +average reward score: 6.3125 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.79s (76.67%) |Training time=0.45s (19.09%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.39 +epoch: 0|step: 734|ppo_ep: 1|act_loss: 0.01357269287109375|cri_loss: 0.009857177734375|unsuper_loss: 0.0 +average reward score: 5.46875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.71%) |Training time=0.45s (20.73%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.39 +epoch: 0|step: 735|ppo_ep: 1|act_loss: 0.0099334716796875|cri_loss: 0.007404327392578125|unsuper_loss: 0.0 +average reward score: 6.140625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.62%) |Training time=0.45s (20.85%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.39 +epoch: 0|step: 736|ppo_ep: 1|act_loss: 0.0087432861328125|cri_loss: 0.00799560546875|unsuper_loss: 0.0 +average reward score: 5.79296875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.92%) |Training time=0.44s (20.51%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.39 +epoch: 0|step: 737|ppo_ep: 1|act_loss: -0.020660400390625|cri_loss: -0.00601959228515625|unsuper_loss: 0.0 +average reward score: 5.6484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.87%) |Training time=0.45s (20.57%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.39 +epoch: 0|step: 738|ppo_ep: 1|act_loss: 0.0296478271484375|cri_loss: 0.017120361328125|unsuper_loss: 0.0 +average reward score: 5.80078125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.86%) |Training time=0.45s (20.58%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.39 +[2023-04-14 09:14:46,793] [INFO] [logging.py:96:log_dist] [Rank 0] step=740, skipped=10, lr=[9.508766517786914e-06, 9.508766517786914e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:14:46,811] [INFO] [timer.py:199:stop] epoch=0/micro_step=740/global_step=740, RunningAvgSamplesPerSec=107.26615834374832, CurrSamplesPerSec=110.89899609178117, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:14:46,904] [INFO] [logging.py:96:log_dist] [Rank 0] step=740, skipped=10, lr=[4.926822029941406e-06, 4.926822029941406e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 739|ppo_ep: 1|act_loss: 0.06011962890625|cri_loss: 0.03375244140625|unsuper_loss: 0.0 +average reward score: 6.04296875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.64%) |Training time=0.45s (20.83%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.40 +epoch: 0|step: 740|ppo_ep: 1|act_loss: 0.05902099609375|cri_loss: 0.0396728515625|unsuper_loss: 0.0 +average reward score: 5.3515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.65%) |Training time=0.45s (20.78%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.40 +epoch: 0|step: 741|ppo_ep: 1|act_loss: 0.0015230178833007812|cri_loss: 0.00215911865234375|unsuper_loss: 0.0 +average reward score: 5.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.81%) |Training time=0.45s (20.63%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.40 +epoch: 0|step: 742|ppo_ep: 1|act_loss: -0.025299072265625|cri_loss: -0.00882720947265625|unsuper_loss: 0.0 +average reward score: 5.58984375 +------------------------------------------------------------------------------------- +|E2E latency=2.47s |Gather latency=0.00s (0.00%) |Generate time=1.62s (65.72%) |Training time=0.45s (18.24%) |Others=0.40 (16.04%)|CurSamplesPerSec=12.98 |AvgSamplesPerSec=14.39 +epoch: 0|step: 743|ppo_ep: 1|act_loss: 0.13818359375|cri_loss: 0.08477783203125|unsuper_loss: 0.0 +average reward score: 5.765625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.93%) |Training time=0.44s (20.50%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.39 +epoch: 0|step: 744|ppo_ep: 1|act_loss: 0.013946533203125|cri_loss: 0.009307861328125|unsuper_loss: 0.0 +average reward score: 5.73828125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.65%) |Training time=0.45s (20.78%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.40 +epoch: 0|step: 745|ppo_ep: 1|act_loss: -0.13232421875|cri_loss: -0.056793212890625|unsuper_loss: 0.0 +average reward score: 5.62890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.69%) |Training time=0.45s (20.77%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.40 +epoch: 0|step: 746|ppo_ep: 1|act_loss: 0.020477294921875|cri_loss: 0.013763427734375|unsuper_loss: 0.0 +average reward score: 6.296875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.51%) |Training time=0.43s (19.94%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.40 +epoch: 0|step: 747|ppo_ep: 1|act_loss: 0.060943603515625|cri_loss: 0.0345458984375|unsuper_loss: 0.0 +average reward score: 5.7578125 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.68s (76.20%) |Training time=0.42s (19.31%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.54 |AvgSamplesPerSec=14.40 +epoch: 0|step: 748|ppo_ep: 1|act_loss: 0.0953369140625|cri_loss: 0.051788330078125|unsuper_loss: 0.0 +average reward score: 5.58984375 +------------------------------------------------------------------------------------- +|E2E latency=3.11s |Gather latency=0.00s (0.00%) |Generate time=1.89s (60.67%) |Training time=0.44s (14.24%) |Others=0.78 (25.09%)|CurSamplesPerSec=10.29 |AvgSamplesPerSec=14.39 +[2023-04-14 09:15:09,708] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 4096, but hysteresis is 2. Reducing hysteresis to 1 +[2023-04-14 09:15:09,708] [INFO] [logging.py:96:log_dist] [Rank 0] step=750, skipped=11, lr=[9.504722968731713e-06, 9.504722968731713e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:15:09,709] [INFO] [timer.py:199:stop] epoch=0/micro_step=750/global_step=750, RunningAvgSamplesPerSec=107.37028457158374, CurrSamplesPerSec=130.24941895276606, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:15:09,801] [INFO] [logging.py:96:log_dist] [Rank 0] step=750, skipped=10, lr=[4.9244923400875245e-06, 4.9244923400875245e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 749|ppo_ep: 1|act_loss: -0.016571044921875|cri_loss: -0.0033111572265625|unsuper_loss: 0.0 +average reward score: 5.74609375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.35%) |Training time=0.41s (19.04%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.39 +epoch: 0|step: 750|ppo_ep: 1|act_loss: 0.0894775390625|cri_loss: 0.0479736328125|unsuper_loss: 0.0 +average reward score: 5.4609375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.43%) |Training time=0.43s (20.02%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.39 +epoch: 0|step: 751|ppo_ep: 1|act_loss: -0.052947998046875|cri_loss: -0.0222015380859375|unsuper_loss: 0.0 +average reward score: 5.953125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.69%) |Training time=0.43s (19.68%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.39 +epoch: 0|step: 752|ppo_ep: 1|act_loss: -0.16259765625|cri_loss: -0.07012939453125|unsuper_loss: 0.0 +average reward score: 5.8515625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.06%) |Training time=0.44s (20.42%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.39 +epoch: 0|step: 753|ppo_ep: 1|act_loss: -0.019073486328125|cri_loss: -0.007190704345703125|unsuper_loss: 0.0 +average reward score: 5.9609375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.31%) |Training time=0.44s (20.21%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.39 +epoch: 0|step: 754|ppo_ep: 1|act_loss: 0.01169586181640625|cri_loss: 0.00966644287109375|unsuper_loss: 0.0 +average reward score: 5.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.64s |Gather latency=0.00s (0.00%) |Generate time=1.63s (61.60%) |Training time=0.45s (16.89%) |Others=0.57 (21.51%)|CurSamplesPerSec=12.13 |AvgSamplesPerSec=14.39 +epoch: 0|step: 755|ppo_ep: 1|act_loss: 0.0743408203125|cri_loss: 0.040374755859375|unsuper_loss: 0.0 +average reward score: 5.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.86%) |Training time=0.45s (20.61%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.39 +epoch: 0|step: 756|ppo_ep: 1|act_loss: 0.00849151611328125|cri_loss: 0.005237579345703125|unsuper_loss: 0.0 +average reward score: 5.76171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.84%) |Training time=0.45s (20.61%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.39 +epoch: 0|step: 757|ppo_ep: 1|act_loss: -0.019805908203125|cri_loss: -0.006443023681640625|unsuper_loss: 0.0 +average reward score: 5.22265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.91%) |Training time=0.45s (20.53%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.39 +epoch: 0|step: 758|ppo_ep: 1|act_loss: -0.0068206787109375|cri_loss: -0.0015316009521484375|unsuper_loss: 0.0 +average reward score: 5.3828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.80%) |Training time=0.45s (20.66%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.39 +[2023-04-14 09:15:31,886] [INFO] [logging.py:96:log_dist] [Rank 0] step=760, skipped=11, lr=[9.500164242019886e-06, 9.500164242019886e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:15:31,904] [INFO] [timer.py:199:stop] epoch=0/micro_step=760/global_step=760, RunningAvgSamplesPerSec=107.4533984061924, CurrSamplesPerSec=112.81274968711625, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:15:31,997] [INFO] [logging.py:96:log_dist] [Rank 0] step=760, skipped=10, lr=[4.922126713408392e-06, 4.922126713408392e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 759|ppo_ep: 1|act_loss: -0.0772705078125|cri_loss: -0.036865234375|unsuper_loss: 0.0 +average reward score: 5.9296875 +------------------------------------------------------------------------------------- +|E2E latency=2.73s |Gather latency=0.00s (0.00%) |Generate time=1.63s (59.60%) |Training time=0.45s (16.36%) |Others=0.66 (24.04%)|CurSamplesPerSec=11.73 |AvgSamplesPerSec=14.39 +epoch: 0|step: 760|ppo_ep: 1|act_loss: -0.0902099609375|cri_loss: -0.041015625|unsuper_loss: 0.0 +average reward score: 5.8359375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.71%) |Training time=0.45s (20.74%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.39 +epoch: 0|step: 761|ppo_ep: 1|act_loss: -0.000972747802734375|cri_loss: 0.000545501708984375|unsuper_loss: 0.0 +average reward score: 5.15625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.30%) |Training time=0.44s (20.10%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.39 +epoch: 0|step: 762|ppo_ep: 1|act_loss: 0.005298614501953125|cri_loss: 0.0035610198974609375|unsuper_loss: 0.0 +average reward score: 5.82421875 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.82s (78.13%) |Training time=0.41s (17.61%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.39 +epoch: 0|step: 763|ppo_ep: 1|act_loss: 0.0191497802734375|cri_loss: 0.0120086669921875|unsuper_loss: 0.0 +average reward score: 5.7734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.77%) |Training time=0.45s (20.67%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.39 +epoch: 0|step: 764|ppo_ep: 1|act_loss: 0.07623291015625|cri_loss: 0.041717529296875|unsuper_loss: 0.0 +average reward score: 5.55859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.91%) |Training time=0.45s (20.53%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.39 +epoch: 0|step: 765|ppo_ep: 1|act_loss: 0.0992431640625|cri_loss: 0.052703857421875|unsuper_loss: 0.0 +average reward score: 5.57421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.45%) |Training time=0.46s (20.99%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.39 +epoch: 0|step: 766|ppo_ep: 1|act_loss: -0.0380859375|cri_loss: -0.0171661376953125|unsuper_loss: 0.0 +average reward score: 5.703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.95%) |Training time=0.44s (20.48%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.39 +epoch: 0|step: 767|ppo_ep: 1|act_loss: 0.01441192626953125|cri_loss: 0.010528564453125|unsuper_loss: 0.0 +average reward score: 5.453125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.59%) |Training time=0.45s (20.85%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.39 +epoch: 0|step: 768|ppo_ep: 1|act_loss: -0.042572021484375|cri_loss: -0.017120361328125|unsuper_loss: 0.0 +average reward score: 7.19140625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.26%) |Training time=0.46s (21.21%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.39 +[2023-04-14 09:15:54,304] [INFO] [logging.py:96:log_dist] [Rank 0] step=770, skipped=11, lr=[9.49553621809577e-06, 9.49553621809577e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:15:55,082] [INFO] [timer.py:199:stop] epoch=0/micro_step=770/global_step=770, RunningAvgSamplesPerSec=107.16824767200629, CurrSamplesPerSec=30.633630233362283, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:15:55,175] [INFO] [logging.py:96:log_dist] [Rank 0] step=770, skipped=10, lr=[4.919725184968307e-06, 4.919725184968307e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 769|ppo_ep: 1|act_loss: -0.01451873779296875|cri_loss: -0.0052490234375|unsuper_loss: 0.0 +average reward score: 6.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.93s |Gather latency=0.00s (0.00%) |Generate time=1.62s (55.38%) |Training time=1.21s (41.26%) |Others=0.10 (3.36%)|CurSamplesPerSec=10.93 |AvgSamplesPerSec=14.38 +epoch: 0|step: 770|ppo_ep: 1|act_loss: 0.0462646484375|cri_loss: 0.0270233154296875|unsuper_loss: 0.0 +average reward score: 5.74609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.77%) |Training time=0.45s (20.67%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.38 +epoch: 0|step: 771|ppo_ep: 1|act_loss: 0.0252532958984375|cri_loss: 0.0135498046875|unsuper_loss: 0.0 +average reward score: 6.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.69%) |Training time=0.45s (20.76%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.38 +epoch: 0|step: 772|ppo_ep: 1|act_loss: -0.01418304443359375|cri_loss: -0.0051727294921875|unsuper_loss: 0.0 +average reward score: 5.8671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.65%) |Training time=0.45s (20.81%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.38 +epoch: 0|step: 773|ppo_ep: 1|act_loss: -0.01824951171875|cri_loss: -0.00795745849609375|unsuper_loss: 0.0 +average reward score: 6.390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.76%) |Training time=0.45s (20.69%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.38 +epoch: 0|step: 774|ppo_ep: 1|act_loss: 0.12939453125|cri_loss: 0.07391357421875|unsuper_loss: 0.0 +average reward score: 5.640625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.51%) |Training time=0.45s (20.94%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.38 +epoch: 0|step: 775|ppo_ep: 1|act_loss: -0.010955810546875|cri_loss: -0.00473785400390625|unsuper_loss: 0.0 +average reward score: 6.5234375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.23%) |Training time=0.44s (20.20%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.38 +epoch: 0|step: 776|ppo_ep: 1|act_loss: -0.0279388427734375|cri_loss: -0.01334381103515625|unsuper_loss: 0.0 +average reward score: 5.8671875 +------------------------------------------------------------------------------------- +|E2E latency=2.51s |Gather latency=0.00s (0.00%) |Generate time=1.85s (73.64%) |Training time=0.45s (17.73%) |Others=0.22 (8.63%)|CurSamplesPerSec=12.74 |AvgSamplesPerSec=14.38 +epoch: 0|step: 777|ppo_ep: 1|act_loss: 0.01385498046875|cri_loss: 0.0074615478515625|unsuper_loss: 0.0 +average reward score: 7.5390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.60%) |Training time=0.45s (20.84%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.38 +[2023-04-14 09:16:15,024] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 778|ppo_ep: 1|act_loss: -0.020172119140625|cri_loss: -0.00988006591796875|unsuper_loss: 0.0 +average reward score: 6.88671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.22%) |Training time=0.45s (20.70%) |Others=0.09 (4.08%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.38 +[2023-04-14 09:16:17,078] [INFO] [logging.py:96:log_dist] [Rank 0] step=780, skipped=11, lr=[9.490838965557847e-06, 9.490838965557847e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:16:17,096] [INFO] [timer.py:199:stop] epoch=0/micro_step=780/global_step=780, RunningAvgSamplesPerSec=107.25929385459395, CurrSamplesPerSec=115.33516940086052, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:16:17,181] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +[2023-04-14 09:16:17,182] [INFO] [logging.py:96:log_dist] [Rank 0] step=780, skipped=12, lr=[4.917778136851294e-06, 4.917778136851294e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 779|ppo_ep: 1|act_loss: -0.0114898681640625|cri_loss: -0.00531005859375|unsuper_loss: 0.0 +average reward score: 6.54296875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.73%) |Training time=0.46s (21.14%) |Others=0.09 (4.13%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.38 +epoch: 0|step: 780|ppo_ep: 1|act_loss: -0.04376220703125|cri_loss: -0.021148681640625|unsuper_loss: 0.0 +average reward score: 6.203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.35%) |Training time=0.46s (21.02%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.38 +epoch: 0|step: 781|ppo_ep: 1|act_loss: -0.00690460205078125|cri_loss: -0.003047943115234375|unsuper_loss: 0.0 +average reward score: 6.9140625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.59%) |Training time=0.45s (20.85%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.38 +epoch: 0|step: 782|ppo_ep: 1|act_loss: 0.05810546875|cri_loss: 0.031005859375|unsuper_loss: 0.0 +average reward score: 6.40625 +------------------------------------------------------------------------------------- +|E2E latency=2.90s |Gather latency=0.00s (0.00%) |Generate time=1.62s (55.99%) |Training time=0.45s (15.46%) |Others=0.83 (28.55%)|CurSamplesPerSec=11.05 |AvgSamplesPerSec=14.38 +epoch: 0|step: 783|ppo_ep: 1|act_loss: 0.006511688232421875|cri_loss: 0.00395965576171875|unsuper_loss: 0.0 +average reward score: 6.203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.71%) |Training time=0.45s (20.72%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.38 +epoch: 0|step: 784|ppo_ep: 1|act_loss: -0.01220703125|cri_loss: -0.005767822265625|unsuper_loss: 0.0 +average reward score: 6.125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.60%) |Training time=0.45s (20.84%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.38 +epoch: 0|step: 785|ppo_ep: 1|act_loss: -0.0089111328125|cri_loss: -0.0036163330078125|unsuper_loss: 0.0 +average reward score: 6.1015625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.94%) |Training time=0.44s (20.49%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.38 +epoch: 0|step: 786|ppo_ep: 1|act_loss: -0.0023040771484375|cri_loss: -0.0006747245788574219|unsuper_loss: 0.0 +average reward score: 6.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.71%) |Training time=0.45s (20.73%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.38 +epoch: 0|step: 787|ppo_ep: 1|act_loss: 0.01407623291015625|cri_loss: 0.00878143310546875|unsuper_loss: 0.0 +average reward score: 5.70703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.60%) |Training time=0.45s (20.85%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.38 +epoch: 0|step: 788|ppo_ep: 1|act_loss: 0.0009098052978515625|cri_loss: 0.0007901191711425781|unsuper_loss: 0.0 +average reward score: 6.359375 +------------------------------------------------------------------------------------- +|E2E latency=2.46s |Gather latency=0.00s (0.00%) |Generate time=1.62s (65.68%) |Training time=0.45s (18.26%) |Others=0.40 (16.06%)|CurSamplesPerSec=13.00 |AvgSamplesPerSec=14.38 +[2023-04-14 09:16:39,783] [INFO] [logging.py:96:log_dist] [Rank 0] step=790, skipped=11, lr=[9.486072554030733e-06, 9.486072554030733e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:16:39,801] [INFO] [timer.py:199:stop] epoch=0/micro_step=790/global_step=790, RunningAvgSamplesPerSec=107.30907836345817, CurrSamplesPerSec=113.32836398086337, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:16:39,895] [INFO] [logging.py:96:log_dist] [Rank 0] step=790, skipped=12, lr=[4.915312075301798e-06, 4.915312075301798e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 789|ppo_ep: 1|act_loss: 0.01332855224609375|cri_loss: 0.007904052734375|unsuper_loss: 0.0 +average reward score: 6.0234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.94%) |Training time=0.45s (20.51%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.38 +epoch: 0|step: 790|ppo_ep: 1|act_loss: 0.06280517578125|cri_loss: 0.036529541015625|unsuper_loss: 0.0 +average reward score: 5.82421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.14%) |Training time=0.44s (20.28%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.38 +epoch: 0|step: 791|ppo_ep: 1|act_loss: -0.03814697265625|cri_loss: -0.018585205078125|unsuper_loss: 0.0 +average reward score: 5.67578125 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.84s (78.81%) |Training time=0.40s (16.97%) |Others=0.10 (4.22%)|CurSamplesPerSec=13.68 |AvgSamplesPerSec=14.38 +epoch: 0|step: 792|ppo_ep: 1|act_loss: 0.05023193359375|cri_loss: 0.0278167724609375|unsuper_loss: 0.0 +average reward score: 4.94921875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.94%) |Training time=0.44s (20.50%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.38 +epoch: 0|step: 793|ppo_ep: 1|act_loss: 0.00034809112548828125|cri_loss: 0.002155303955078125|unsuper_loss: 0.0 +average reward score: 5.09375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.10%) |Training time=0.44s (20.33%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.38 +epoch: 0|step: 794|ppo_ep: 1|act_loss: -0.011444091796875|cri_loss: -0.00421142578125|unsuper_loss: 0.0 +average reward score: 4.73046875 +------------------------------------------------------------------------------------- +|E2E latency=2.58s |Gather latency=0.00s (0.00%) |Generate time=1.62s (62.67%) |Training time=0.45s (17.42%) |Others=0.51 (19.91%)|CurSamplesPerSec=12.39 |AvgSamplesPerSec=14.38 +epoch: 0|step: 795|ppo_ep: 1|act_loss: 0.06146240234375|cri_loss: 0.0360107421875|unsuper_loss: 0.0 +average reward score: 4.29296875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.81%) |Training time=0.45s (20.62%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.38 +epoch: 0|step: 796|ppo_ep: 1|act_loss: 0.07598876953125|cri_loss: 0.0418701171875|unsuper_loss: 0.0 +average reward score: 4.9453125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.89%) |Training time=0.45s (20.56%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.38 +epoch: 0|step: 797|ppo_ep: 1|act_loss: -0.06427001953125|cri_loss: -0.0310516357421875|unsuper_loss: 0.0 +average reward score: 5.421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.82%) |Training time=0.45s (20.62%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.38 +epoch: 0|step: 798|ppo_ep: 1|act_loss: 0.00328826904296875|cri_loss: 0.0026397705078125|unsuper_loss: 0.0 +average reward score: 5.4375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.72%) |Training time=0.45s (20.73%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.38 +[2023-04-14 09:17:02,019] [INFO] [logging.py:96:log_dist] [Rank 0] step=800, skipped=11, lr=[9.481237054164141e-06, 9.481237054164141e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:17:02,037] [INFO] [timer.py:199:stop] epoch=0/micro_step=800/global_step=800, RunningAvgSamplesPerSec=107.40037872515786, CurrSamplesPerSec=111.78615850382913, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:17:02,130] [INFO] [logging.py:96:log_dist] [Rank 0] step=800, skipped=12, lr=[4.912810213000723e-06, 4.912810213000723e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 799|ppo_ep: 1|act_loss: -0.02484130859375|cri_loss: -0.0116119384765625|unsuper_loss: 0.0 +average reward score: 5.140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.69%) |Training time=0.45s (20.76%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.38 +epoch: 0|step: 800|ppo_ep: 1|act_loss: 0.037261962890625|cri_loss: 0.021484375|unsuper_loss: 0.0 +average reward score: 4.92578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.85%) |Training time=0.45s (20.59%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.38 +epoch: 0|step: 801|ppo_ep: 1|act_loss: -0.002933502197265625|cri_loss: -0.000164031982421875|unsuper_loss: 0.0 +average reward score: 5.44921875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.67%) |Training time=0.45s (20.76%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.38 +epoch: 0|step: 802|ppo_ep: 1|act_loss: 0.0721435546875|cri_loss: 0.041412353515625|unsuper_loss: 0.0 +average reward score: 5.71484375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.59%) |Training time=0.45s (20.86%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.38 +epoch: 0|step: 803|ppo_ep: 1|act_loss: 0.01995849609375|cri_loss: 0.01092529296875|unsuper_loss: 0.0 +average reward score: 5.3515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.73%) |Training time=0.45s (20.72%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.38 +epoch: 0|step: 804|ppo_ep: 1|act_loss: -0.00691986083984375|cri_loss: -0.0017642974853515625|unsuper_loss: 0.0 +average reward score: 5.37890625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.62%) |Training time=0.46s (20.87%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.38 +epoch: 0|step: 805|ppo_ep: 1|act_loss: -0.005359649658203125|cri_loss: -0.001361846923828125|unsuper_loss: 0.0 +average reward score: 5.765625 +------------------------------------------------------------------------------------- +|E2E latency=2.38s |Gather latency=0.00s (0.00%) |Generate time=1.78s (74.75%) |Training time=0.49s (20.79%) |Others=0.11 (4.46%)|CurSamplesPerSec=13.45 |AvgSamplesPerSec=14.38 +epoch: 0|step: 806|ppo_ep: 1|act_loss: 0.0159759521484375|cri_loss: 0.0096588134765625|unsuper_loss: 0.0 +average reward score: 5.68359375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.67s (76.42%) |Training time=0.42s (19.06%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.38 +epoch: 0|step: 807|ppo_ep: 1|act_loss: -0.0023345947265625|cri_loss: 0.000698089599609375|unsuper_loss: 0.0 +average reward score: 5.48046875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.79%) |Training time=0.45s (20.67%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.38 +epoch: 0|step: 808|ppo_ep: 1|act_loss: 0.035400390625|cri_loss: 0.019683837890625|unsuper_loss: 0.0 +average reward score: 6.15625 +------------------------------------------------------------------------------------- +|E2E latency=2.76s |Gather latency=0.00s (0.00%) |Generate time=1.62s (58.90%) |Training time=0.44s (16.06%) |Others=0.69 (25.05%)|CurSamplesPerSec=11.60 |AvgSamplesPerSec=14.38 +[2023-04-14 09:17:24,516] [INFO] [logging.py:96:log_dist] [Rank 0] step=810, skipped=11, lr=[9.476332537631846e-06, 9.476332537631846e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:17:24,534] [INFO] [timer.py:199:stop] epoch=0/micro_step=810/global_step=810, RunningAvgSamplesPerSec=107.47037359827968, CurrSamplesPerSec=114.3687949064038, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:17:24,627] [INFO] [logging.py:96:log_dist] [Rank 0] step=810, skipped=12, lr=[4.910272587031704e-06, 4.910272587031704e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 809|ppo_ep: 1|act_loss: 0.0015316009521484375|cri_loss: 0.0021305084228515625|unsuper_loss: 0.0 +average reward score: 5.58984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.01%) |Training time=0.44s (20.45%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.38 +epoch: 0|step: 810|ppo_ep: 1|act_loss: 0.000553131103515625|cri_loss: 0.0010042190551757812|unsuper_loss: 0.0 +average reward score: 5.8359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.02%) |Training time=0.44s (20.42%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.38 +epoch: 0|step: 811|ppo_ep: 1|act_loss: -0.03125|cri_loss: -0.01335906982421875|unsuper_loss: 0.0 +average reward score: 5.8359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.07%) |Training time=0.44s (20.36%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.38 +epoch: 0|step: 812|ppo_ep: 1|act_loss: -0.0394287109375|cri_loss: -0.018096923828125|unsuper_loss: 0.0 +average reward score: 5.6953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.23%) |Training time=0.44s (20.22%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.38 +epoch: 0|step: 813|ppo_ep: 1|act_loss: 0.00807952880859375|cri_loss: 0.004505157470703125|unsuper_loss: 0.0 +average reward score: 5.90234375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.37%) |Training time=0.43s (20.07%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.38 +epoch: 0|step: 814|ppo_ep: 1|act_loss: 0.003143310546875|cri_loss: 0.0019350051879882812|unsuper_loss: 0.0 +average reward score: 5.6171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.41%) |Training time=0.44s (20.07%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.38 +epoch: 0|step: 815|ppo_ep: 1|act_loss: 0.00887298583984375|cri_loss: 0.00640106201171875|unsuper_loss: 0.0 +average reward score: 5.6328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.88%) |Training time=0.45s (20.60%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.38 +epoch: 0|step: 816|ppo_ep: 1|act_loss: -0.01953125|cri_loss: -0.009552001953125|unsuper_loss: 0.0 +average reward score: 5.63671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.18%) |Training time=0.44s (20.27%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.38 +epoch: 0|step: 817|ppo_ep: 1|act_loss: 0.0401611328125|cri_loss: 0.02154541015625|unsuper_loss: 0.0 +average reward score: 5.50390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.94%) |Training time=0.44s (20.51%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.38 +epoch: 0|step: 818|ppo_ep: 1|act_loss: 0.004489898681640625|cri_loss: 0.0026702880859375|unsuper_loss: 0.0 +average reward score: 5.6015625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.90%) |Training time=0.45s (20.55%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.38 +[2023-04-14 09:17:46,179] [INFO] [logging.py:96:log_dist] [Rank 0] step=820, skipped=11, lr=[9.471359077130615e-06, 9.471359077130615e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:17:46,197] [INFO] [timer.py:199:stop] epoch=0/micro_step=820/global_step=820, RunningAvgSamplesPerSec=107.5542575745028, CurrSamplesPerSec=112.59810387019539, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:17:46,290] [INFO] [logging.py:96:log_dist] [Rank 0] step=820, skipped=12, lr=[4.907699235008478e-06, 4.907699235008478e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 819|ppo_ep: 1|act_loss: -0.003543853759765625|cri_loss: -0.0015096664428710938|unsuper_loss: 0.0 +average reward score: 5.57421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.89%) |Training time=0.45s (20.60%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.38 +epoch: 0|step: 820|ppo_ep: 1|act_loss: -0.02679443359375|cri_loss: -0.01294708251953125|unsuper_loss: 0.0 +average reward score: 5.2109375 +------------------------------------------------------------------------------------- +|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.05%) |Training time=0.44s (19.68%) |Others=0.16 (7.28%)|CurSamplesPerSec=14.30 |AvgSamplesPerSec=14.38 +epoch: 0|step: 821|ppo_ep: 1|act_loss: 0.0236663818359375|cri_loss: 0.012542724609375|unsuper_loss: 0.0 +average reward score: 5.87109375 +------------------------------------------------------------------------------------- +|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.72s (76.86%) |Training time=0.42s (18.71%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.28 |AvgSamplesPerSec=14.38 +epoch: 0|step: 822|ppo_ep: 1|act_loss: 0.0013227462768554688|cri_loss: 0.0012483596801757812|unsuper_loss: 0.0 +average reward score: 5.328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.81%) |Training time=0.45s (20.66%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.38 +epoch: 0|step: 823|ppo_ep: 1|act_loss: 0.02459716796875|cri_loss: 0.01287078857421875|unsuper_loss: 0.0 +average reward score: 5.5390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.78%) |Training time=0.45s (20.69%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.38 +epoch: 0|step: 824|ppo_ep: 1|act_loss: -0.045379638671875|cri_loss: -0.0219268798828125|unsuper_loss: 0.0 +average reward score: 6.2109375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.80%) |Training time=0.45s (20.67%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.38 +epoch: 0|step: 825|ppo_ep: 1|act_loss: -0.005863189697265625|cri_loss: -0.00244140625|unsuper_loss: 0.0 +average reward score: 5.37109375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.60%) |Training time=0.45s (20.87%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.38 +epoch: 0|step: 826|ppo_ep: 1|act_loss: -0.01385498046875|cri_loss: -0.006084442138671875|unsuper_loss: 0.0 +average reward score: 5.3203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.67%) |Training time=0.45s (20.70%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39 +epoch: 0|step: 827|ppo_ep: 1|act_loss: -0.01480865478515625|cri_loss: -0.00696563720703125|unsuper_loss: 0.0 +average reward score: 5.6328125 +------------------------------------------------------------------------------------- +|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.63s (70.82%) |Training time=0.45s (19.77%) |Others=0.22 (9.42%)|CurSamplesPerSec=13.94 |AvgSamplesPerSec=14.38 +epoch: 0|step: 828|ppo_ep: 1|act_loss: 0.0229034423828125|cri_loss: 0.01232147216796875|unsuper_loss: 0.0 +average reward score: 6.640625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.72%) |Training time=0.45s (20.75%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.38 +[2023-04-14 09:18:08,145] [INFO] [logging.py:96:log_dist] [Rank 0] step=830, skipped=11, lr=[9.466316746379131e-06, 9.466316746379131e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:18:08,163] [INFO] [timer.py:199:stop] epoch=0/micro_step=830/global_step=830, RunningAvgSamplesPerSec=107.61573678676814, CurrSamplesPerSec=112.13863919103744, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:18:08,256] [INFO] [logging.py:96:log_dist] [Rank 0] step=830, skipped=12, lr=[4.905090195074332e-06, 4.905090195074332e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 829|ppo_ep: 1|act_loss: -0.01229095458984375|cri_loss: -0.0051116943359375|unsuper_loss: 0.0 +average reward score: 5.21875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.81%) |Training time=0.45s (20.67%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.39 +epoch: 0|step: 830|ppo_ep: 1|act_loss: 0.00736236572265625|cri_loss: 0.0042724609375|unsuper_loss: 0.0 +average reward score: 6.08203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.79%) |Training time=0.45s (20.68%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39 +epoch: 0|step: 831|ppo_ep: 1|act_loss: 0.0172576904296875|cri_loss: 0.01012420654296875|unsuper_loss: 0.0 +average reward score: 5.984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.88%) |Training time=0.45s (20.58%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39 +epoch: 0|step: 832|ppo_ep: 1|act_loss: -0.016693115234375|cri_loss: -0.00738525390625|unsuper_loss: 0.0 +average reward score: 5.65625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.80%) |Training time=0.45s (20.67%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.39 +epoch: 0|step: 833|ppo_ep: 1|act_loss: 0.01226806640625|cri_loss: 0.006694793701171875|unsuper_loss: 0.0 +average reward score: 5.84375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.89%) |Training time=0.45s (20.47%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.39 +epoch: 0|step: 834|ppo_ep: 1|act_loss: 0.035400390625|cri_loss: 0.018402099609375|unsuper_loss: 0.0 +average reward score: 6.38671875 +------------------------------------------------------------------------------------- +|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.74s (76.01%) |Training time=0.45s (19.69%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.96 |AvgSamplesPerSec=14.39 +epoch: 0|step: 835|ppo_ep: 1|act_loss: -0.01434326171875|cri_loss: -0.006683349609375|unsuper_loss: 0.0 +average reward score: 6.0 +------------------------------------------------------------------------------------- +|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.63s (72.83%) |Training time=0.49s (21.85%) |Others=0.12 (5.32%)|CurSamplesPerSec=14.30 |AvgSamplesPerSec=14.39 +epoch: 0|step: 836|ppo_ep: 1|act_loss: -0.04925537109375|cri_loss: -0.023651123046875|unsuper_loss: 0.0 +average reward score: 5.76953125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.86%) |Training time=0.43s (19.59%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.39 +epoch: 0|step: 837|ppo_ep: 1|act_loss: -0.054840087890625|cri_loss: -0.0267333984375|unsuper_loss: 0.0 +average reward score: 6.25390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.11%) |Training time=0.44s (20.33%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.39 +epoch: 0|step: 838|ppo_ep: 1|act_loss: -0.016571044921875|cri_loss: -0.0059051513671875|unsuper_loss: 0.0 +average reward score: 5.93359375 +------------------------------------------------------------------------------------- +|E2E latency=2.45s |Gather latency=0.00s (0.00%) |Generate time=1.62s (66.08%) |Training time=0.45s (18.42%) |Others=0.38 (15.50%)|CurSamplesPerSec=13.05 |AvgSamplesPerSec=14.39 +[2023-04-14 09:18:30,351] [INFO] [logging.py:96:log_dist] [Rank 0] step=840, skipped=11, lr=[9.461205620116899e-06, 9.461205620116899e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:18:30,370] [INFO] [timer.py:199:stop] epoch=0/micro_step=840/global_step=840, RunningAvgSamplesPerSec=107.66078239168438, CurrSamplesPerSec=111.81325820119396, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:18:30,463] [INFO] [logging.py:96:log_dist] [Rank 0] step=840, skipped=12, lr=[4.902445505901531e-06, 4.902445505901531e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 839|ppo_ep: 1|act_loss: -0.023773193359375|cri_loss: -0.0101318359375|unsuper_loss: 0.0 +average reward score: 6.68359375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.75%) |Training time=0.45s (20.71%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.39 +epoch: 0|step: 840|ppo_ep: 1|act_loss: -0.01178741455078125|cri_loss: -0.005176544189453125|unsuper_loss: 0.0 +average reward score: 6.53515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.40%) |Training time=0.46s (21.07%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.39 +epoch: 0|step: 841|ppo_ep: 1|act_loss: 0.014923095703125|cri_loss: 0.00815582275390625|unsuper_loss: 0.0 +average reward score: 5.390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.68%) |Training time=0.45s (20.78%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39 +epoch: 0|step: 842|ppo_ep: 1|act_loss: -0.0009603500366210938|cri_loss: 0.0004177093505859375|unsuper_loss: 0.0 +average reward score: 5.6171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.67%) |Training time=0.45s (20.80%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39 +epoch: 0|step: 843|ppo_ep: 1|act_loss: 0.0574951171875|cri_loss: 0.0333251953125|unsuper_loss: 0.0 +average reward score: 5.05078125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.71%) |Training time=0.45s (20.76%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39 +epoch: 0|step: 844|ppo_ep: 1|act_loss: -0.001766204833984375|cri_loss: 0.002040863037109375|unsuper_loss: 0.0 +average reward score: 6.13671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.44%) |Training time=0.46s (21.03%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.39 +epoch: 0|step: 845|ppo_ep: 1|act_loss: -0.02178955078125|cri_loss: -0.00971221923828125|unsuper_loss: 0.0 +average reward score: 5.23046875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.60%) |Training time=0.45s (20.87%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39 +epoch: 0|step: 846|ppo_ep: 1|act_loss: -0.05322265625|cri_loss: -0.024993896484375|unsuper_loss: 0.0 +average reward score: 5.63671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.01%) |Training time=0.44s (20.46%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.39 +epoch: 0|step: 847|ppo_ep: 1|act_loss: -0.031646728515625|cri_loss: -0.01514434814453125|unsuper_loss: 0.0 +average reward score: 5.7109375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.40%) |Training time=0.46s (21.09%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.39 +epoch: 0|step: 848|ppo_ep: 1|act_loss: -0.044219970703125|cri_loss: -0.0208587646484375|unsuper_loss: 0.0 +average reward score: 5.25390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.47%) |Training time=0.46s (21.00%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39 +[2023-04-14 09:18:52,073] [INFO] [logging.py:96:log_dist] [Rank 0] step=850, skipped=11, lr=[9.456025774103137e-06, 9.456025774103137e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:18:52,091] [INFO] [timer.py:199:stop] epoch=0/micro_step=850/global_step=850, RunningAvgSamplesPerSec=107.68979385696973, CurrSamplesPerSec=109.89481785825113, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:18:52,184] [INFO] [logging.py:96:log_dist] [Rank 0] step=850, skipped=12, lr=[4.899765206690747e-06, 4.899765206690747e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 849|ppo_ep: 1|act_loss: 0.01556396484375|cri_loss: 0.0083770751953125|unsuper_loss: 0.0 +average reward score: 5.890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.55%) |Training time=0.45s (20.91%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.39 +epoch: 0|step: 850|ppo_ep: 1|act_loss: 0.027984619140625|cri_loss: 0.01593017578125|unsuper_loss: 0.0 +average reward score: 5.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.79%) |Training time=0.45s (20.67%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.39 +epoch: 0|step: 851|ppo_ep: 1|act_loss: -0.01447296142578125|cri_loss: -0.00623321533203125|unsuper_loss: 0.0 +average reward score: 5.75390625 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.65s (73.45%) |Training time=0.43s (18.96%) |Others=0.17 (7.60%)|CurSamplesPerSec=14.24 |AvgSamplesPerSec=14.39 +epoch: 0|step: 852|ppo_ep: 1|act_loss: -0.02020263671875|cri_loss: -0.00873565673828125|unsuper_loss: 0.0 +average reward score: 5.43359375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.95%) |Training time=0.44s (20.19%) |Others=0.11 (4.85%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.39 +epoch: 0|step: 853|ppo_ep: 1|act_loss: -0.04766845703125|cri_loss: -0.0230560302734375|unsuper_loss: 0.0 +average reward score: 5.13671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.05%) |Training time=0.44s (20.41%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.39 +epoch: 0|step: 854|ppo_ep: 1|act_loss: -0.03314208984375|cri_loss: -0.0153350830078125|unsuper_loss: 0.0 +average reward score: 5.53125 +------------------------------------------------------------------------------------- +|E2E latency=2.40s |Gather latency=0.00s (0.00%) |Generate time=1.62s (67.54%) |Training time=0.45s (18.83%) |Others=0.33 (13.63%)|CurSamplesPerSec=13.32 |AvgSamplesPerSec=14.39 +epoch: 0|step: 855|ppo_ep: 1|act_loss: 0.0093231201171875|cri_loss: 0.00567626953125|unsuper_loss: 0.0 +average reward score: 5.6171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.58%) |Training time=0.45s (20.90%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39 +epoch: 0|step: 856|ppo_ep: 1|act_loss: -0.0193023681640625|cri_loss: -0.00839996337890625|unsuper_loss: 0.0 +average reward score: 5.46875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.54%) |Training time=0.45s (20.93%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39 +epoch: 0|step: 857|ppo_ep: 1|act_loss: 0.003940582275390625|cri_loss: 0.003002166748046875|unsuper_loss: 0.0 +average reward score: 5.39453125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.66%) |Training time=0.45s (20.80%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.39 +epoch: 0|step: 858|ppo_ep: 1|act_loss: 0.01480865478515625|cri_loss: 0.00885009765625|unsuper_loss: 0.0 +average reward score: 5.4609375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.67%) |Training time=0.45s (20.79%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.39 +[2023-04-14 09:19:14,107] [INFO] [logging.py:96:log_dist] [Rank 0] step=860, skipped=11, lr=[9.450777285115664e-06, 9.450777285115664e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:19:14,125] [INFO] [timer.py:199:stop] epoch=0/micro_step=860/global_step=860, RunningAvgSamplesPerSec=107.74103031128162, CurrSamplesPerSec=110.24423676217167, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:19:14,218] [INFO] [logging.py:96:log_dist] [Rank 0] step=860, skipped=12, lr=[4.897049337170483e-06, 4.897049337170483e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 859|ppo_ep: 1|act_loss: 0.00782012939453125|cri_loss: 0.005535125732421875|unsuper_loss: 0.0 +average reward score: 5.484375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.63%) |Training time=0.45s (20.86%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.39 +epoch: 0|step: 860|ppo_ep: 1|act_loss: 0.06195068359375|cri_loss: 0.034088134765625|unsuper_loss: 0.0 +average reward score: 5.1875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.87%) |Training time=0.45s (20.58%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.39 +epoch: 0|step: 861|ppo_ep: 1|act_loss: -0.027069091796875|cri_loss: -0.012939453125|unsuper_loss: 0.0 +average reward score: 5.078125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.97%) |Training time=0.45s (20.53%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.39 +epoch: 0|step: 862|ppo_ep: 1|act_loss: -0.0242156982421875|cri_loss: -0.010772705078125|unsuper_loss: 0.0 +average reward score: 5.69921875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.53%) |Training time=0.46s (20.97%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.39 +epoch: 0|step: 863|ppo_ep: 1|act_loss: -0.0100555419921875|cri_loss: -0.00096893310546875|unsuper_loss: 0.0 +average reward score: 5.2421875 +------------------------------------------------------------------------------------- +|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.65s (73.54%) |Training time=0.49s (22.04%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.30 |AvgSamplesPerSec=14.39 +epoch: 0|step: 864|ppo_ep: 1|act_loss: 0.04193115234375|cri_loss: 0.028228759765625|unsuper_loss: 0.0 +average reward score: 5.171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.51%) |Training time=0.45s (20.95%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39 +epoch: 0|step: 865|ppo_ep: 1|act_loss: 0.020294189453125|cri_loss: 0.01458740234375|unsuper_loss: 0.0 +average reward score: 5.43359375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.66%) |Training time=0.45s (20.81%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.39 +epoch: 0|step: 866|ppo_ep: 1|act_loss: -0.03167724609375|cri_loss: -0.014373779296875|unsuper_loss: 0.0 +average reward score: 5.01953125 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.82s (77.79%) |Training time=0.42s (17.89%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.39 +epoch: 0|step: 867|ppo_ep: 1|act_loss: 0.0034465789794921875|cri_loss: 0.00566864013671875|unsuper_loss: 0.0 +average reward score: 5.29296875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.94%) |Training time=0.45s (20.51%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.39 +epoch: 0|step: 868|ppo_ep: 1|act_loss: 0.0083160400390625|cri_loss: 0.008819580078125|unsuper_loss: 0.0 +average reward score: 5.12890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.58%) |Training time=0.45s (20.89%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.39 +[2023-04-14 09:19:36,087] [INFO] [logging.py:96:log_dist] [Rank 0] step=870, skipped=11, lr=[9.445460230949745e-06, 9.445460230949745e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:19:36,491] [INFO] [timer.py:199:stop] epoch=0/micro_step=870/global_step=870, RunningAvgSamplesPerSec=107.61950179826276, CurrSamplesPerSec=47.67854553463432, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:19:36,584] [INFO] [logging.py:96:log_dist] [Rank 0] step=870, skipped=12, lr=[4.894297937596475e-06, 4.894297937596475e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 869|ppo_ep: 1|act_loss: -0.000762939453125|cri_loss: 0.0043182373046875|unsuper_loss: 0.0 +average reward score: 5.33984375 +------------------------------------------------------------------------------------- +|E2E latency=2.55s |Gather latency=0.00s (0.00%) |Generate time=1.62s (63.49%) |Training time=0.83s (32.66%) |Others=0.10 (3.85%)|CurSamplesPerSec=12.53 |AvgSamplesPerSec=14.39 +epoch: 0|step: 870|ppo_ep: 1|act_loss: 0.06591796875|cri_loss: 0.047149658203125|unsuper_loss: 0.0 +average reward score: 5.08984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.79%) |Training time=0.45s (20.68%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.39 +epoch: 0|step: 871|ppo_ep: 1|act_loss: -0.0975341796875|cri_loss: -0.04486083984375|unsuper_loss: 0.0 +average reward score: 5.6953125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.50%) |Training time=0.46s (20.98%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.39 +epoch: 0|step: 872|ppo_ep: 1|act_loss: 0.01934814453125|cri_loss: 0.0106658935546875|unsuper_loss: 0.0 +average reward score: 5.3984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.51%) |Training time=0.46s (20.95%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.39 +epoch: 0|step: 873|ppo_ep: 1|act_loss: 0.0455322265625|cri_loss: 0.02703857421875|unsuper_loss: 0.0 +average reward score: 5.56640625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.73%) |Training time=0.45s (20.76%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.39 +epoch: 0|step: 874|ppo_ep: 1|act_loss: 0.01427459716796875|cri_loss: 0.00791168212890625|unsuper_loss: 0.0 +average reward score: 5.6015625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.98%) |Training time=0.45s (20.49%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.39 +epoch: 0|step: 875|ppo_ep: 1|act_loss: -0.04949951171875|cri_loss: -0.0236358642578125|unsuper_loss: 0.0 +average reward score: 5.3671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.65%) |Training time=0.45s (20.81%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.39 +epoch: 0|step: 876|ppo_ep: 1|act_loss: -0.004947662353515625|cri_loss: -0.00028228759765625|unsuper_loss: 0.0 +average reward score: 5.3359375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.73%) |Training time=0.45s (20.76%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.39 +epoch: 0|step: 877|ppo_ep: 1|act_loss: 0.0782470703125|cri_loss: 0.044403076171875|unsuper_loss: 0.0 +average reward score: 5.5234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.86%) |Training time=0.45s (20.61%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.39 +epoch: 0|step: 878|ppo_ep: 1|act_loss: 0.06304931640625|cri_loss: 0.037445068359375|unsuper_loss: 0.0 +average reward score: 5.4609375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.64%) |Training time=0.45s (20.83%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.39 +[2023-04-14 09:19:58,214] [INFO] [logging.py:96:log_dist] [Rank 0] step=880, skipped=11, lr=[9.440074690416949e-06, 9.440074690416949e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:19:58,232] [INFO] [timer.py:199:stop] epoch=0/micro_step=880/global_step=880, RunningAvgSamplesPerSec=107.65370035299243, CurrSamplesPerSec=109.40803398863513, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:19:58,325] [INFO] [logging.py:96:log_dist] [Rank 0] step=880, skipped=12, lr=[4.891511048751102e-06, 4.891511048751102e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 879|ppo_ep: 1|act_loss: -0.0906982421875|cri_loss: -0.03826904296875|unsuper_loss: 0.0 +average reward score: 5.4375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.55%) |Training time=0.46s (20.94%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.39 +[2023-04-14 09:20:00,493] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 880|ppo_ep: 1|act_loss: 0.0443115234375|cri_loss: 0.02581787109375|unsuper_loss: 0.0 +average reward score: 5.79296875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.84%) |Training time=0.46s (21.08%) |Others=0.09 (4.09%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.39 +[2023-04-14 09:20:02,656] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 881|ppo_ep: 1|act_loss: -0.0081787109375|cri_loss: -5.340576171875e-05|unsuper_loss: 0.0 +average reward score: 5.12890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.09%) |Training time=0.43s (19.84%) |Others=0.09 (4.07%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.39 +epoch: 0|step: 882|ppo_ep: 1|act_loss: -0.0518798828125|cri_loss: -0.01336669921875|unsuper_loss: 0.0 +average reward score: 5.46875 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.22%) |Training time=0.43s (18.55%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.39 +epoch: 0|step: 883|ppo_ep: 1|act_loss: -0.050872802734375|cri_loss: -0.01409912109375|unsuper_loss: 0.0 +average reward score: 5.2734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.36%) |Training time=0.46s (21.03%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.39 +epoch: 0|step: 884|ppo_ep: 1|act_loss: 0.0120086669921875|cri_loss: 0.01505279541015625|unsuper_loss: 0.0 +average reward score: 5.78125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.54%) |Training time=0.45s (20.91%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.39 +epoch: 0|step: 885|ppo_ep: 1|act_loss: 0.0875244140625|cri_loss: 0.05120849609375|unsuper_loss: 0.0 +average reward score: 4.94140625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.13%) |Training time=0.44s (20.34%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.40 +[2023-04-14 09:20:13,670] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384 +epoch: 0|step: 886|ppo_ep: 1|act_loss: -0.1021728515625|cri_loss: -0.035919189453125|unsuper_loss: 0.0 +average reward score: 5.3671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.50%) |Training time=0.44s (20.34%) |Others=0.09 (4.16%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.40 +epoch: 0|step: 887|ppo_ep: 1|act_loss: -0.0577392578125|cri_loss: -0.0184173583984375|unsuper_loss: 0.0 +average reward score: 5.171875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.99%) |Training time=0.45s (20.47%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.40 +epoch: 0|step: 888|ppo_ep: 1|act_loss: -0.0115966796875|cri_loss: 0.014251708984375|unsuper_loss: 0.0 +average reward score: 5.46484375 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.65s (71.35%) |Training time=0.44s (18.94%) |Others=0.22 (9.70%)|CurSamplesPerSec=13.82 |AvgSamplesPerSec=14.40 +[2023-04-14 09:20:20,226] [INFO] [logging.py:96:log_dist] [Rank 0] step=890, skipped=11, lr=[9.43462074334398e-06, 9.43462074334398e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:20:20,244] [INFO] [timer.py:199:stop] epoch=0/micro_step=890/global_step=890, RunningAvgSamplesPerSec=107.7239576261761, CurrSamplesPerSec=116.03634172425211, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:20:20,337] [INFO] [logging.py:96:log_dist] [Rank 0] step=890, skipped=15, lr=[4.889539132542428e-06, 4.889539132542428e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 889|ppo_ep: 1|act_loss: 0.06048583984375|cri_loss: 0.04217529296875|unsuper_loss: 0.0 +average reward score: 5.3984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.33%) |Training time=0.44s (20.16%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.40 +[2023-04-14 09:20:22,384] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 890|ppo_ep: 1|act_loss: -0.027069091796875|cri_loss: 0.01715087890625|unsuper_loss: 0.0 +average reward score: 6.2734375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.25%) |Training time=0.41s (19.15%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.40 +epoch: 0|step: 891|ppo_ep: 1|act_loss: -0.1314697265625|cri_loss: -0.045501708984375|unsuper_loss: 0.0 +average reward score: 5.5390625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.14%) |Training time=0.44s (20.33%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.40 +epoch: 0|step: 892|ppo_ep: 1|act_loss: -0.0782470703125|cri_loss: -0.028076171875|unsuper_loss: 0.0 +average reward score: 5.4453125 +------------------------------------------------------------------------------------- +|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.67s (74.39%) |Training time=0.44s (19.66%) |Others=0.13 (5.96%)|CurSamplesPerSec=14.28 |AvgSamplesPerSec=14.40 +epoch: 0|step: 893|ppo_ep: 1|act_loss: -0.04217529296875|cri_loss: 0.0130615234375|unsuper_loss: 0.0 +average reward score: 5.78125 +------------------------------------------------------------------------------------- +|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.70s (76.07%) |Training time=0.44s (19.51%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.32 |AvgSamplesPerSec=14.40 +epoch: 0|step: 894|ppo_ep: 1|act_loss: 0.11956787109375|cri_loss: 0.0692138671875|unsuper_loss: 0.0 +average reward score: 5.44140625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.30%) |Training time=0.44s (20.15%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.40 +epoch: 0|step: 895|ppo_ep: 1|act_loss: 0.0298919677734375|cri_loss: 0.025909423828125|unsuper_loss: 0.0 +average reward score: 5.58984375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.14%) |Training time=0.44s (20.33%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.40 +epoch: 0|step: 896|ppo_ep: 1|act_loss: 0.169189453125|cri_loss: 0.094482421875|unsuper_loss: 0.0 +average reward score: 5.49609375 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.81s (77.34%) |Training time=0.43s (18.45%) |Others=0.10 (4.21%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.40 +[2023-04-14 09:20:37,862] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, reducing to 4096 +epoch: 0|step: 897|ppo_ep: 1|act_loss: 0.0097503662109375|cri_loss: 0.008026123046875|unsuper_loss: 0.0 +average reward score: 5.234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.64s (76.43%) |Training time=0.41s (19.00%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.40 +epoch: 0|step: 898|ppo_ep: 1|act_loss: -0.071533203125|cri_loss: -0.0288238525390625|unsuper_loss: 0.0 +average reward score: 5.23828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.79%) |Training time=0.43s (19.67%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.40 +[2023-04-14 09:20:42,190] [INFO] [logging.py:96:log_dist] [Rank 0] step=900, skipped=13, lr=[9.43020838726756e-06, 9.43020838726756e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:20:42,208] [INFO] [timer.py:199:stop] epoch=0/micro_step=900/global_step=900, RunningAvgSamplesPerSec=107.83954100227736, CurrSamplesPerSec=118.38544986262222, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:20:42,302] [INFO] [logging.py:96:log_dist] [Rank 0] step=900, skipped=15, lr=[4.886692007019939e-06, 4.886692007019939e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 899|ppo_ep: 1|act_loss: -0.032318115234375|cri_loss: -0.01042938232421875|unsuper_loss: 0.0 +average reward score: 5.43359375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.51%) |Training time=0.43s (19.90%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.40 +epoch: 0|step: 900|ppo_ep: 1|act_loss: -0.1773681640625|cri_loss: -0.0633544921875|unsuper_loss: 0.0 +average reward score: 5.58984375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.36%) |Training time=0.44s (20.11%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.40 +epoch: 0|step: 901|ppo_ep: 1|act_loss: -0.02392578125|cri_loss: 0.00042724609375|unsuper_loss: 0.0 +average reward score: 5.8125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.78%) |Training time=0.43s (19.68%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.40 +epoch: 0|step: 902|ppo_ep: 1|act_loss: 0.1036376953125|cri_loss: 0.0618896484375|unsuper_loss: 0.0 +average reward score: 5.5234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.45%) |Training time=0.43s (20.00%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.40 +epoch: 0|step: 903|ppo_ep: 1|act_loss: 0.25048828125|cri_loss: 0.17138671875|unsuper_loss: 0.0 +average reward score: 5.203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.30%) |Training time=0.44s (20.15%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.40 +epoch: 0|step: 904|ppo_ep: 1|act_loss: 0.1358642578125|cri_loss: 0.0823974609375|unsuper_loss: 0.0 +average reward score: 5.6796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.49%) |Training time=0.43s (19.97%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.40 +epoch: 0|step: 905|ppo_ep: 1|act_loss: -0.014801025390625|cri_loss: 0.017059326171875|unsuper_loss: 0.0 +average reward score: 6.02734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.28%) |Training time=0.44s (20.18%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.40 +epoch: 0|step: 906|ppo_ep: 1|act_loss: -0.05535888671875|cri_loss: -0.0138092041015625|unsuper_loss: 0.0 +average reward score: 5.35546875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.49%) |Training time=0.43s (19.97%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.40 +epoch: 0|step: 907|ppo_ep: 1|act_loss: -0.06268310546875|cri_loss: -0.0184478759765625|unsuper_loss: 0.0 +average reward score: 6.30859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.26%) |Training time=0.44s (20.21%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.40 +epoch: 0|step: 908|ppo_ep: 1|act_loss: 0.07330322265625|cri_loss: 0.0513916015625|unsuper_loss: 0.0 +average reward score: 5.71875 +------------------------------------------------------------------------------------- +|E2E latency=2.40s |Gather latency=0.00s (0.00%) |Generate time=1.64s (68.21%) |Training time=0.44s (18.19%) |Others=0.33 (13.60%)|CurSamplesPerSec=13.34 |AvgSamplesPerSec=14.40 +[2023-04-14 09:21:04,148] [INFO] [logging.py:96:log_dist] [Rank 0] step=910, skipped=13, lr=[9.424631512821333e-06, 9.424631512821333e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:21:04,166] [INFO] [timer.py:199:stop] epoch=0/micro_step=910/global_step=910, RunningAvgSamplesPerSec=107.93576925943141, CurrSamplesPerSec=117.37038319604036, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:21:04,259] [INFO] [logging.py:96:log_dist] [Rank 0] step=910, skipped=15, lr=[4.883809504964325e-06, 4.883809504964325e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 909|ppo_ep: 1|act_loss: -0.1029052734375|cri_loss: -0.020233154296875|unsuper_loss: 0.0 +average reward score: 5.65234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.45%) |Training time=0.44s (20.03%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.40 +epoch: 0|step: 910|ppo_ep: 1|act_loss: 0.165283203125|cri_loss: 0.11480712890625|unsuper_loss: 0.0 +average reward score: 5.48046875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.15%) |Training time=0.44s (20.32%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.40 +epoch: 0|step: 911|ppo_ep: 1|act_loss: 0.1444091796875|cri_loss: 0.0936279296875|unsuper_loss: 0.0 +average reward score: 5.84375 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.82s (77.78%) |Training time=0.42s (17.99%) |Others=0.10 (4.22%)|CurSamplesPerSec=13.65 |AvgSamplesPerSec=14.40 +epoch: 0|step: 912|ppo_ep: 1|act_loss: 0.0100555419921875|cri_loss: 0.03411865234375|unsuper_loss: 0.0 +average reward score: 5.578125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.38%) |Training time=0.44s (20.09%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.40 +epoch: 0|step: 913|ppo_ep: 1|act_loss: 0.0283355712890625|cri_loss: 0.022918701171875|unsuper_loss: 0.0 +average reward score: 5.59375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.51%) |Training time=0.46s (20.97%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.40 +[2023-04-14 09:21:15,175] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 4096, reducing to 2048 +epoch: 0|step: 914|ppo_ep: 1|act_loss: -0.191162109375|cri_loss: -0.0084228515625|unsuper_loss: 0.0 +average reward score: 5.13671875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.52%) |Training time=0.43s (19.88%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.40 +[2023-04-14 09:21:17,321] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 2048, reducing to 1024 +epoch: 0|step: 915|ppo_ep: 1|act_loss: 0.1402587890625|cri_loss: 0.149658203125|unsuper_loss: 0.0 +average reward score: 5.3359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.54%) |Training time=0.43s (19.88%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.40 +epoch: 0|step: 916|ppo_ep: 1|act_loss: -0.060089111328125|cri_loss: -0.01513671875|unsuper_loss: 0.0 +average reward score: 4.9765625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.59%) |Training time=0.46s (20.92%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.40 +epoch: 0|step: 917|ppo_ep: 1|act_loss: 0.202880859375|cri_loss: 0.1185302734375|unsuper_loss: 0.0 +average reward score: 5.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.68%) |Training time=0.45s (20.78%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.40 +epoch: 0|step: 918|ppo_ep: 1|act_loss: 0.038116455078125|cri_loss: 0.027862548828125|unsuper_loss: 0.0 +average reward score: 5.015625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.59%) |Training time=0.45s (20.88%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.40 +[2023-04-14 09:21:26,014] [INFO] [logging.py:96:log_dist] [Rank 0] step=920, skipped=15, lr=[9.420120921365356e-06, 9.420120921365356e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:21:26,032] [INFO] [timer.py:199:stop] epoch=0/micro_step=920/global_step=920, RunningAvgSamplesPerSec=108.00241603015057, CurrSamplesPerSec=111.10003311039004, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:21:26,125] [INFO] [logging.py:96:log_dist] [Rank 0] step=920, skipped=15, lr=[4.880891669101221e-06, 4.880891669101221e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 919|ppo_ep: 1|act_loss: -0.0063018798828125|cri_loss: 0.0113983154296875|unsuper_loss: 0.0 +average reward score: 5.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.71%) |Training time=0.45s (20.75%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.40 +epoch: 0|step: 920|ppo_ep: 1|act_loss: 0.07421875|cri_loss: 0.04022216796875|unsuper_loss: 0.0 +average reward score: 5.41015625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.53%) |Training time=0.46s (20.94%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.40 +epoch: 0|step: 921|ppo_ep: 1|act_loss: 0.251953125|cri_loss: 0.1390380859375|unsuper_loss: 0.0 +average reward score: 4.82421875 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.66s (74.98%) |Training time=0.45s (20.53%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.40 +epoch: 0|step: 922|ppo_ep: 1|act_loss: 0.0997314453125|cri_loss: 0.060546875|unsuper_loss: 0.0 +average reward score: 5.390625 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.73s (75.79%) |Training time=0.45s (19.83%) |Others=0.10 (4.38%)|CurSamplesPerSec=14.05 |AvgSamplesPerSec=14.40 +epoch: 0|step: 923|ppo_ep: 1|act_loss: 0.0804443359375|cri_loss: 0.051025390625|unsuper_loss: 0.0 +average reward score: 5.59765625 +------------------------------------------------------------------------------------- +|E2E latency=2.73s |Gather latency=0.00s (0.00%) |Generate time=1.63s (59.51%) |Training time=0.45s (16.48%) |Others=0.66 (24.01%)|CurSamplesPerSec=11.71 |AvgSamplesPerSec=14.40 +epoch: 0|step: 924|ppo_ep: 1|act_loss: -0.0679931640625|cri_loss: -0.022064208984375|unsuper_loss: 0.0 +average reward score: 5.72265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.62%) |Training time=0.45s (20.85%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.40 +epoch: 0|step: 925|ppo_ep: 1|act_loss: -0.064208984375|cri_loss: -0.0222625732421875|unsuper_loss: 0.0 +average reward score: 5.40625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.48%) |Training time=0.46s (20.98%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.40 +epoch: 0|step: 926|ppo_ep: 1|act_loss: -0.00994873046875|cri_loss: 0.00093841552734375|unsuper_loss: 0.0 +average reward score: 6.06640625 +------------------------------------------------------------------------------------- +|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.81s (76.77%) |Training time=0.45s (19.00%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.61 |AvgSamplesPerSec=14.40 +epoch: 0|step: 927|ppo_ep: 1|act_loss: -0.01502227783203125|cri_loss: -0.00019073486328125|unsuper_loss: 0.0 +average reward score: 5.6796875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.46%) |Training time=0.43s (19.95%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.40 +epoch: 0|step: 928|ppo_ep: 1|act_loss: -0.0238800048828125|cri_loss: -0.0068359375|unsuper_loss: 0.0 +average reward score: 5.67578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.31%) |Training time=0.43s (20.13%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.40 +[2023-04-14 09:21:48,591] [INFO] [logging.py:96:log_dist] [Rank 0] step=930, skipped=15, lr=[9.414421387372385e-06, 9.414421387372385e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:21:48,609] [INFO] [timer.py:199:stop] epoch=0/micro_step=930/global_step=930, RunningAvgSamplesPerSec=108.04897948065681, CurrSamplesPerSec=117.29724807363407, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:21:48,702] [INFO] [logging.py:96:log_dist] [Rank 0] step=930, skipped=15, lr=[4.877938542679992e-06, 4.877938542679992e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 929|ppo_ep: 1|act_loss: -0.038818359375|cri_loss: -0.0157012939453125|unsuper_loss: 0.0 +average reward score: 5.97265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.26%) |Training time=0.44s (20.20%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.40 +epoch: 0|step: 930|ppo_ep: 1|act_loss: 0.05596923828125|cri_loss: 0.030120849609375|unsuper_loss: 0.0 +average reward score: 5.47265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.93%) |Training time=0.44s (20.53%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.40 +epoch: 0|step: 931|ppo_ep: 1|act_loss: 0.02618408203125|cri_loss: 0.01433563232421875|unsuper_loss: 0.0 +average reward score: 5.89453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.10%) |Training time=0.44s (20.36%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.40 +epoch: 0|step: 932|ppo_ep: 1|act_loss: 0.002109527587890625|cri_loss: 0.001583099365234375|unsuper_loss: 0.0 +average reward score: 5.8515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.03%) |Training time=0.44s (20.42%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.40 +epoch: 0|step: 933|ppo_ep: 1|act_loss: 0.03338623046875|cri_loss: 0.0186920166015625|unsuper_loss: 0.0 +average reward score: 5.828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.77%) |Training time=0.45s (20.69%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.40 +epoch: 0|step: 934|ppo_ep: 1|act_loss: 0.0928955078125|cri_loss: 0.05352783203125|unsuper_loss: 0.0 +average reward score: 5.23828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.92%) |Training time=0.45s (20.54%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.40 +epoch: 0|step: 935|ppo_ep: 1|act_loss: 0.0657958984375|cri_loss: 0.034393310546875|unsuper_loss: 0.0 +average reward score: 4.85546875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.11%) |Training time=0.44s (20.34%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.40 +epoch: 0|step: 936|ppo_ep: 1|act_loss: 0.0772705078125|cri_loss: 0.042449951171875|unsuper_loss: 0.0 +average reward score: 5.3515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.01%) |Training time=0.44s (20.45%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.40 +epoch: 0|step: 937|ppo_ep: 1|act_loss: -0.01788330078125|cri_loss: -0.008026123046875|unsuper_loss: 0.0 +average reward score: 5.3515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.80%) |Training time=0.45s (20.65%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.40 +epoch: 0|step: 938|ppo_ep: 1|act_loss: -0.053955078125|cri_loss: -0.024993896484375|unsuper_loss: 0.0 +average reward score: 5.55859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.99%) |Training time=0.44s (20.46%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.40 +[2023-04-14 09:22:10,262] [INFO] [logging.py:96:log_dist] [Rank 0] step=940, skipped=15, lr=[9.408653827083077e-06, 9.408653827083077e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:22:10,280] [INFO] [timer.py:199:stop] epoch=0/micro_step=940/global_step=940, RunningAvgSamplesPerSec=108.10580988208648, CurrSamplesPerSec=113.72648917961666, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:22:10,374] [INFO] [logging.py:96:log_dist] [Rank 0] step=940, skipped=15, lr=[4.874950169473097e-06, 4.874950169473097e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 939|ppo_ep: 1|act_loss: -0.029998779296875|cri_loss: -0.0115203857421875|unsuper_loss: 0.0 +average reward score: 5.453125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.93%) |Training time=0.44s (20.46%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.40 +epoch: 0|step: 940|ppo_ep: 1|act_loss: -0.0288848876953125|cri_loss: -0.0118560791015625|unsuper_loss: 0.0 +average reward score: 5.890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.74%) |Training time=0.45s (20.72%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.40 +epoch: 0|step: 941|ppo_ep: 1|act_loss: -0.04986572265625|cri_loss: -0.02099609375|unsuper_loss: 0.0 +average reward score: 5.5234375 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.79s (76.98%) |Training time=0.44s (18.78%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.74 |AvgSamplesPerSec=14.40 +epoch: 0|step: 942|ppo_ep: 1|act_loss: -0.00787353515625|cri_loss: -0.002758026123046875|unsuper_loss: 0.0 +average reward score: 5.05859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.30%) |Training time=0.44s (20.16%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.40 +epoch: 0|step: 943|ppo_ep: 1|act_loss: -0.0435791015625|cri_loss: -0.0201873779296875|unsuper_loss: 0.0 +average reward score: 6.0078125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.14%) |Training time=0.44s (20.34%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.40 +epoch: 0|step: 944|ppo_ep: 1|act_loss: 0.05010986328125|cri_loss: 0.0257720947265625|unsuper_loss: 0.0 +average reward score: 4.5703125 +------------------------------------------------------------------------------------- +|E2E latency=2.45s |Gather latency=0.00s (0.00%) |Generate time=1.62s (66.19%) |Training time=0.44s (18.12%) |Others=0.38 (15.68%)|CurSamplesPerSec=13.04 |AvgSamplesPerSec=14.40 +epoch: 0|step: 945|ppo_ep: 1|act_loss: 0.0153350830078125|cri_loss: 0.00891876220703125|unsuper_loss: 0.0 +average reward score: 4.94140625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.84%) |Training time=0.45s (20.62%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.40 +epoch: 0|step: 946|ppo_ep: 1|act_loss: 0.058349609375|cri_loss: 0.033203125|unsuper_loss: 0.0 +average reward score: 4.88671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.88%) |Training time=0.45s (20.58%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.40 +epoch: 0|step: 947|ppo_ep: 1|act_loss: 0.108642578125|cri_loss: 0.0631103515625|unsuper_loss: 0.0 +average reward score: 4.83203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.35%) |Training time=0.43s (20.08%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.40 +epoch: 0|step: 948|ppo_ep: 1|act_loss: 0.010772705078125|cri_loss: 0.00787353515625|unsuper_loss: 0.0 +average reward score: 4.84375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.70%) |Training time=0.43s (19.76%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.40 +[2023-04-14 09:22:32,403] [INFO] [logging.py:96:log_dist] [Rank 0] step=950, skipped=15, lr=[9.402818325986586e-06, 9.402818325986586e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:22:32,421] [INFO] [timer.py:199:stop] epoch=0/micro_step=950/global_step=950, RunningAvgSamplesPerSec=108.17638441637497, CurrSamplesPerSec=115.0792441441233, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:22:32,514] [INFO] [logging.py:96:log_dist] [Rank 0] step=950, skipped=15, lr=[4.871926593775433e-06, 4.871926593775433e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 949|ppo_ep: 1|act_loss: -0.05145263671875|cri_loss: -0.0225830078125|unsuper_loss: 0.0 +average reward score: 4.84375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.19%) |Training time=0.44s (20.27%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.40 +epoch: 0|step: 950|ppo_ep: 1|act_loss: 0.0074462890625|cri_loss: 0.004741668701171875|unsuper_loss: 0.0 +average reward score: 4.765625 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.64s (70.07%) |Training time=0.46s (19.71%) |Others=0.24 (10.22%)|CurSamplesPerSec=13.65 |AvgSamplesPerSec=14.40 +epoch: 0|step: 951|ppo_ep: 1|act_loss: -0.062255859375|cri_loss: -0.0283966064453125|unsuper_loss: 0.0 +average reward score: 4.30859375 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.73s (76.23%) |Training time=0.44s (19.42%) |Others=0.10 (4.35%)|CurSamplesPerSec=14.10 |AvgSamplesPerSec=14.40 +epoch: 0|step: 952|ppo_ep: 1|act_loss: -0.0128021240234375|cri_loss: -0.003631591796875|unsuper_loss: 0.0 +average reward score: 4.62890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.73%) |Training time=0.45s (20.74%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.40 +epoch: 0|step: 953|ppo_ep: 1|act_loss: -0.0440673828125|cri_loss: -0.0195465087890625|unsuper_loss: 0.0 +average reward score: 4.96484375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.51%) |Training time=0.46s (20.96%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.40 +epoch: 0|step: 954|ppo_ep: 1|act_loss: -0.02337646484375|cri_loss: -0.0098724365234375|unsuper_loss: 0.0 +average reward score: 4.578125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.07%) |Training time=0.44s (20.36%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.40 +epoch: 0|step: 955|ppo_ep: 1|act_loss: 0.00646209716796875|cri_loss: 0.005374908447265625|unsuper_loss: 0.0 +average reward score: 4.6484375 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.64s (72.41%) |Training time=0.44s (19.38%) |Others=0.19 (8.21%)|CurSamplesPerSec=14.15 |AvgSamplesPerSec=14.40 +epoch: 0|step: 956|ppo_ep: 1|act_loss: -0.00202178955078125|cri_loss: 0.002307891845703125|unsuper_loss: 0.0 +average reward score: 4.7109375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.90%) |Training time=0.43s (19.56%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.40 +epoch: 0|step: 957|ppo_ep: 1|act_loss: 0.0703125|cri_loss: 0.038421630859375|unsuper_loss: 0.0 +average reward score: 4.63671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.01%) |Training time=0.42s (19.45%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.40 +epoch: 0|step: 958|ppo_ep: 1|act_loss: 0.0372314453125|cri_loss: 0.0194244384765625|unsuper_loss: 0.0 +average reward score: 4.56640625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.19%) |Training time=0.42s (19.25%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.40 +[2023-04-14 09:22:54,486] [INFO] [logging.py:96:log_dist] [Rank 0] step=960, skipped=15, lr=[9.396914970579121e-06, 9.396914970579121e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:22:54,504] [INFO] [timer.py:199:stop] epoch=0/micro_step=960/global_step=960, RunningAvgSamplesPerSec=108.26503201681297, CurrSamplesPerSec=133.41457227490508, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:22:54,597] [INFO] [logging.py:96:log_dist] [Rank 0] step=960, skipped=15, lr=[4.86886786040369e-06, 4.86886786040369e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 959|ppo_ep: 1|act_loss: 0.00933074951171875|cri_loss: 0.005535125732421875|unsuper_loss: 0.0 +average reward score: 4.99609375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.67s (76.89%) |Training time=0.40s (18.57%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.40 +epoch: 0|step: 960|ppo_ep: 1|act_loss: 0.065673828125|cri_loss: 0.034149169921875|unsuper_loss: 0.0 +average reward score: 5.2265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.00%) |Training time=0.45s (20.47%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.40 +epoch: 0|step: 961|ppo_ep: 1|act_loss: 0.04327392578125|cri_loss: 0.023590087890625|unsuper_loss: 0.0 +average reward score: 4.6796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.43%) |Training time=0.43s (20.02%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.41 +epoch: 0|step: 962|ppo_ep: 1|act_loss: -0.0791015625|cri_loss: -0.03839111328125|unsuper_loss: 0.0 +average reward score: 5.015625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.31%) |Training time=0.44s (20.15%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.41 +epoch: 0|step: 963|ppo_ep: 1|act_loss: 0.05120849609375|cri_loss: 0.027435302734375|unsuper_loss: 0.0 +average reward score: 5.18359375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.79%) |Training time=0.43s (19.67%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41 +epoch: 0|step: 964|ppo_ep: 1|act_loss: 0.0233154296875|cri_loss: 0.014129638671875|unsuper_loss: 0.0 +average reward score: 5.0546875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.50%) |Training time=0.43s (19.95%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41 +epoch: 0|step: 965|ppo_ep: 1|act_loss: 0.08660888671875|cri_loss: 0.04522705078125|unsuper_loss: 0.0 +average reward score: 4.3359375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.68%) |Training time=0.45s (20.78%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.41 +epoch: 0|step: 966|ppo_ep: 1|act_loss: -0.00670623779296875|cri_loss: -0.0023937225341796875|unsuper_loss: 0.0 +average reward score: 5.03125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.71%) |Training time=0.45s (20.76%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.41 +epoch: 0|step: 967|ppo_ep: 1|act_loss: -0.03643798828125|cri_loss: -0.016998291015625|unsuper_loss: 0.0 +average reward score: 5.109375 +------------------------------------------------------------------------------------- +|E2E latency=2.44s |Gather latency=0.00s (0.00%) |Generate time=1.63s (66.53%) |Training time=0.45s (18.34%) |Others=0.37 (15.13%)|CurSamplesPerSec=13.09 |AvgSamplesPerSec=14.41 +epoch: 0|step: 968|ppo_ep: 1|act_loss: -0.0234375|cri_loss: -0.0086212158203125|unsuper_loss: 0.0 +average reward score: 5.3125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.90%) |Training time=0.44s (20.54%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41 +[2023-04-14 09:23:16,482] [INFO] [logging.py:96:log_dist] [Rank 0] step=970, skipped=15, lr=[9.390943848362648e-06, 9.390943848362648e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:23:16,499] [INFO] [timer.py:199:stop] epoch=0/micro_step=970/global_step=970, RunningAvgSamplesPerSec=108.32570871091323, CurrSamplesPerSec=111.58485938589877, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:23:16,592] [INFO] [logging.py:96:log_dist] [Rank 0] step=970, skipped=15, lr=[4.8657740146956724e-06, 4.8657740146956724e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 969|ppo_ep: 1|act_loss: -0.00354766845703125|cri_loss: -0.0011920928955078125|unsuper_loss: 0.0 +average reward score: 5.51171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.65%) |Training time=0.45s (20.83%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41 +epoch: 0|step: 970|ppo_ep: 1|act_loss: -0.0154571533203125|cri_loss: -0.00266265869140625|unsuper_loss: 0.0 +average reward score: 5.08984375 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.65s (74.54%) |Training time=0.46s (20.98%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.46 |AvgSamplesPerSec=14.41 +epoch: 0|step: 971|ppo_ep: 1|act_loss: -0.0240478515625|cri_loss: -0.01140594482421875|unsuper_loss: 0.0 +average reward score: 5.2421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.38%) |Training time=0.41s (19.03%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.41 +epoch: 0|step: 972|ppo_ep: 1|act_loss: -0.014373779296875|cri_loss: -0.00548553466796875|unsuper_loss: 0.0 +average reward score: 5.0625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.47%) |Training time=0.43s (19.96%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.41 +epoch: 0|step: 973|ppo_ep: 1|act_loss: -0.0204620361328125|cri_loss: -0.00933837890625|unsuper_loss: 0.0 +average reward score: 6.5625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.12%) |Training time=0.44s (20.34%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.41 +epoch: 0|step: 974|ppo_ep: 1|act_loss: 0.0244140625|cri_loss: 0.0131072998046875|unsuper_loss: 0.0 +average reward score: 5.09765625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.16%) |Training time=0.44s (20.33%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41 +epoch: 0|step: 975|ppo_ep: 1|act_loss: 0.022674560546875|cri_loss: 0.013916015625|unsuper_loss: 0.0 +average reward score: 6.2734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.12%) |Training time=0.44s (20.33%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41 +epoch: 0|step: 976|ppo_ep: 1|act_loss: -0.0102996826171875|cri_loss: -0.003696441650390625|unsuper_loss: 0.0 +average reward score: 6.1328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.92%) |Training time=0.45s (20.54%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41 +epoch: 0|step: 977|ppo_ep: 1|act_loss: -0.0123748779296875|cri_loss: -0.00479888916015625|unsuper_loss: 0.0 +average reward score: 5.69140625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.66%) |Training time=0.45s (20.81%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41 +epoch: 0|step: 978|ppo_ep: 1|act_loss: -0.020172119140625|cri_loss: -0.00809478759765625|unsuper_loss: 0.0 +average reward score: 5.91015625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.85%) |Training time=0.45s (20.62%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.41 +[2023-04-14 09:23:38,256] [INFO] [logging.py:96:log_dist] [Rank 0] step=980, skipped=15, lr=[9.384905047843602e-06, 9.384905047843602e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:23:38,616] [INFO] [timer.py:199:stop] epoch=0/micro_step=980/global_step=980, RunningAvgSamplesPerSec=108.26986922621812, CurrSamplesPerSec=51.679806183969674, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:23:38,710] [INFO] [logging.py:96:log_dist] [Rank 0] step=980, skipped=15, lr=[4.862645102509638e-06, 4.862645102509638e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 979|ppo_ep: 1|act_loss: -0.041595458984375|cri_loss: -0.0178680419921875|unsuper_loss: 0.0 +average reward score: 5.859375 +------------------------------------------------------------------------------------- +|E2E latency=2.53s |Gather latency=0.00s (0.00%) |Generate time=1.64s (64.81%) |Training time=0.79s (31.28%) |Others=0.10 (3.91%)|CurSamplesPerSec=12.67 |AvgSamplesPerSec=14.41 +epoch: 0|step: 980|ppo_ep: 1|act_loss: 0.01123046875|cri_loss: 0.0067138671875|unsuper_loss: 0.0 +average reward score: 6.11328125 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.73s (75.98%) |Training time=0.45s (19.68%) |Others=0.10 (4.34%)|CurSamplesPerSec=14.08 |AvgSamplesPerSec=14.41 +epoch: 0|step: 981|ppo_ep: 1|act_loss: 0.071533203125|cri_loss: 0.037384033203125|unsuper_loss: 0.0 +average reward score: 5.9609375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.54%) |Training time=0.45s (20.92%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.41 +epoch: 0|step: 982|ppo_ep: 1|act_loss: -0.0034942626953125|cri_loss: -8.7738037109375e-05|unsuper_loss: 0.0 +average reward score: 5.921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.84%) |Training time=0.45s (20.61%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41 +epoch: 0|step: 983|ppo_ep: 1|act_loss: -0.00067138671875|cri_loss: 3.4809112548828125e-05|unsuper_loss: 0.0 +average reward score: 5.76953125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.72%) |Training time=0.45s (20.74%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41 +epoch: 0|step: 984|ppo_ep: 1|act_loss: -0.009124755859375|cri_loss: -0.004352569580078125|unsuper_loss: 0.0 +average reward score: 6.12890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.54%) |Training time=0.45s (20.91%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41 +epoch: 0|step: 985|ppo_ep: 1|act_loss: -0.0148773193359375|cri_loss: -0.00699615478515625|unsuper_loss: 0.0 +average reward score: 5.7890625 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.00%) |Training time=0.44s (18.75%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.41 +epoch: 0|step: 986|ppo_ep: 1|act_loss: -0.018524169921875|cri_loss: -0.0087890625|unsuper_loss: 0.0 +average reward score: 5.6640625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.38%) |Training time=0.44s (20.06%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41 +epoch: 0|step: 987|ppo_ep: 1|act_loss: 0.069580078125|cri_loss: 0.036865234375|unsuper_loss: 0.0 +average reward score: 5.77734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.34%) |Training time=0.44s (20.12%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41 +epoch: 0|step: 988|ppo_ep: 1|act_loss: 0.01471710205078125|cri_loss: 0.00804901123046875|unsuper_loss: 0.0 +average reward score: 6.09375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.91%) |Training time=0.45s (20.53%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41 +[2023-04-14 09:24:00,562] [INFO] [logging.py:96:log_dist] [Rank 0] step=990, skipped=15, lr=[9.378798658531574e-06, 9.378798658531574e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:24:00,580] [INFO] [timer.py:199:stop] epoch=0/micro_step=990/global_step=990, RunningAvgSamplesPerSec=108.31861477028136, CurrSamplesPerSec=114.56618951285925, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:24:00,673] [INFO] [logging.py:96:log_dist] [Rank 0] step=990, skipped=15, lr=[4.859481170223614e-06, 4.859481170223614e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 989|ppo_ep: 1|act_loss: 0.0231475830078125|cri_loss: 0.0121002197265625|unsuper_loss: 0.0 +average reward score: 6.42578125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.06%) |Training time=0.44s (20.41%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41 +epoch: 0|step: 990|ppo_ep: 1|act_loss: -0.007503509521484375|cri_loss: -0.003055572509765625|unsuper_loss: 0.0 +average reward score: 6.16015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.04%) |Training time=0.44s (20.39%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41 +epoch: 0|step: 991|ppo_ep: 1|act_loss: 0.0267181396484375|cri_loss: 0.01448822021484375|unsuper_loss: 0.0 +average reward score: 6.234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.72%) |Training time=0.45s (20.65%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41 +epoch: 0|step: 992|ppo_ep: 1|act_loss: 0.029205322265625|cri_loss: 0.017303466796875|unsuper_loss: 0.0 +average reward score: 6.21875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.71%) |Training time=0.45s (20.74%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41 +epoch: 0|step: 993|ppo_ep: 1|act_loss: -0.031524658203125|cri_loss: -0.01493072509765625|unsuper_loss: 0.0 +average reward score: 5.78515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.63%) |Training time=0.45s (20.84%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.41 +epoch: 0|step: 994|ppo_ep: 1|act_loss: -0.037261962890625|cri_loss: -0.016632080078125|unsuper_loss: 0.0 +average reward score: 5.89453125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.71%) |Training time=0.45s (20.74%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41 +epoch: 0|step: 995|ppo_ep: 1|act_loss: 0.0576171875|cri_loss: 0.0328369140625|unsuper_loss: 0.0 +average reward score: 6.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.40%) |Training time=0.46s (20.98%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41 +epoch: 0|step: 996|ppo_ep: 1|act_loss: 0.0141754150390625|cri_loss: 0.0078125|unsuper_loss: 0.0 +average reward score: 5.375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.63%) |Training time=0.45s (20.84%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41 +epoch: 0|step: 997|ppo_ep: 1|act_loss: 0.04144287109375|cri_loss: 0.0221099853515625|unsuper_loss: 0.0 +average reward score: 6.1640625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.45%) |Training time=0.46s (21.01%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.41 +epoch: 0|step: 998|ppo_ep: 1|act_loss: 0.00325775146484375|cri_loss: 0.0024585723876953125|unsuper_loss: 0.0 +average reward score: 5.97265625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.88%) |Training time=0.45s (20.62%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.41 +[2023-04-14 09:24:22,297] [INFO] [logging.py:96:log_dist] [Rank 0] step=1000, skipped=15, lr=[9.372624770937987e-06, 9.372624770937987e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:24:22,315] [INFO] [timer.py:199:stop] epoch=0/micro_step=1000/global_step=1000, RunningAvgSamplesPerSec=108.34636245517726, CurrSamplesPerSec=111.06482200944505, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:24:22,408] [INFO] [logging.py:96:log_dist] [Rank 0] step=1000, skipped=15, lr=[4.856282264734708e-06, 4.856282264734708e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 999|ppo_ep: 1|act_loss: 0.003055572509765625|cri_loss: 0.0018558502197265625|unsuper_loss: 0.0 +average reward score: 6.09375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.69%) |Training time=0.45s (20.79%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1000|ppo_ep: 1|act_loss: -0.008026123046875|cri_loss: -0.003406524658203125|unsuper_loss: 0.0 +average reward score: 5.75390625 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.79s (76.68%) |Training time=0.44s (19.02%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.68 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1001|ppo_ep: 1|act_loss: -0.03692626953125|cri_loss: -0.017974853515625|unsuper_loss: 0.0 +average reward score: 5.84375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.69%) |Training time=0.43s (19.77%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1002|ppo_ep: 1|act_loss: -0.041015625|cri_loss: -0.0196990966796875|unsuper_loss: 0.0 +average reward score: 5.515625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.52%) |Training time=0.43s (19.95%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1003|ppo_ep: 1|act_loss: 0.01285552978515625|cri_loss: 0.006832122802734375|unsuper_loss: 0.0 +average reward score: 5.49609375 +------------------------------------------------------------------------------------- +|E2E latency=3.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (51.14%) |Training time=0.44s (14.02%) |Others=1.10 (34.84%)|CurSamplesPerSec=10.12 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1004|ppo_ep: 1|act_loss: 0.014739990234375|cri_loss: 0.0085296630859375|unsuper_loss: 0.0 +average reward score: 5.42578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.93%) |Training time=0.44s (20.51%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1005|ppo_ep: 1|act_loss: 0.0325927734375|cri_loss: 0.019317626953125|unsuper_loss: 0.0 +average reward score: 5.9453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.77%) |Training time=0.45s (20.66%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1006|ppo_ep: 1|act_loss: 0.00997161865234375|cri_loss: 0.00556182861328125|unsuper_loss: 0.0 +average reward score: 5.7265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.56%) |Training time=0.45s (20.89%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1007|ppo_ep: 1|act_loss: 0.01336669921875|cri_loss: 0.00836944580078125|unsuper_loss: 0.0 +average reward score: 5.4453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.01%) |Training time=0.44s (20.44%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1008|ppo_ep: 1|act_loss: -0.04754638671875|cri_loss: -0.018310546875|unsuper_loss: 0.0 +average reward score: 5.71875 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.46%) |Training time=0.46s (21.05%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.41 +[2023-04-14 09:24:45,254] [INFO] [logging.py:96:log_dist] [Rank 0] step=1010, skipped=15, lr=[9.366383476574745e-06, 9.366383476574745e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:24:45,272] [INFO] [timer.py:199:stop] epoch=0/micro_step=1010/global_step=1010, RunningAvgSamplesPerSec=108.39448732121316, CurrSamplesPerSec=110.60107949535652, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:24:45,365] [INFO] [logging.py:96:log_dist] [Rank 0] step=1010, skipped=15, lr=[4.853048433458417e-06, 4.853048433458417e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1009|ppo_ep: 1|act_loss: -0.010223388671875|cri_loss: -0.004230499267578125|unsuper_loss: 0.0 +average reward score: 5.6328125 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.72s (75.75%) |Training time=0.45s (19.93%) |Others=0.10 (4.32%)|CurSamplesPerSec=14.10 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1010|ppo_ep: 1|act_loss: -0.05865478515625|cri_loss: -0.0288543701171875|unsuper_loss: 0.0 +average reward score: 5.46875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.53%) |Training time=0.45s (20.93%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1011|ppo_ep: 1|act_loss: -0.03656005859375|cri_loss: -0.017578125|unsuper_loss: 0.0 +average reward score: 5.47265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.98%) |Training time=0.44s (20.47%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1012|ppo_ep: 1|act_loss: -0.009002685546875|cri_loss: -0.003559112548828125|unsuper_loss: 0.0 +average reward score: 5.359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.94%) |Training time=0.44s (20.52%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1013|ppo_ep: 1|act_loss: 0.029022216796875|cri_loss: 0.0154571533203125|unsuper_loss: 0.0 +average reward score: 5.85546875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.88%) |Training time=0.45s (20.57%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1014|ppo_ep: 1|act_loss: 0.0226593017578125|cri_loss: 0.01175689697265625|unsuper_loss: 0.0 +average reward score: 5.578125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.72%) |Training time=0.45s (20.73%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1015|ppo_ep: 1|act_loss: -0.0012493133544921875|cri_loss: 0.0007171630859375|unsuper_loss: 0.0 +average reward score: 5.66015625 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.00%) |Training time=0.44s (18.78%) |Others=0.10 (4.22%)|CurSamplesPerSec=13.70 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1016|ppo_ep: 1|act_loss: 0.038787841796875|cri_loss: 0.020965576171875|unsuper_loss: 0.0 +average reward score: 5.43359375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.66s (75.84%) |Training time=0.43s (19.62%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1017|ppo_ep: 1|act_loss: -0.019256591796875|cri_loss: -0.00925445556640625|unsuper_loss: 0.0 +average reward score: 5.65625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.90%) |Training time=0.44s (20.55%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1018|ppo_ep: 1|act_loss: -0.06585693359375|cri_loss: -0.031982421875|unsuper_loss: 0.0 +average reward score: 6.078125 +------------------------------------------------------------------------------------- +|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.62s (76.81%) |Training time=0.39s (18.33%) |Others=0.10 (4.85%)|CurSamplesPerSec=15.17 |AvgSamplesPerSec=14.41 +[2023-04-14 09:25:07,744] [INFO] [logging.py:96:log_dist] [Rank 0] step=1020, skipped=15, lr=[9.360074867952892e-06, 9.360074867952892e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:25:07,762] [INFO] [timer.py:199:stop] epoch=0/micro_step=1020/global_step=1020, RunningAvgSamplesPerSec=108.46349582136385, CurrSamplesPerSec=110.18052442942728, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:25:07,855] [INFO] [logging.py:96:log_dist] [Rank 0] step=1020, skipped=15, lr=[4.849779724327923e-06, 4.849779724327923e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1019|ppo_ep: 1|act_loss: 0.060546875|cri_loss: 0.03167724609375|unsuper_loss: 0.0 +average reward score: 5.390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.60%) |Training time=0.45s (20.88%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1020|ppo_ep: 1|act_loss: 0.036865234375|cri_loss: 0.019134521484375|unsuper_loss: 0.0 +average reward score: 5.140625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.89%) |Training time=0.45s (20.57%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1021|ppo_ep: 1|act_loss: 0.0103302001953125|cri_loss: 0.00662994384765625|unsuper_loss: 0.0 +average reward score: 5.359375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.70%) |Training time=0.45s (20.77%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1022|ppo_ep: 1|act_loss: -0.0287017822265625|cri_loss: -0.01360321044921875|unsuper_loss: 0.0 +average reward score: 5.421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.06%) |Training time=0.44s (20.39%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1023|ppo_ep: 1|act_loss: 8.0108642578125e-05|cri_loss: 0.00118255615234375|unsuper_loss: 0.0 +average reward score: 4.91015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.97%) |Training time=0.44s (20.47%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1024|ppo_ep: 1|act_loss: -0.00025081634521484375|cri_loss: 0.0006818771362304688|unsuper_loss: 0.0 +average reward score: 5.42578125 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.55%) |Training time=0.45s (20.64%) |Others=0.11 (4.81%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1025|ppo_ep: 1|act_loss: 0.051239013671875|cri_loss: 0.027984619140625|unsuper_loss: 0.0 +average reward score: 5.265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.83%) |Training time=0.45s (20.62%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1026|ppo_ep: 1|act_loss: -0.001129150390625|cri_loss: 0.001171112060546875|unsuper_loss: 0.0 +average reward score: 5.23828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.72%) |Training time=0.45s (20.74%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1027|ppo_ep: 1|act_loss: 0.031158447265625|cri_loss: 0.0161895751953125|unsuper_loss: 0.0 +average reward score: 5.578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.87%) |Training time=0.45s (20.59%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1028|ppo_ep: 1|act_loss: 0.01092529296875|cri_loss: 0.006145477294921875|unsuper_loss: 0.0 +average reward score: 5.0703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.57%) |Training time=0.45s (20.90%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41 +[2023-04-14 09:25:29,433] [INFO] [logging.py:96:log_dist] [Rank 0] step=1030, skipped=15, lr=[9.353699038581223e-06, 9.353699038581223e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:25:29,451] [INFO] [timer.py:199:stop] epoch=0/micro_step=1030/global_step=1030, RunningAvgSamplesPerSec=108.50163632858461, CurrSamplesPerSec=113.407363407382, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:25:29,544] [INFO] [logging.py:96:log_dist] [Rank 0] step=1030, skipped=15, lr=[4.84647618579338e-06, 4.84647618579338e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1029|ppo_ep: 1|act_loss: -0.00655364990234375|cri_loss: -0.00279998779296875|unsuper_loss: 0.0 +average reward score: 5.3125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.93%) |Training time=0.44s (20.54%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1030|ppo_ep: 1|act_loss: 0.0004353523254394531|cri_loss: 0.001064300537109375|unsuper_loss: 0.0 +average reward score: 5.28515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.73%) |Training time=0.45s (20.70%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1031|ppo_ep: 1|act_loss: -0.03240966796875|cri_loss: -0.015289306640625|unsuper_loss: 0.0 +average reward score: 5.19140625 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.19%) |Training time=0.43s (18.57%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.74 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1032|ppo_ep: 1|act_loss: 0.00957489013671875|cri_loss: 0.005382537841796875|unsuper_loss: 0.0 +average reward score: 5.7421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.78%) |Training time=0.45s (20.63%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1033|ppo_ep: 1|act_loss: 0.00380706787109375|cri_loss: 0.003082275390625|unsuper_loss: 0.0 +average reward score: 5.140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.66%) |Training time=0.45s (20.78%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1034|ppo_ep: 1|act_loss: 0.040985107421875|cri_loss: 0.024444580078125|unsuper_loss: 0.0 +average reward score: 5.2421875 +------------------------------------------------------------------------------------- +|E2E latency=2.51s |Gather latency=0.00s (0.00%) |Generate time=1.62s (64.50%) |Training time=0.45s (17.83%) |Others=0.44 (17.66%)|CurSamplesPerSec=12.76 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1035|ppo_ep: 1|act_loss: 0.0794677734375|cri_loss: 0.04132080078125|unsuper_loss: 0.0 +average reward score: 5.3125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.74%) |Training time=0.45s (20.69%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1036|ppo_ep: 1|act_loss: 0.020751953125|cri_loss: 0.0122833251953125|unsuper_loss: 0.0 +average reward score: 5.078125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.38%) |Training time=0.43s (20.07%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1037|ppo_ep: 1|act_loss: -0.04425048828125|cri_loss: -0.0213623046875|unsuper_loss: 0.0 +average reward score: 5.4609375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.59%) |Training time=0.46s (20.91%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1038|ppo_ep: 1|act_loss: -0.0184478759765625|cri_loss: -0.0088348388671875|unsuper_loss: 0.0 +average reward score: 5.7734375 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.72s (75.96%) |Training time=0.45s (19.69%) |Others=0.10 (4.35%)|CurSamplesPerSec=14.14 |AvgSamplesPerSec=14.41 +[2023-04-14 09:25:51,701] [INFO] [logging.py:96:log_dist] [Rank 0] step=1040, skipped=15, lr=[9.347256082964908e-06, 9.347256082964908e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:25:51,719] [INFO] [timer.py:199:stop] epoch=0/micro_step=1040/global_step=1040, RunningAvgSamplesPerSec=108.5441294190187, CurrSamplesPerSec=113.64030904049277, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:25:51,812] [INFO] [logging.py:96:log_dist] [Rank 0] step=1040, skipped=15, lr=[4.843137866821196e-06, 4.843137866821196e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1039|ppo_ep: 1|act_loss: -0.0318603515625|cri_loss: -0.015228271484375|unsuper_loss: 0.0 +average reward score: 6.421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.90%) |Training time=0.44s (20.58%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1040|ppo_ep: 1|act_loss: -0.003589630126953125|cri_loss: -0.0008382797241210938|unsuper_loss: 0.0 +average reward score: 5.859375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.07%) |Training time=0.44s (20.36%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1041|ppo_ep: 1|act_loss: 0.020965576171875|cri_loss: 0.0125579833984375|unsuper_loss: 0.0 +average reward score: 6.23828125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.95%) |Training time=0.44s (20.49%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1042|ppo_ep: 1|act_loss: 0.06640625|cri_loss: 0.03594970703125|unsuper_loss: 0.0 +average reward score: 6.8046875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.68%) |Training time=0.43s (19.77%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1043|ppo_ep: 1|act_loss: 0.0677490234375|cri_loss: 0.035980224609375|unsuper_loss: 0.0 +average reward score: 6.69140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.70%) |Training time=0.43s (19.74%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1044|ppo_ep: 1|act_loss: 0.028533935546875|cri_loss: 0.015899658203125|unsuper_loss: 0.0 +average reward score: 6.125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.34%) |Training time=0.43s (20.09%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1045|ppo_ep: 1|act_loss: -0.02740478515625|cri_loss: -0.011199951171875|unsuper_loss: 0.0 +average reward score: 6.6171875 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.11%) |Training time=0.43s (18.61%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.74 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1046|ppo_ep: 1|act_loss: -0.00704193115234375|cri_loss: -0.0029010772705078125|unsuper_loss: 0.0 +average reward score: 6.53515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.64%) |Training time=0.43s (19.75%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1047|ppo_ep: 1|act_loss: -0.041900634765625|cri_loss: -0.0191650390625|unsuper_loss: 0.0 +average reward score: 5.9765625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (76.03%) |Training time=0.42s (19.39%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1048|ppo_ep: 1|act_loss: -0.02435302734375|cri_loss: -0.01096343994140625|unsuper_loss: 0.0 +average reward score: 6.5234375 +------------------------------------------------------------------------------------- +|E2E latency=3.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (51.33%) |Training time=0.42s (13.24%) |Others=1.13 (35.43%)|CurSamplesPerSec=10.03 |AvgSamplesPerSec=14.41 +[2023-04-14 09:26:14,502] [INFO] [logging.py:96:log_dist] [Rank 0] step=1050, skipped=15, lr=[9.340746096604097e-06, 9.340746096604097e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:26:14,520] [INFO] [timer.py:199:stop] epoch=0/micro_step=1050/global_step=1050, RunningAvgSamplesPerSec=108.63875987318835, CurrSamplesPerSec=119.30008719666715, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:26:14,613] [INFO] [logging.py:96:log_dist] [Rank 0] step=1050, skipped=15, lr=[4.839764816893315e-06, 4.839764816893315e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1049|ppo_ep: 1|act_loss: -0.0160675048828125|cri_loss: -0.0062408447265625|unsuper_loss: 0.0 +average reward score: 6.0625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.49%) |Training time=0.43s (19.96%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1050|ppo_ep: 1|act_loss: 0.043060302734375|cri_loss: 0.02301025390625|unsuper_loss: 0.0 +average reward score: 6.46875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.75%) |Training time=0.43s (19.67%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1051|ppo_ep: 1|act_loss: 0.0732421875|cri_loss: 0.038848876953125|unsuper_loss: 0.0 +average reward score: 6.47265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.35%) |Training time=0.42s (19.48%) |Others=0.11 (5.17%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1052|ppo_ep: 1|act_loss: 0.10516357421875|cri_loss: 0.055908203125|unsuper_loss: 0.0 +average reward score: 6.5546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.51%) |Training time=0.43s (19.92%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1053|ppo_ep: 1|act_loss: 0.01715087890625|cri_loss: 0.0106201171875|unsuper_loss: 0.0 +average reward score: 7.18359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.70%) |Training time=0.43s (19.74%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1054|ppo_ep: 1|act_loss: -0.00988006591796875|cri_loss: -0.0038089752197265625|unsuper_loss: 0.0 +average reward score: 6.22265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.75%) |Training time=0.43s (19.70%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1055|ppo_ep: 1|act_loss: -0.012725830078125|cri_loss: -0.0054473876953125|unsuper_loss: 0.0 +average reward score: 5.83984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.56%) |Training time=0.43s (19.87%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1056|ppo_ep: 1|act_loss: -0.029205322265625|cri_loss: -0.0108184814453125|unsuper_loss: 0.0 +average reward score: 6.203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.64%) |Training time=0.43s (19.80%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1057|ppo_ep: 1|act_loss: -0.03485107421875|cri_loss: -0.0145416259765625|unsuper_loss: 0.0 +average reward score: 5.52734375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.77%) |Training time=0.43s (19.69%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1058|ppo_ep: 1|act_loss: -0.027923583984375|cri_loss: -0.0127105712890625|unsuper_loss: 0.0 +average reward score: 6.32421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.77%) |Training time=0.43s (19.68%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41 +[2023-04-14 09:26:36,137] [INFO] [logging.py:96:log_dist] [Rank 0] step=1060, skipped=15, lr=[9.334169175992489e-06, 9.334169175992489e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:26:36,479] [INFO] [timer.py:199:stop] epoch=0/micro_step=1060/global_step=1060, RunningAvgSamplesPerSec=108.62859228172402, CurrSamplesPerSec=53.83196378092723, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:26:36,653] [INFO] [logging.py:96:log_dist] [Rank 0] step=1060, skipped=15, lr=[4.836357086006471e-06, 4.836357086006471e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1059|ppo_ep: 1|act_loss: 0.0212249755859375|cri_loss: 0.01244354248046875|unsuper_loss: 0.0 +average reward score: 6.14453125 +------------------------------------------------------------------------------------- +|E2E latency=2.58s |Gather latency=0.00s (0.00%) |Generate time=1.63s (63.28%) |Training time=0.76s (29.42%) |Others=0.19 (7.30%)|CurSamplesPerSec=12.41 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1060|ppo_ep: 1|act_loss: 0.059539794921875|cri_loss: 0.03155517578125|unsuper_loss: 0.0 +average reward score: 5.90234375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.67s (75.82%) |Training time=0.42s (19.23%) |Others=0.11 (4.94%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1061|ppo_ep: 1|act_loss: 0.025146484375|cri_loss: 0.0146636962890625|unsuper_loss: 0.0 +average reward score: 5.3984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.44%) |Training time=0.43s (19.97%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1062|ppo_ep: 1|act_loss: 0.04669189453125|cri_loss: 0.0294342041015625|unsuper_loss: 0.0 +average reward score: 5.4375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.02%) |Training time=0.44s (20.41%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1063|ppo_ep: 1|act_loss: -0.025238037109375|cri_loss: -0.00970458984375|unsuper_loss: 0.0 +average reward score: 5.28515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.94%) |Training time=0.44s (20.52%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1064|ppo_ep: 1|act_loss: -0.0404052734375|cri_loss: -0.018768310546875|unsuper_loss: 0.0 +average reward score: 5.91015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.58%) |Training time=0.43s (19.87%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1065|ppo_ep: 1|act_loss: -0.181884765625|cri_loss: -0.07672119140625|unsuper_loss: 0.0 +average reward score: 5.13671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.63%) |Training time=0.43s (19.79%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1066|ppo_ep: 1|act_loss: 0.025543212890625|cri_loss: 0.014892578125|unsuper_loss: 0.0 +average reward score: 5.42578125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.36%) |Training time=0.44s (20.13%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1067|ppo_ep: 1|act_loss: 0.03558349609375|cri_loss: 0.0216064453125|unsuper_loss: 0.0 +average reward score: 5.6171875 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.73s (76.62%) |Training time=0.43s (19.02%) |Others=0.10 (4.36%)|CurSamplesPerSec=14.15 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1068|ppo_ep: 1|act_loss: 0.0838623046875|cri_loss: 0.0458984375|unsuper_loss: 0.0 +average reward score: 4.89453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.66%) |Training time=0.43s (19.77%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41 +[2023-04-14 09:26:58,334] [INFO] [logging.py:96:log_dist] [Rank 0] step=1070, skipped=15, lr=[9.327525418615915e-06, 9.327525418615915e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:26:58,352] [INFO] [timer.py:199:stop] epoch=0/micro_step=1070/global_step=1070, RunningAvgSamplesPerSec=108.71501038930042, CurrSamplesPerSec=122.18717841341514, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:26:58,445] [INFO] [logging.py:96:log_dist] [Rank 0] step=1070, skipped=15, lr=[4.8329147246714595e-06, 4.8329147246714595e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1069|ppo_ep: 1|act_loss: 0.06964111328125|cri_loss: 0.0382080078125|unsuper_loss: 0.0 +average reward score: 5.17578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.82%) |Training time=0.42s (19.64%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1070|ppo_ep: 1|act_loss: -0.0236053466796875|cri_loss: -0.008270263671875|unsuper_loss: 0.0 +average reward score: 5.52734375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.72%) |Training time=0.43s (19.72%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1071|ppo_ep: 1|act_loss: 0.009796142578125|cri_loss: 0.010284423828125|unsuper_loss: 0.0 +average reward score: 5.6015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.72%) |Training time=0.42s (19.70%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1072|ppo_ep: 1|act_loss: 0.021942138671875|cri_loss: 0.013916015625|unsuper_loss: 0.0 +average reward score: 5.07421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.89%) |Training time=0.44s (20.56%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1073|ppo_ep: 1|act_loss: 0.016815185546875|cri_loss: 0.0126800537109375|unsuper_loss: 0.0 +average reward score: 5.17578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.25%) |Training time=0.44s (20.19%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1074|ppo_ep: 1|act_loss: 0.0816650390625|cri_loss: 0.04913330078125|unsuper_loss: 0.0 +average reward score: 4.76171875 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.67s (74.00%) |Training time=0.48s (21.30%) |Others=0.11 (4.70%)|CurSamplesPerSec=14.19 |AvgSamplesPerSec=14.41 +[2023-04-14 09:27:11,505] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 1075|ppo_ep: 1|act_loss: 0.052154541015625|cri_loss: 0.035186767578125|unsuper_loss: 0.0 +average reward score: 4.91796875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.31%) |Training time=0.44s (20.20%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1076|ppo_ep: 1|act_loss: -0.02850341796875|cri_loss: -0.00995635986328125|unsuper_loss: 0.0 +average reward score: 4.4765625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.93%) |Training time=0.45s (20.55%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1077|ppo_ep: 1|act_loss: 0.0109100341796875|cri_loss: 0.01220703125|unsuper_loss: 0.0 +average reward score: 4.75 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.29%) |Training time=0.43s (20.15%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.41 +[2023-04-14 09:27:17,989] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384 +epoch: 0|step: 1078|ppo_ep: 1|act_loss: -0.045440673828125|cri_loss: 0.000762939453125|unsuper_loss: 0.0 +average reward score: 4.140625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.52%) |Training time=0.44s (20.28%) |Others=0.09 (4.20%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.41 +[2023-04-14 09:27:20,053] [INFO] [logging.py:96:log_dist] [Rank 0] step=1080, skipped=15, lr=[9.320814922950886e-06, 9.320814922950886e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:27:20,071] [INFO] [timer.py:199:stop] epoch=0/micro_step=1080/global_step=1080, RunningAvgSamplesPerSec=108.79082765447629, CurrSamplesPerSec=116.2762955904011, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:27:20,164] [INFO] [logging.py:96:log_dist] [Rank 0] step=1080, skipped=17, lr=[4.830135935951734e-06, 4.830135935951734e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1079|ppo_ep: 1|act_loss: 0.034088134765625|cri_loss: 0.0189666748046875|unsuper_loss: 0.0 +average reward score: 4.1953125 +------------------------------------------------------------------------------------- +|E2E latency=2.72s |Gather latency=0.00s (0.00%) |Generate time=1.64s (60.37%) |Training time=0.44s (16.12%) |Others=0.64 (23.51%)|CurSamplesPerSec=11.78 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1080|ppo_ep: 1|act_loss: 0.00751495361328125|cri_loss: 0.01019287109375|unsuper_loss: 0.0 +average reward score: 4.2421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.20%) |Training time=0.44s (20.22%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1081|ppo_ep: 1|act_loss: -0.002838134765625|cri_loss: 0.00644683837890625|unsuper_loss: 0.0 +average reward score: 4.43359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.08%) |Training time=0.44s (20.37%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1082|ppo_ep: 1|act_loss: 0.0215911865234375|cri_loss: 0.01245880126953125|unsuper_loss: 0.0 +average reward score: 3.99609375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.76%) |Training time=0.45s (20.71%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1083|ppo_ep: 1|act_loss: 0.0736083984375|cri_loss: 0.04473876953125|unsuper_loss: 0.0 +average reward score: 3.4453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.03%) |Training time=0.44s (20.42%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1084|ppo_ep: 1|act_loss: 0.0174407958984375|cri_loss: 0.01013946533203125|unsuper_loss: 0.0 +average reward score: 4.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.98%) |Training time=0.44s (20.44%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1085|ppo_ep: 1|act_loss: 0.0565185546875|cri_loss: 0.04034423828125|unsuper_loss: 0.0 +average reward score: 3.533203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.75%) |Training time=0.45s (20.70%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1086|ppo_ep: 1|act_loss: -0.1109619140625|cri_loss: -0.04583740234375|unsuper_loss: 0.0 +average reward score: 3.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.62s (68.75%) |Training time=0.44s (18.86%) |Others=0.29 (12.39%)|CurSamplesPerSec=13.57 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1087|ppo_ep: 1|act_loss: 0.01435089111328125|cri_loss: 0.008758544921875|unsuper_loss: 0.0 +average reward score: 3.759765625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.85%) |Training time=0.45s (20.61%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1088|ppo_ep: 1|act_loss: 0.047698974609375|cri_loss: 0.025726318359375|unsuper_loss: 0.0 +average reward score: 4.625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.96%) |Training time=0.44s (20.49%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41 +[2023-04-14 09:27:42,603] [INFO] [logging.py:96:log_dist] [Rank 0] step=1090, skipped=15, lr=[9.314037788463137e-06, 9.314037788463137e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:27:42,621] [INFO] [timer.py:199:stop] epoch=0/micro_step=1090/global_step=1090, RunningAvgSamplesPerSec=108.83461521557098, CurrSamplesPerSec=114.59602382121284, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:27:42,714] [INFO] [logging.py:96:log_dist] [Rank 0] step=1090, skipped=17, lr=[4.826631368735207e-06, 4.826631368735207e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1089|ppo_ep: 1|act_loss: -0.01056671142578125|cri_loss: -0.00135040283203125|unsuper_loss: 0.0 +average reward score: 4.0546875 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.79s (76.83%) |Training time=0.44s (18.96%) |Others=0.10 (4.21%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1090|ppo_ep: 1|act_loss: 0.05499267578125|cri_loss: 0.038818359375|unsuper_loss: 0.0 +average reward score: 4.03125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.65%) |Training time=0.45s (20.84%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1091|ppo_ep: 1|act_loss: 0.348388671875|cri_loss: 0.212890625|unsuper_loss: 0.0 +average reward score: 3.84765625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.17%) |Training time=0.44s (20.31%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1092|ppo_ep: 1|act_loss: 0.2303466796875|cri_loss: 0.137939453125|unsuper_loss: 0.0 +average reward score: 3.955078125 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.25%) |Training time=0.44s (20.05%) |Others=0.15 (6.70%)|CurSamplesPerSec=14.44 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1093|ppo_ep: 1|act_loss: 0.0892333984375|cri_loss: 0.049407958984375|unsuper_loss: 0.0 +average reward score: 3.763671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.05%) |Training time=0.44s (20.41%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1094|ppo_ep: 1|act_loss: 0.0048675537109375|cri_loss: 0.007244110107421875|unsuper_loss: 0.0 +average reward score: 3.6640625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.00%) |Training time=0.44s (20.46%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1095|ppo_ep: 1|act_loss: -0.173828125|cri_loss: -0.07159423828125|unsuper_loss: 0.0 +average reward score: 4.0390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.98%) |Training time=0.44s (20.48%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1096|ppo_ep: 1|act_loss: -0.06512451171875|cri_loss: -0.02496337890625|unsuper_loss: 0.0 +average reward score: 4.2734375 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.74s (76.39%) |Training time=0.44s (19.29%) |Others=0.10 (4.33%)|CurSamplesPerSec=14.03 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1097|ppo_ep: 1|act_loss: -0.01050567626953125|cri_loss: 0.0022735595703125|unsuper_loss: 0.0 +average reward score: 4.17578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.37%) |Training time=0.43s (20.08%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1098|ppo_ep: 1|act_loss: 0.01104736328125|cri_loss: 0.01215362548828125|unsuper_loss: 0.0 +average reward score: 4.64453125 +------------------------------------------------------------------------------------- +|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.62s (76.93%) |Training time=0.39s (18.41%) |Others=0.10 (4.66%)|CurSamplesPerSec=15.16 |AvgSamplesPerSec=14.41 +[2023-04-14 09:28:05,186] [INFO] [logging.py:96:log_dist] [Rank 0] step=1100, skipped=15, lr=[9.307194115606148e-06, 9.307194115606148e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:28:05,204] [INFO] [timer.py:199:stop] epoch=0/micro_step=1100/global_step=1100, RunningAvgSamplesPerSec=108.9002611488009, CurrSamplesPerSec=115.39545924679588, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:28:05,296] [INFO] [logging.py:96:log_dist] [Rank 0] step=1100, skipped=17, lr=[4.823092315229102e-06, 4.823092315229102e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1099|ppo_ep: 1|act_loss: -0.0202178955078125|cri_loss: -0.003326416015625|unsuper_loss: 0.0 +average reward score: 3.9765625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.13%) |Training time=0.44s (20.34%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1100|ppo_ep: 1|act_loss: -0.01149749755859375|cri_loss: -0.00286865234375|unsuper_loss: 0.0 +average reward score: 4.5859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.96%) |Training time=0.44s (20.50%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1101|ppo_ep: 1|act_loss: -0.02362060546875|cri_loss: -0.00521087646484375|unsuper_loss: 0.0 +average reward score: 4.79296875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.05%) |Training time=0.44s (20.40%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1102|ppo_ep: 1|act_loss: 0.00652313232421875|cri_loss: 0.00743865966796875|unsuper_loss: 0.0 +average reward score: 4.76171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.87%) |Training time=0.45s (20.58%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1103|ppo_ep: 1|act_loss: 0.023468017578125|cri_loss: 0.01531982421875|unsuper_loss: 0.0 +average reward score: 5.0234375 +------------------------------------------------------------------------------------- +|E2E latency=3.39s |Gather latency=0.00s (0.00%) |Generate time=1.78s (52.71%) |Training time=0.44s (12.95%) |Others=1.16 (34.34%)|CurSamplesPerSec=9.45 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1104|ppo_ep: 1|act_loss: 0.036376953125|cri_loss: 0.01971435546875|unsuper_loss: 0.0 +average reward score: 4.43359375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.45%) |Training time=0.43s (19.78%) |Others=0.10 (4.77%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1105|ppo_ep: 1|act_loss: 0.07080078125|cri_loss: 0.03900146484375|unsuper_loss: 0.0 +average reward score: 4.6796875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.01%) |Training time=0.42s (19.47%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1106|ppo_ep: 1|act_loss: 0.011138916015625|cri_loss: 0.0069122314453125|unsuper_loss: 0.0 +average reward score: 4.63671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.34%) |Training time=0.44s (20.12%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1107|ppo_ep: 1|act_loss: 0.0400390625|cri_loss: 0.022705078125|unsuper_loss: 0.0 +average reward score: 4.8671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.37%) |Training time=0.43s (20.06%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1108|ppo_ep: 1|act_loss: -0.109130859375|cri_loss: -0.045318603515625|unsuper_loss: 0.0 +average reward score: 4.1796875 +------------------------------------------------------------------------------------- +|E2E latency=2.87s |Gather latency=0.00s (0.00%) |Generate time=1.63s (56.75%) |Training time=0.44s (15.15%) |Others=0.81 (28.10%)|CurSamplesPerSec=11.13 |AvgSamplesPerSec=14.41 +[2023-04-14 09:28:28,786] [INFO] [logging.py:96:log_dist] [Rank 0] step=1110, skipped=15, lr=[9.300284005819661e-06, 9.300284005819661e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:28:28,804] [INFO] [timer.py:199:stop] epoch=0/micro_step=1110/global_step=1110, RunningAvgSamplesPerSec=108.96558639565792, CurrSamplesPerSec=116.06514008993427, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:28:28,897] [INFO] [logging.py:96:log_dist] [Rank 0] step=1110, skipped=17, lr=[4.8195188278907305e-06, 4.8195188278907305e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1109|ppo_ep: 1|act_loss: -0.0684814453125|cri_loss: -0.0257110595703125|unsuper_loss: 0.0 +average reward score: 4.12109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.19%) |Training time=0.44s (20.28%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1110|ppo_ep: 1|act_loss: 0.0562744140625|cri_loss: 0.031768798828125|unsuper_loss: 0.0 +average reward score: 5.16796875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.98%) |Training time=0.44s (20.47%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1111|ppo_ep: 1|act_loss: -0.0019283294677734375|cri_loss: 0.0|unsuper_loss: 0.0 +average reward score: 4.421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.26%) |Training time=0.44s (20.20%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1112|ppo_ep: 1|act_loss: 0.04559326171875|cri_loss: 0.0241241455078125|unsuper_loss: 0.0 +average reward score: 5.22265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.81%) |Training time=0.43s (19.66%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1113|ppo_ep: 1|act_loss: 0.0626220703125|cri_loss: 0.03411865234375|unsuper_loss: 0.0 +average reward score: 4.84765625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.02%) |Training time=0.44s (20.44%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1114|ppo_ep: 1|act_loss: 0.01316070556640625|cri_loss: 0.00732421875|unsuper_loss: 0.0 +average reward score: 4.48828125 +------------------------------------------------------------------------------------- +|E2E latency=2.56s |Gather latency=0.00s (0.00%) |Generate time=1.62s (63.50%) |Training time=0.44s (17.30%) |Others=0.49 (19.20%)|CurSamplesPerSec=12.51 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1115|ppo_ep: 1|act_loss: -0.030792236328125|cri_loss: -0.014434814453125|unsuper_loss: 0.0 +average reward score: 3.97265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.05%) |Training time=0.44s (20.38%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1116|ppo_ep: 1|act_loss: 0.01446533203125|cri_loss: 0.0109100341796875|unsuper_loss: 0.0 +average reward score: 4.43359375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.04%) |Training time=0.44s (20.42%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1117|ppo_ep: 1|act_loss: -0.0129547119140625|cri_loss: -0.00620269775390625|unsuper_loss: 0.0 +average reward score: 5.46875 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.27%) |Training time=0.43s (18.50%) |Others=0.10 (4.22%)|CurSamplesPerSec=13.77 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1118|ppo_ep: 1|act_loss: 0.0660400390625|cri_loss: 0.03564453125|unsuper_loss: 0.0 +average reward score: 5.03125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.07%) |Training time=0.44s (20.37%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41 +[2023-04-14 09:28:50,989] [INFO] [logging.py:96:log_dist] [Rank 0] step=1120, skipped=15, lr=[9.293307561528172e-06, 9.293307561528172e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:28:51,007] [INFO] [timer.py:199:stop] epoch=0/micro_step=1120/global_step=1120, RunningAvgSamplesPerSec=109.03085977626249, CurrSamplesPerSec=126.14459975996218, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:28:51,103] [INFO] [logging.py:96:log_dist] [Rank 0] step=1120, skipped=17, lr=[4.815910959687795e-06, 4.815910959687795e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1119|ppo_ep: 1|act_loss: 0.044677734375|cri_loss: 0.0248870849609375|unsuper_loss: 0.0 +average reward score: 4.21875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.08%) |Training time=0.42s (19.25%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1120|ppo_ep: 1|act_loss: 0.0006799697875976562|cri_loss: 0.001979827880859375|unsuper_loss: 0.0 +average reward score: 4.3984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.89%) |Training time=0.42s (19.56%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1121|ppo_ep: 1|act_loss: 0.0147705078125|cri_loss: 0.00927734375|unsuper_loss: 0.0 +average reward score: 5.06640625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.70%) |Training time=0.45s (20.75%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1122|ppo_ep: 1|act_loss: 0.0072174072265625|cri_loss: 0.00495147705078125|unsuper_loss: 0.0 +average reward score: 4.875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.66%) |Training time=0.45s (20.79%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1123|ppo_ep: 1|act_loss: 0.047943115234375|cri_loss: 0.0294342041015625|unsuper_loss: 0.0 +average reward score: 4.54296875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.74%) |Training time=0.45s (20.43%) |Others=0.11 (4.83%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1124|ppo_ep: 1|act_loss: 0.00019073486328125|cri_loss: 0.0015573501586914062|unsuper_loss: 0.0 +average reward score: 4.015625 +------------------------------------------------------------------------------------- +|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.65s (71.87%) |Training time=0.45s (19.42%) |Others=0.20 (8.71%)|CurSamplesPerSec=13.91 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1125|ppo_ep: 1|act_loss: 0.04345703125|cri_loss: 0.0232696533203125|unsuper_loss: 0.0 +average reward score: 4.109375 +------------------------------------------------------------------------------------- +|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.62s (76.99%) |Training time=0.39s (18.33%) |Others=0.10 (4.68%)|CurSamplesPerSec=15.18 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1126|ppo_ep: 1|act_loss: -0.046051025390625|cri_loss: -0.02191162109375|unsuper_loss: 0.0 +average reward score: 4.7890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.09%) |Training time=0.44s (20.34%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1127|ppo_ep: 1|act_loss: -0.0043487548828125|cri_loss: -0.0015249252319335938|unsuper_loss: 0.0 +average reward score: 5.03125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.11%) |Training time=0.44s (20.32%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1128|ppo_ep: 1|act_loss: 0.04193115234375|cri_loss: 0.023284912109375|unsuper_loss: 0.0 +average reward score: 4.8125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.19%) |Training time=0.44s (20.25%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41 +[2023-04-14 09:29:12,723] [INFO] [logging.py:96:log_dist] [Rank 0] step=1130, skipped=15, lr=[9.286264886139418e-06, 9.286264886139418e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:29:12,741] [INFO] [timer.py:199:stop] epoch=0/micro_step=1130/global_step=1130, RunningAvgSamplesPerSec=109.09936493188563, CurrSamplesPerSec=115.27771474509555, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:29:12,834] [INFO] [logging.py:96:log_dist] [Rank 0] step=1130, skipped=17, lr=[4.812268764097606e-06, 4.812268764097606e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1129|ppo_ep: 1|act_loss: 0.0244140625|cri_loss: 0.0134124755859375|unsuper_loss: 0.0 +average reward score: 4.63671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.05%) |Training time=0.44s (20.41%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1130|ppo_ep: 1|act_loss: 0.0638427734375|cri_loss: 0.0350341796875|unsuper_loss: 0.0 +average reward score: 4.0625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.02%) |Training time=0.44s (20.41%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1131|ppo_ep: 1|act_loss: 0.012664794921875|cri_loss: 0.00897216796875|unsuper_loss: 0.0 +average reward score: 4.8203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.42%) |Training time=0.43s (20.02%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1132|ppo_ep: 1|act_loss: -0.006801605224609375|cri_loss: -0.00240325927734375|unsuper_loss: 0.0 +average reward score: 3.84375 +------------------------------------------------------------------------------------- +|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.80s (76.72%) |Training time=0.44s (18.77%) |Others=0.11 (4.51%)|CurSamplesPerSec=13.64 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1133|ppo_ep: 1|act_loss: -0.061920166015625|cri_loss: -0.02935791015625|unsuper_loss: 0.0 +average reward score: 4.25 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.95%) |Training time=0.44s (20.39%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1134|ppo_ep: 1|act_loss: -0.033355712890625|cri_loss: -0.0149993896484375|unsuper_loss: 0.0 +average reward score: 4.8828125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (76.02%) |Training time=0.42s (19.42%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1135|ppo_ep: 1|act_loss: -0.023468017578125|cri_loss: -0.01123046875|unsuper_loss: 0.0 +average reward score: 4.31640625 +------------------------------------------------------------------------------------- +|E2E latency=2.51s |Gather latency=0.00s (0.00%) |Generate time=1.65s (65.98%) |Training time=0.40s (16.15%) |Others=0.45 (17.87%)|CurSamplesPerSec=12.77 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1136|ppo_ep: 1|act_loss: -0.0093231201171875|cri_loss: -0.004039764404296875|unsuper_loss: 0.0 +average reward score: 5.02734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.92%) |Training time=0.44s (20.52%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1137|ppo_ep: 1|act_loss: 0.023101806640625|cri_loss: 0.0122833251953125|unsuper_loss: 0.0 +average reward score: 4.63671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.99%) |Training time=0.44s (20.45%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1138|ppo_ep: 1|act_loss: 0.02978515625|cri_loss: 0.0157318115234375|unsuper_loss: 0.0 +average reward score: 4.2109375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.90%) |Training time=0.45s (20.55%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41 +[2023-04-14 09:29:34,886] [INFO] [logging.py:96:log_dist] [Rank 0] step=1140, skipped=15, lr=[9.279156084042835e-06, 9.279156084042835e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:29:34,904] [INFO] [timer.py:199:stop] epoch=0/micro_step=1140/global_step=1140, RunningAvgSamplesPerSec=109.17206823492205, CurrSamplesPerSec=121.0444594753028, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:29:34,997] [INFO] [logging.py:96:log_dist] [Rank 0] step=1140, skipped=17, lr=[4.80859229510629e-06, 4.80859229510629e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1139|ppo_ep: 1|act_loss: 0.02178955078125|cri_loss: 0.011505126953125|unsuper_loss: 0.0 +average reward score: 3.923828125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.76%) |Training time=0.43s (19.72%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1140|ppo_ep: 1|act_loss: -0.01971435546875|cri_loss: -0.00472259521484375|unsuper_loss: 0.0 +average reward score: 4.578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.19%) |Training time=0.44s (20.26%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1141|ppo_ep: 1|act_loss: -0.04974365234375|cri_loss: -0.022308349609375|unsuper_loss: 0.0 +average reward score: 4.73828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.89%) |Training time=0.45s (20.58%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1142|ppo_ep: 1|act_loss: 0.0038242340087890625|cri_loss: 0.003997802734375|unsuper_loss: 0.0 +average reward score: 4.66796875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.12%) |Training time=0.44s (20.32%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1143|ppo_ep: 1|act_loss: 0.006988525390625|cri_loss: 0.00522613525390625|unsuper_loss: 0.0 +average reward score: 4.69140625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.05%) |Training time=0.44s (20.41%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1144|ppo_ep: 1|act_loss: 0.0173187255859375|cri_loss: 0.0114898681640625|unsuper_loss: 0.0 +average reward score: 4.8515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.82%) |Training time=0.45s (20.63%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1145|ppo_ep: 1|act_loss: 0.0137481689453125|cri_loss: 0.0083465576171875|unsuper_loss: 0.0 +average reward score: 5.01171875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.84%) |Training time=0.45s (20.61%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1146|ppo_ep: 1|act_loss: 0.04644775390625|cri_loss: 0.025482177734375|unsuper_loss: 0.0 +average reward score: 5.1015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.91%) |Training time=0.44s (20.54%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1147|ppo_ep: 1|act_loss: -0.000690460205078125|cri_loss: 0.0012369155883789062|unsuper_loss: 0.0 +average reward score: 4.29296875 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.80s (76.88%) |Training time=0.44s (18.92%) |Others=0.10 (4.20%)|CurSamplesPerSec=13.66 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1148|ppo_ep: 1|act_loss: 0.0494384765625|cri_loss: 0.0260009765625|unsuper_loss: 0.0 +average reward score: 4.12109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.21%) |Training time=0.44s (20.23%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.41 +[2023-04-14 09:29:56,703] [INFO] [logging.py:96:log_dist] [Rank 0] step=1150, skipped=15, lr=[9.27198126060802e-06, 9.27198126060802e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:29:56,721] [INFO] [timer.py:199:stop] epoch=0/micro_step=1150/global_step=1150, RunningAvgSamplesPerSec=109.21557534240085, CurrSamplesPerSec=115.28613124391221, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:29:56,814] [INFO] [logging.py:96:log_dist] [Rank 0] step=1150, skipped=17, lr=[4.804881607207983e-06, 4.804881607207983e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1149|ppo_ep: 1|act_loss: 0.0153045654296875|cri_loss: 0.01132965087890625|unsuper_loss: 0.0 +average reward score: 5.625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.15%) |Training time=0.44s (20.32%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1150|ppo_ep: 1|act_loss: -0.0416259765625|cri_loss: -0.018768310546875|unsuper_loss: 0.0 +average reward score: 4.6796875 +------------------------------------------------------------------------------------- +|E2E latency=2.93s |Gather latency=0.00s (0.00%) |Generate time=1.64s (55.98%) |Training time=0.42s (14.40%) |Others=0.87 (29.62%)|CurSamplesPerSec=10.94 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1151|ppo_ep: 1|act_loss: -0.05712890625|cri_loss: -0.0264434814453125|unsuper_loss: 0.0 +average reward score: 3.96875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.46%) |Training time=0.43s (19.94%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1152|ppo_ep: 1|act_loss: -0.078369140625|cri_loss: -0.036956787109375|unsuper_loss: 0.0 +average reward score: 4.48046875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.08%) |Training time=0.44s (20.37%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1153|ppo_ep: 1|act_loss: -0.02105712890625|cri_loss: -0.0081329345703125|unsuper_loss: 0.0 +average reward score: 4.91015625 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.01%) |Training time=0.45s (20.49%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1154|ppo_ep: 1|act_loss: 0.0235443115234375|cri_loss: 0.01214599609375|unsuper_loss: 0.0 +average reward score: 4.66796875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.40%) |Training time=0.43s (20.04%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1155|ppo_ep: 1|act_loss: 0.0322265625|cri_loss: 0.0175933837890625|unsuper_loss: 0.0 +average reward score: 4.71484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.46%) |Training time=0.43s (19.98%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1156|ppo_ep: 1|act_loss: 0.02813720703125|cri_loss: 0.01535797119140625|unsuper_loss: 0.0 +average reward score: 3.984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.18%) |Training time=0.44s (20.24%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1157|ppo_ep: 1|act_loss: 0.008514404296875|cri_loss: 0.005710601806640625|unsuper_loss: 0.0 +average reward score: 4.1953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.39%) |Training time=0.43s (20.02%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1158|ppo_ep: 1|act_loss: 0.0053863525390625|cri_loss: 0.0035552978515625|unsuper_loss: 0.0 +average reward score: 4.2421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.96%) |Training time=0.43s (19.94%) |Others=0.11 (5.10%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.41 +[2023-04-14 09:30:19,101] [INFO] [logging.py:96:log_dist] [Rank 0] step=1160, skipped=15, lr=[9.264740522183165e-06, 9.264740522183165e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:30:19,119] [INFO] [timer.py:199:stop] epoch=0/micro_step=1160/global_step=1160, RunningAvgSamplesPerSec=109.29043728671321, CurrSamplesPerSec=118.15315274075587, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:30:19,212] [INFO] [logging.py:96:log_dist] [Rank 0] step=1160, skipped=17, lr=[4.801136755404034e-06, 4.801136755404034e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1159|ppo_ep: 1|act_loss: -0.0248260498046875|cri_loss: -0.01092529296875|unsuper_loss: 0.0 +average reward score: 4.16796875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.39%) |Training time=0.43s (20.07%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1160|ppo_ep: 1|act_loss: -0.0252685546875|cri_loss: -0.01198577880859375|unsuper_loss: 0.0 +average reward score: 4.75 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.42%) |Training time=0.44s (20.06%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1161|ppo_ep: 1|act_loss: -0.025543212890625|cri_loss: -0.01092529296875|unsuper_loss: 0.0 +average reward score: 4.53125 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.79s (77.13%) |Training time=0.43s (18.60%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.80 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1162|ppo_ep: 1|act_loss: -0.08428955078125|cri_loss: -0.0318603515625|unsuper_loss: 0.0 +average reward score: 5.06640625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.12%) |Training time=0.44s (20.33%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1163|ppo_ep: 1|act_loss: 0.057861328125|cri_loss: 0.0304412841796875|unsuper_loss: 0.0 +average reward score: 5.62890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.46%) |Training time=0.43s (19.98%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1164|ppo_ep: 1|act_loss: 0.05133056640625|cri_loss: 0.028472900390625|unsuper_loss: 0.0 +average reward score: 4.40234375 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.63s (69.99%) |Training time=0.44s (18.87%) |Others=0.26 (11.14%)|CurSamplesPerSec=13.75 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1165|ppo_ep: 1|act_loss: 0.0540771484375|cri_loss: 0.027984619140625|unsuper_loss: 0.0 +average reward score: 5.75390625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.60%) |Training time=0.43s (19.83%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1166|ppo_ep: 1|act_loss: 0.00930023193359375|cri_loss: 0.005130767822265625|unsuper_loss: 0.0 +average reward score: 4.8671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.08%) |Training time=0.44s (20.37%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1167|ppo_ep: 1|act_loss: -0.0136566162109375|cri_loss: -0.006427764892578125|unsuper_loss: 0.0 +average reward score: 4.359375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.96%) |Training time=0.44s (20.49%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1168|ppo_ep: 1|act_loss: -0.025115966796875|cri_loss: -0.0105133056640625|unsuper_loss: 0.0 +average reward score: 4.28515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.92%) |Training time=0.45s (20.54%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41 +[2023-04-14 09:30:41,075] [INFO] [logging.py:96:log_dist] [Rank 0] step=1170, skipped=15, lr=[9.25743397609348e-06, 9.25743397609348e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:30:41,093] [INFO] [timer.py:199:stop] epoch=0/micro_step=1170/global_step=1170, RunningAvgSamplesPerSec=109.34554743622506, CurrSamplesPerSec=113.94323795455607, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:30:41,186] [INFO] [logging.py:96:log_dist] [Rank 0] step=1170, skipped=17, lr=[4.797357795202179e-06, 4.797357795202179e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1169|ppo_ep: 1|act_loss: -0.04736328125|cri_loss: -0.018341064453125|unsuper_loss: 0.0 +average reward score: 3.822265625 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.62s (71.43%) |Training time=0.44s (19.54%) |Others=0.21 (9.03%)|CurSamplesPerSec=14.09 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1170|ppo_ep: 1|act_loss: -0.05035400390625|cri_loss: -0.021392822265625|unsuper_loss: 0.0 +average reward score: 4.90625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.10%) |Training time=0.44s (20.35%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1171|ppo_ep: 1|act_loss: 0.00125885009765625|cri_loss: 0.0021820068359375|unsuper_loss: 0.0 +average reward score: 4.43359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.96%) |Training time=0.44s (20.50%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1172|ppo_ep: 1|act_loss: -0.00449371337890625|cri_loss: -0.0005588531494140625|unsuper_loss: 0.0 +average reward score: 3.71484375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.86%) |Training time=0.45s (20.60%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1173|ppo_ep: 1|act_loss: 0.022857666015625|cri_loss: 0.01275634765625|unsuper_loss: 0.0 +average reward score: 3.71484375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.98%) |Training time=0.44s (20.47%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1174|ppo_ep: 1|act_loss: 0.087158203125|cri_loss: 0.047210693359375|unsuper_loss: 0.0 +average reward score: 4.0546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.03%) |Training time=0.44s (20.41%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1175|ppo_ep: 1|act_loss: -0.0106964111328125|cri_loss: -0.0047454833984375|unsuper_loss: 0.0 +average reward score: 4.234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.31%) |Training time=0.44s (20.16%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1176|ppo_ep: 1|act_loss: -0.036376953125|cri_loss: -0.0170440673828125|unsuper_loss: 0.0 +average reward score: 5.74609375 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.12%) |Training time=0.46s (19.58%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1177|ppo_ep: 1|act_loss: -0.05767822265625|cri_loss: -0.0277862548828125|unsuper_loss: 0.0 +average reward score: 4.23828125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.65%) |Training time=0.45s (20.82%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1178|ppo_ep: 1|act_loss: 0.023590087890625|cri_loss: 0.014129638671875|unsuper_loss: 0.0 +average reward score: 4.44140625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.05%) |Training time=0.44s (20.40%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41 +[2023-04-14 09:31:03,012] [INFO] [logging.py:96:log_dist] [Rank 0] step=1180, skipped=15, lr=[9.250061730639604e-06, 9.250061730639604e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:31:03,276] [INFO] [timer.py:199:stop] epoch=0/micro_step=1180/global_step=1180, RunningAvgSamplesPerSec=109.3098238350795, CurrSamplesPerSec=63.37054074560561, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:31:03,369] [INFO] [logging.py:96:log_dist] [Rank 0] step=1180, skipped=17, lr=[4.793544782615725e-06, 4.793544782615725e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1179|ppo_ep: 1|act_loss: 0.005401611328125|cri_loss: 0.0032062530517578125|unsuper_loss: 0.0 +average reward score: 3.94921875 +------------------------------------------------------------------------------------- +|E2E latency=2.41s |Gather latency=0.00s (0.00%) |Generate time=1.64s (68.15%) |Training time=0.67s (27.78%) |Others=0.10 (4.08%)|CurSamplesPerSec=13.29 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1180|ppo_ep: 1|act_loss: -0.025115966796875|cri_loss: -0.011627197265625|unsuper_loss: 0.0 +average reward score: 4.640625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.60%) |Training time=0.43s (19.84%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1181|ppo_ep: 1|act_loss: 0.06866455078125|cri_loss: 0.036651611328125|unsuper_loss: 0.0 +average reward score: 4.6171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.16%) |Training time=0.44s (20.30%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1182|ppo_ep: 1|act_loss: -0.0316162109375|cri_loss: -0.014556884765625|unsuper_loss: 0.0 +average reward score: 4.6015625 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.66s (75.45%) |Training time=0.44s (20.06%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1183|ppo_ep: 1|act_loss: 0.024444580078125|cri_loss: 0.0137939453125|unsuper_loss: 0.0 +average reward score: 5.03125 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.74s (77.02%) |Training time=0.42s (18.62%) |Others=0.10 (4.36%)|CurSamplesPerSec=14.14 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1184|ppo_ep: 1|act_loss: -0.04498291015625|cri_loss: -0.02166748046875|unsuper_loss: 0.0 +average reward score: 4.625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.60%) |Training time=0.43s (19.86%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1185|ppo_ep: 1|act_loss: 0.0167236328125|cri_loss: 0.009552001953125|unsuper_loss: 0.0 +average reward score: 4.8203125 +------------------------------------------------------------------------------------- +|E2E latency=2.49s |Gather latency=0.00s (0.00%) |Generate time=1.64s (65.59%) |Training time=0.43s (17.23%) |Others=0.43 (17.18%)|CurSamplesPerSec=12.83 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1186|ppo_ep: 1|act_loss: -0.0032024383544921875|cri_loss: -0.0007781982421875|unsuper_loss: 0.0 +average reward score: 5.01171875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.92%) |Training time=0.42s (19.54%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1187|ppo_ep: 1|act_loss: -0.017578125|cri_loss: -0.00304412841796875|unsuper_loss: 0.0 +average reward score: 3.634765625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.42%) |Training time=0.41s (19.05%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1188|ppo_ep: 1|act_loss: -0.009918212890625|cri_loss: -0.0033702850341796875|unsuper_loss: 0.0 +average reward score: 4.265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.60%) |Training time=0.43s (19.85%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41 +[2023-04-14 09:31:25,384] [INFO] [logging.py:96:log_dist] [Rank 0] step=1190, skipped=15, lr=[9.242623895096e-06, 9.242623895096e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:31:25,403] [INFO] [timer.py:199:stop] epoch=0/micro_step=1190/global_step=1190, RunningAvgSamplesPerSec=109.38797397111014, CurrSamplesPerSec=113.55204980063334, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:31:25,497] [INFO] [logging.py:96:log_dist] [Rank 0] step=1190, skipped=17, lr=[4.789697774162718e-06, 4.789697774162718e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1189|ppo_ep: 1|act_loss: -0.028289794921875|cri_loss: -0.01258087158203125|unsuper_loss: 0.0 +average reward score: 4.91796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.85%) |Training time=0.44s (20.53%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1190|ppo_ep: 1|act_loss: -0.020721435546875|cri_loss: -0.0089569091796875|unsuper_loss: 0.0 +average reward score: 4.421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.98%) |Training time=0.44s (20.45%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1191|ppo_ep: 1|act_loss: -0.03985595703125|cri_loss: -0.018280029296875|unsuper_loss: 0.0 +average reward score: 4.02734375 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.79s (77.01%) |Training time=0.44s (18.72%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.77 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1192|ppo_ep: 1|act_loss: -0.05194091796875|cri_loss: -0.02423095703125|unsuper_loss: 0.0 +average reward score: 4.234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.96%) |Training time=0.44s (20.50%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1193|ppo_ep: 1|act_loss: 0.035552978515625|cri_loss: 0.018402099609375|unsuper_loss: 0.0 +average reward score: 4.6328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.94%) |Training time=0.44s (20.51%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1194|ppo_ep: 1|act_loss: 0.11077880859375|cri_loss: 0.060760498046875|unsuper_loss: 0.0 +average reward score: 4.484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.97%) |Training time=0.42s (19.47%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1195|ppo_ep: 1|act_loss: 0.10888671875|cri_loss: 0.05926513671875|unsuper_loss: 0.0 +average reward score: 4.8203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.44%) |Training time=0.43s (20.00%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1196|ppo_ep: 1|act_loss: 0.04571533203125|cri_loss: 0.0241851806640625|unsuper_loss: 0.0 +average reward score: 4.48828125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.14%) |Training time=0.44s (20.30%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1197|ppo_ep: 1|act_loss: 0.028533935546875|cri_loss: 0.0150604248046875|unsuper_loss: 0.0 +average reward score: 4.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.62s (76.97%) |Training time=0.39s (18.34%) |Others=0.10 (4.69%)|CurSamplesPerSec=15.18 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1198|ppo_ep: 1|act_loss: -0.020477294921875|cri_loss: -0.00858306884765625|unsuper_loss: 0.0 +average reward score: 4.16796875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.67%) |Training time=0.43s (19.78%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42 +[2023-04-14 09:31:47,505] [INFO] [logging.py:96:log_dist] [Rank 0] step=1200, skipped=15, lr=[9.235120579709336e-06, 9.235120579709336e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:31:47,520] [INFO] [timer.py:199:stop] epoch=0/micro_step=1200/global_step=1200, RunningAvgSamplesPerSec=109.46486512641026, CurrSamplesPerSec=118.27549262462009, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:31:47,613] [INFO] [logging.py:96:log_dist] [Rank 0] step=1200, skipped=17, lr=[4.7858168268651025e-06, 4.7858168268651025e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1199|ppo_ep: 1|act_loss: -0.04156494140625|cri_loss: -0.0186614990234375|unsuper_loss: 0.0 +average reward score: 4.33984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.45%) |Training time=0.43s (20.02%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1200|ppo_ep: 1|act_loss: -0.052703857421875|cri_loss: -0.02392578125|unsuper_loss: 0.0 +average reward score: 4.7421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.32%) |Training time=0.44s (20.14%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1201|ppo_ep: 1|act_loss: -0.0755615234375|cri_loss: -0.033935546875|unsuper_loss: 0.0 +average reward score: 4.234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.56%) |Training time=0.43s (19.91%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1202|ppo_ep: 1|act_loss: 0.0014781951904296875|cri_loss: 0.002246856689453125|unsuper_loss: 0.0 +average reward score: 4.328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.72%) |Training time=0.43s (19.74%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1203|ppo_ep: 1|act_loss: 0.00370025634765625|cri_loss: 0.0035266876220703125|unsuper_loss: 0.0 +average reward score: 4.2109375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.52%) |Training time=0.43s (19.94%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1204|ppo_ep: 1|act_loss: 0.00565338134765625|cri_loss: 0.003631591796875|unsuper_loss: 0.0 +average reward score: 4.109375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.22%) |Training time=0.44s (20.26%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1205|ppo_ep: 1|act_loss: 0.00897216796875|cri_loss: 0.00499725341796875|unsuper_loss: 0.0 +average reward score: 4.83984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.52%) |Training time=0.43s (19.94%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1206|ppo_ep: 1|act_loss: 0.01291656494140625|cri_loss: 0.007625579833984375|unsuper_loss: 0.0 +average reward score: 4.3203125 +------------------------------------------------------------------------------------- +|E2E latency=2.91s |Gather latency=0.00s (0.00%) |Generate time=1.81s (62.01%) |Training time=0.42s (14.56%) |Others=0.68 (23.43%)|CurSamplesPerSec=10.98 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1207|ppo_ep: 1|act_loss: 0.04498291015625|cri_loss: 0.024261474609375|unsuper_loss: 0.0 +average reward score: 4.453125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.68%) |Training time=0.43s (19.77%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1208|ppo_ep: 1|act_loss: 0.0108795166015625|cri_loss: 0.00605010986328125|unsuper_loss: 0.0 +average reward score: 5.44921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.43%) |Training time=0.43s (20.03%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41 +[2023-04-14 09:32:09,937] [INFO] [logging.py:96:log_dist] [Rank 0] step=1210, skipped=15, lr=[9.22755189569684e-06, 9.22755189569684e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:32:09,955] [INFO] [timer.py:199:stop] epoch=0/micro_step=1210/global_step=1210, RunningAvgSamplesPerSec=109.54153886070056, CurrSamplesPerSec=129.74929212839035, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:32:10,048] [INFO] [logging.py:96:log_dist] [Rank 0] step=1210, skipped=17, lr=[4.7819019982478805e-06, 4.7819019982478805e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1209|ppo_ep: 1|act_loss: 0.0146484375|cri_loss: 0.008087158203125|unsuper_loss: 0.0 +average reward score: 4.421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.45%) |Training time=0.41s (18.99%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1210|ppo_ep: 1|act_loss: -0.0238800048828125|cri_loss: -0.01039886474609375|unsuper_loss: 0.0 +average reward score: 5.46484375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.82%) |Training time=0.43s (19.64%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1211|ppo_ep: 1|act_loss: 0.01238250732421875|cri_loss: 0.006862640380859375|unsuper_loss: 0.0 +average reward score: 4.66796875 +------------------------------------------------------------------------------------- +|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.66s (74.33%) |Training time=0.44s (19.62%) |Others=0.14 (6.05%)|CurSamplesPerSec=14.33 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1212|ppo_ep: 1|act_loss: -0.0153656005859375|cri_loss: -0.004589080810546875|unsuper_loss: 0.0 +average reward score: 4.5546875 +------------------------------------------------------------------------------------- +|E2E latency=2.67s |Gather latency=0.00s (0.00%) |Generate time=1.69s (63.17%) |Training time=0.44s (16.36%) |Others=0.55 (20.47%)|CurSamplesPerSec=11.98 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1213|ppo_ep: 1|act_loss: 0.00885772705078125|cri_loss: 0.005619049072265625|unsuper_loss: 0.0 +average reward score: 4.19921875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.27%) |Training time=0.43s (20.14%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1214|ppo_ep: 1|act_loss: 0.0186920166015625|cri_loss: 0.0106964111328125|unsuper_loss: 0.0 +average reward score: 5.26171875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.51%) |Training time=0.43s (19.95%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1215|ppo_ep: 1|act_loss: -0.0203704833984375|cri_loss: -0.00943756103515625|unsuper_loss: 0.0 +average reward score: 4.1484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.17%) |Training time=0.44s (20.18%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1216|ppo_ep: 1|act_loss: -0.0011844635009765625|cri_loss: 0.00022029876708984375|unsuper_loss: 0.0 +average reward score: 4.6171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.05%) |Training time=0.44s (20.30%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1217|ppo_ep: 1|act_loss: -0.01849365234375|cri_loss: -0.00757598876953125|unsuper_loss: 0.0 +average reward score: 4.64453125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.75%) |Training time=0.45s (20.69%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1218|ppo_ep: 1|act_loss: 0.00743865966796875|cri_loss: 0.00434112548828125|unsuper_loss: 0.0 +average reward score: 4.1328125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.56%) |Training time=0.44s (20.17%) |Others=0.11 (5.27%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.41 +[2023-04-14 09:32:32,197] [INFO] [logging.py:96:log_dist] [Rank 0] step=1220, skipped=15, lr=[9.219917955244674e-06, 9.219917955244674e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:32:32,214] [INFO] [timer.py:199:stop] epoch=0/micro_step=1220/global_step=1220, RunningAvgSamplesPerSec=109.59446571481158, CurrSamplesPerSec=116.17172795170941, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:32:32,308] [INFO] [logging.py:96:log_dist] [Rank 0] step=1220, skipped=17, lr=[4.777953346338256e-06, 4.777953346338256e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1219|ppo_ep: 1|act_loss: 0.03564453125|cri_loss: 0.01837158203125|unsuper_loss: 0.0 +average reward score: 4.1484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.18%) |Training time=0.44s (20.26%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1220|ppo_ep: 1|act_loss: 0.0859375|cri_loss: 0.046600341796875|unsuper_loss: 0.0 +average reward score: 4.80859375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.06%) |Training time=0.44s (20.37%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1221|ppo_ep: 1|act_loss: -0.004993438720703125|cri_loss: -0.002292633056640625|unsuper_loss: 0.0 +average reward score: 4.91015625 +------------------------------------------------------------------------------------- +|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.63s (72.70%) |Training time=0.45s (20.14%) |Others=0.16 (7.17%)|CurSamplesPerSec=14.30 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1222|ppo_ep: 1|act_loss: -0.022979736328125|cri_loss: -0.009552001953125|unsuper_loss: 0.0 +average reward score: 4.7421875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.29%) |Training time=0.44s (20.18%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1223|ppo_ep: 1|act_loss: -0.041015625|cri_loss: -0.019622802734375|unsuper_loss: 0.0 +average reward score: 5.171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.00%) |Training time=0.44s (20.39%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1224|ppo_ep: 1|act_loss: -0.0154571533203125|cri_loss: -0.007038116455078125|unsuper_loss: 0.0 +average reward score: 4.015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (76.01%) |Training time=0.42s (19.41%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1225|ppo_ep: 1|act_loss: -0.04803466796875|cri_loss: -0.022705078125|unsuper_loss: 0.0 +average reward score: 4.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.55%) |Training time=0.43s (19.89%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1226|ppo_ep: 1|act_loss: 0.0100250244140625|cri_loss: 0.00542449951171875|unsuper_loss: 0.0 +average reward score: 4.296875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.41%) |Training time=0.43s (20.05%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1227|ppo_ep: 1|act_loss: 0.06689453125|cri_loss: 0.03717041015625|unsuper_loss: 0.0 +average reward score: 4.55859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.51%) |Training time=0.43s (19.93%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1228|ppo_ep: 1|act_loss: 0.047698974609375|cri_loss: 0.024688720703125|unsuper_loss: 0.0 +average reward score: 4.2578125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.46%) |Training time=0.43s (20.00%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.42 +[2023-04-14 09:32:53,944] [INFO] [logging.py:96:log_dist] [Rank 0] step=1230, skipped=15, lr=[9.212218871506249e-06, 9.212218871506249e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:32:53,962] [INFO] [timer.py:199:stop] epoch=0/micro_step=1230/global_step=1230, RunningAvgSamplesPerSec=109.6524556882401, CurrSamplesPerSec=118.64602981670637, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:32:54,055] [INFO] [logging.py:96:log_dist] [Rank 0] step=1230, skipped=17, lr=[4.773970929664774e-06, 4.773970929664774e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1229|ppo_ep: 1|act_loss: -0.00830078125|cri_loss: -0.0036220550537109375|unsuper_loss: 0.0 +average reward score: 4.5546875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.53%) |Training time=0.43s (19.94%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1230|ppo_ep: 1|act_loss: -0.00394439697265625|cri_loss: -0.0006084442138671875|unsuper_loss: 0.0 +average reward score: 4.94921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.23%) |Training time=0.44s (20.22%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1231|ppo_ep: 1|act_loss: -0.1021728515625|cri_loss: -0.039215087890625|unsuper_loss: 0.0 +average reward score: 5.01171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.25%) |Training time=0.44s (20.12%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1232|ppo_ep: 1|act_loss: -0.007389068603515625|cri_loss: -0.00281524658203125|unsuper_loss: 0.0 +average reward score: 4.1796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.34%) |Training time=0.44s (20.14%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1233|ppo_ep: 1|act_loss: 0.00864410400390625|cri_loss: 0.004642486572265625|unsuper_loss: 0.0 +average reward score: 5.3125 +------------------------------------------------------------------------------------- +|E2E latency=2.48s |Gather latency=0.00s (0.00%) |Generate time=1.64s (65.95%) |Training time=0.44s (17.58%) |Others=0.41 (16.47%)|CurSamplesPerSec=12.91 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1234|ppo_ep: 1|act_loss: 0.05426025390625|cri_loss: 0.03143310546875|unsuper_loss: 0.0 +average reward score: 4.265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.21%) |Training time=0.44s (20.25%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1235|ppo_ep: 1|act_loss: 0.03375244140625|cri_loss: 0.017425537109375|unsuper_loss: 0.0 +average reward score: 4.6484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.26%) |Training time=0.44s (20.18%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1236|ppo_ep: 1|act_loss: 0.01275634765625|cri_loss: 0.007781982421875|unsuper_loss: 0.0 +average reward score: 4.58984375 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.19%) |Training time=0.43s (18.57%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1237|ppo_ep: 1|act_loss: 0.0258026123046875|cri_loss: 0.0137481689453125|unsuper_loss: 0.0 +average reward score: 5.11328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.50%) |Training time=0.43s (19.96%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1238|ppo_ep: 1|act_loss: -0.0137786865234375|cri_loss: -0.006542205810546875|unsuper_loss: 0.0 +average reward score: 4.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.41%) |Training time=0.43s (20.04%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.42 +[2023-04-14 09:33:16,086] [INFO] [logging.py:96:log_dist] [Rank 0] step=1240, skipped=15, lr=[9.204454758600558e-06, 9.204454758600558e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:33:16,212] [INFO] [timer.py:199:stop] epoch=0/micro_step=1240/global_step=1240, RunningAvgSamplesPerSec=109.67958562836411, CurrSamplesPerSec=85.63239207564769, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:33:16,305] [INFO] [logging.py:96:log_dist] [Rank 0] step=1240, skipped=17, lr=[4.769954807256458e-06, 4.769954807256458e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1239|ppo_ep: 1|act_loss: -0.1005859375|cri_loss: -0.047332763671875|unsuper_loss: 0.0 +average reward score: 4.453125 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.64s (72.10%) |Training time=0.54s (23.57%) |Others=0.10 (4.32%)|CurSamplesPerSec=14.06 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1240|ppo_ep: 1|act_loss: 0.018646240234375|cri_loss: 0.01007843017578125|unsuper_loss: 0.0 +average reward score: 5.578125 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.67s (76.09%) |Training time=0.43s (19.45%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.54 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1241|ppo_ep: 1|act_loss: 0.0440673828125|cri_loss: 0.023956298828125|unsuper_loss: 0.0 +average reward score: 4.54296875 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.74s (76.14%) |Training time=0.45s (19.54%) |Others=0.10 (4.32%)|CurSamplesPerSec=14.01 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1242|ppo_ep: 1|act_loss: -0.012939453125|cri_loss: -0.00576019287109375|unsuper_loss: 0.0 +average reward score: 4.5859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.61%) |Training time=0.43s (19.84%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1243|ppo_ep: 1|act_loss: -0.013214111328125|cri_loss: -0.0034637451171875|unsuper_loss: 0.0 +average reward score: 4.40234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.59%) |Training time=0.43s (19.87%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1244|ppo_ep: 1|act_loss: -0.0278167724609375|cri_loss: -0.0135498046875|unsuper_loss: 0.0 +average reward score: 5.50390625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.85%) |Training time=0.42s (19.60%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1245|ppo_ep: 1|act_loss: -0.0090789794921875|cri_loss: -0.00386810302734375|unsuper_loss: 0.0 +average reward score: 4.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.53%) |Training time=0.43s (19.83%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1246|ppo_ep: 1|act_loss: -0.0220794677734375|cri_loss: -0.01053619384765625|unsuper_loss: 0.0 +average reward score: 4.65625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.50%) |Training time=0.43s (19.83%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1247|ppo_ep: 1|act_loss: -0.0011386871337890625|cri_loss: 0.00011157989501953125|unsuper_loss: 0.0 +average reward score: 3.87109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.74%) |Training time=0.43s (19.68%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1248|ppo_ep: 1|act_loss: 0.03851318359375|cri_loss: 0.021759033203125|unsuper_loss: 0.0 +average reward score: 4.359375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.88%) |Training time=0.42s (19.56%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.42 +[2023-04-14 09:33:38,001] [INFO] [logging.py:96:log_dist] [Rank 0] step=1250, skipped=15, lr=[9.196625731610486e-06, 9.196625731610486e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:33:38,019] [INFO] [timer.py:199:stop] epoch=0/micro_step=1250/global_step=1250, RunningAvgSamplesPerSec=109.75391174162782, CurrSamplesPerSec=119.57984018331966, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:33:38,112] [INFO] [logging.py:96:log_dist] [Rank 0] step=1250, skipped=17, lr=[4.765905038641933e-06, 4.765905038641933e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1249|ppo_ep: 1|act_loss: 0.003772735595703125|cri_loss: 0.0023212432861328125|unsuper_loss: 0.0 +average reward score: 4.2578125 +------------------------------------------------------------------------------------- +|E2E latency=2.63s |Gather latency=0.00s (0.00%) |Generate time=1.64s (62.29%) |Training time=0.43s (16.38%) |Others=0.56 (21.33%)|CurSamplesPerSec=12.18 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1250|ppo_ep: 1|act_loss: 0.05157470703125|cri_loss: 0.0281829833984375|unsuper_loss: 0.0 +average reward score: 3.8984375 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.64s (77.15%) |Training time=0.39s (18.21%) |Others=0.10 (4.64%)|CurSamplesPerSec=15.04 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1251|ppo_ep: 1|act_loss: -0.022918701171875|cri_loss: -0.0090789794921875|unsuper_loss: 0.0 +average reward score: 4.48828125 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.81s (77.81%) |Training time=0.42s (17.95%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1252|ppo_ep: 1|act_loss: 0.08087158203125|cri_loss: 0.043609619140625|unsuper_loss: 0.0 +average reward score: 3.96484375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.87%) |Training time=0.42s (19.56%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1253|ppo_ep: 1|act_loss: 0.01342010498046875|cri_loss: 0.0078125|unsuper_loss: 0.0 +average reward score: 4.3046875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.60%) |Training time=0.43s (19.84%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1254|ppo_ep: 1|act_loss: -0.002349853515625|cri_loss: -0.00026798248291015625|unsuper_loss: 0.0 +average reward score: 4.15234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.46%) |Training time=0.43s (19.98%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1255|ppo_ep: 1|act_loss: 0.020172119140625|cri_loss: 0.01102447509765625|unsuper_loss: 0.0 +average reward score: 4.0625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.01%) |Training time=0.44s (20.39%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1256|ppo_ep: 1|act_loss: 0.00833892822265625|cri_loss: 0.0047454833984375|unsuper_loss: 0.0 +average reward score: 4.07421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.31%) |Training time=0.44s (20.13%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1257|ppo_ep: 1|act_loss: 0.01318359375|cri_loss: 0.007080078125|unsuper_loss: 0.0 +average reward score: 4.17578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.22%) |Training time=0.44s (20.22%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1258|ppo_ep: 1|act_loss: 0.0224609375|cri_loss: 0.0117340087890625|unsuper_loss: 0.0 +average reward score: 4.3125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.92%) |Training time=0.44s (20.52%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42 +[2023-04-14 09:34:00,234] [INFO] [logging.py:96:log_dist] [Rank 0] step=1260, skipped=15, lr=[9.188731906581099e-06, 9.188731906581099e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:34:00,253] [INFO] [timer.py:199:stop] epoch=0/micro_step=1260/global_step=1260, RunningAvgSamplesPerSec=109.82973644022236, CurrSamplesPerSec=115.90207136793875, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:34:00,767] [INFO] [logging.py:96:log_dist] [Rank 0] step=1260, skipped=17, lr=[4.761821683848535e-06, 4.761821683848535e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1259|ppo_ep: 1|act_loss: -0.0025997161865234375|cri_loss: -9.34600830078125e-05|unsuper_loss: 0.0 +average reward score: 3.78125 +------------------------------------------------------------------------------------- +|E2E latency=2.58s |Gather latency=0.00s (0.00%) |Generate time=1.62s (62.88%) |Training time=0.86s (33.31%) |Others=0.10 (3.81%)|CurSamplesPerSec=12.39 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1260|ppo_ep: 1|act_loss: -0.0119476318359375|cri_loss: -0.00537872314453125|unsuper_loss: 0.0 +average reward score: 4.02734375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.03%) |Training time=0.44s (20.41%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1261|ppo_ep: 1|act_loss: -0.062286376953125|cri_loss: -0.030242919921875|unsuper_loss: 0.0 +average reward score: 4.45703125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.00%) |Training time=0.44s (20.45%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1262|ppo_ep: 1|act_loss: -0.04656982421875|cri_loss: -0.022186279296875|unsuper_loss: 0.0 +average reward score: 3.5546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.83%) |Training time=0.45s (20.62%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1263|ppo_ep: 1|act_loss: -0.03692626953125|cri_loss: -0.0166778564453125|unsuper_loss: 0.0 +average reward score: 3.6015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.03%) |Training time=0.44s (20.41%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1264|ppo_ep: 1|act_loss: 0.04559326171875|cri_loss: 0.0249481201171875|unsuper_loss: 0.0 +average reward score: 4.08984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.97%) |Training time=0.44s (20.48%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1265|ppo_ep: 1|act_loss: 0.032470703125|cri_loss: 0.0174713134765625|unsuper_loss: 0.0 +average reward score: 3.62109375 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.05%) |Training time=0.50s (22.48%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.45 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1266|ppo_ep: 1|act_loss: -0.03363037109375|cri_loss: -0.01528167724609375|unsuper_loss: 0.0 +average reward score: 4.3828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.99%) |Training time=0.44s (20.46%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1267|ppo_ep: 1|act_loss: -0.1041259765625|cri_loss: -0.0447998046875|unsuper_loss: 0.0 +average reward score: 4.125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.04%) |Training time=0.44s (20.42%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1268|ppo_ep: 1|act_loss: -0.0225982666015625|cri_loss: -0.00713348388671875|unsuper_loss: 0.0 +average reward score: 3.890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.65%) |Training time=0.45s (20.79%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.42 +[2023-04-14 09:34:22,397] [INFO] [logging.py:96:log_dist] [Rank 0] step=1270, skipped=15, lr=[9.180773400517926e-06, 9.180773400517926e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:34:22,416] [INFO] [timer.py:199:stop] epoch=0/micro_step=1270/global_step=1270, RunningAvgSamplesPerSec=109.86249534800284, CurrSamplesPerSec=121.51866314532418, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:34:22,508] [INFO] [logging.py:96:log_dist] [Rank 0] step=1270, skipped=17, lr=[4.757704803401435e-06, 4.757704803401435e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1269|ppo_ep: 1|act_loss: 0.058837890625|cri_loss: 0.031768798828125|unsuper_loss: 0.0 +average reward score: 4.7109375 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.69s (76.27%) |Training time=0.43s (19.29%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1270|ppo_ep: 1|act_loss: 0.06817626953125|cri_loss: 0.036041259765625|unsuper_loss: 0.0 +average reward score: 4.24609375 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.73s (76.10%) |Training time=0.44s (19.54%) |Others=0.10 (4.36%)|CurSamplesPerSec=14.11 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1271|ppo_ep: 1|act_loss: 0.0235595703125|cri_loss: 0.0127410888671875|unsuper_loss: 0.0 +average reward score: 4.41796875 +------------------------------------------------------------------------------------- +|E2E latency=2.45s |Gather latency=0.00s (0.00%) |Generate time=1.62s (66.17%) |Training time=0.44s (18.14%) |Others=0.38 (15.69%)|CurSamplesPerSec=13.07 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1272|ppo_ep: 1|act_loss: 0.017425537109375|cri_loss: 0.0092315673828125|unsuper_loss: 0.0 +average reward score: 4.04296875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.16%) |Training time=0.44s (20.29%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1273|ppo_ep: 1|act_loss: 0.0494384765625|cri_loss: 0.0261993408203125|unsuper_loss: 0.0 +average reward score: 4.09375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.29%) |Training time=0.44s (20.15%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1274|ppo_ep: 1|act_loss: -0.03271484375|cri_loss: -0.011749267578125|unsuper_loss: 0.0 +average reward score: 4.09765625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.27%) |Training time=0.44s (20.15%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1275|ppo_ep: 1|act_loss: -0.03143310546875|cri_loss: -0.0136566162109375|unsuper_loss: 0.0 +average reward score: 5.04296875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.49%) |Training time=0.43s (19.95%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1276|ppo_ep: 1|act_loss: -0.00504302978515625|cri_loss: -0.0022125244140625|unsuper_loss: 0.0 +average reward score: 4.6953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.41%) |Training time=0.43s (20.03%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1277|ppo_ep: 1|act_loss: -0.0264892578125|cri_loss: -0.011505126953125|unsuper_loss: 0.0 +average reward score: 4.92578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.32%) |Training time=0.44s (20.13%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1278|ppo_ep: 1|act_loss: 0.00826263427734375|cri_loss: 0.005523681640625|unsuper_loss: 0.0 +average reward score: 4.640625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.55%) |Training time=0.43s (19.90%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42 +[2023-04-14 09:34:44,417] [INFO] [logging.py:96:log_dist] [Rank 0] step=1280, skipped=15, lr=[9.172750331385226e-06, 9.172750331385226e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:34:44,435] [INFO] [timer.py:199:stop] epoch=0/micro_step=1280/global_step=1280, RunningAvgSamplesPerSec=109.9137738235626, CurrSamplesPerSec=118.03625573723781, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:34:44,519] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +[2023-04-14 09:34:44,520] [INFO] [logging.py:96:log_dist] [Rank 0] step=1280, skipped=18, lr=[4.753970996989642e-06, 4.753970996989642e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1279|ppo_ep: 1|act_loss: -0.0079345703125|cri_loss: -0.003147125244140625|unsuper_loss: 0.0 +average reward score: 4.71875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.76%) |Training time=0.43s (20.16%) |Others=0.09 (4.07%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.42 +[2023-04-14 09:34:46,838] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 1280|ppo_ep: 1|act_loss: -0.0105133056640625|cri_loss: -0.004421234130859375|unsuper_loss: 0.0 +average reward score: 4.8671875 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.66%) |Training time=0.43s (18.54%) |Others=0.09 (3.80%)|CurSamplesPerSec=13.80 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1281|ppo_ep: 1|act_loss: 0.026824951171875|cri_loss: 0.01517486572265625|unsuper_loss: 0.0 +average reward score: 5.51953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.81%) |Training time=0.45s (20.63%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42 +[2023-04-14 09:34:51,046] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 1282|ppo_ep: 1|act_loss: -0.035003662109375|cri_loss: -0.0055084228515625|unsuper_loss: 0.0 +average reward score: 4.12109375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.86%) |Training time=0.42s (19.55%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1283|ppo_ep: 1|act_loss: -0.0618896484375|cri_loss: -0.0242919921875|unsuper_loss: 0.0 +average reward score: 4.171875 +------------------------------------------------------------------------------------- +|E2E latency=2.62s |Gather latency=0.00s (0.00%) |Generate time=1.62s (61.85%) |Training time=0.44s (16.90%) |Others=0.56 (21.25%)|CurSamplesPerSec=12.21 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1284|ppo_ep: 1|act_loss: 0.0521240234375|cri_loss: 0.031524658203125|unsuper_loss: 0.0 +average reward score: 4.33984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.32%) |Training time=0.41s (19.09%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1285|ppo_ep: 1|act_loss: -0.01483154296875|cri_loss: 0.003387451171875|unsuper_loss: 0.0 +average reward score: 4.33984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.55%) |Training time=0.45s (20.90%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1286|ppo_ep: 1|act_loss: 0.0240478515625|cri_loss: 0.0176544189453125|unsuper_loss: 0.0 +average reward score: 4.65234375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.78%) |Training time=0.45s (20.66%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1287|ppo_ep: 1|act_loss: 0.038238525390625|cri_loss: 0.034881591796875|unsuper_loss: 0.0 +average reward score: 4.04296875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.77%) |Training time=0.45s (20.66%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42 +[2023-04-14 09:35:04,453] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, reducing to 4096 +epoch: 0|step: 1288|ppo_ep: 1|act_loss: 0.074951171875|cri_loss: 0.040374755859375|unsuper_loss: 0.0 +average reward score: 4.43359375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.51%) |Training time=0.43s (19.90%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.42 +[2023-04-14 09:35:06,600] [INFO] [logging.py:96:log_dist] [Rank 0] step=1290, skipped=17, lr=[9.166285470551969e-06, 9.166285470551969e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:35:06,618] [INFO] [timer.py:199:stop] epoch=0/micro_step=1290/global_step=1290, RunningAvgSamplesPerSec=109.96179691661273, CurrSamplesPerSec=110.87710045939353, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:35:06,710] [INFO] [logging.py:96:log_dist] [Rank 0] step=1290, skipped=19, lr=[4.750210129048555e-06, 4.750210129048555e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1289|ppo_ep: 1|act_loss: 0.126953125|cri_loss: 0.0889892578125|unsuper_loss: 0.0 +average reward score: 4.3125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.60%) |Training time=0.45s (20.88%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1290|ppo_ep: 1|act_loss: 0.0721435546875|cri_loss: 0.037872314453125|unsuper_loss: 0.0 +average reward score: 4.87109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.65%) |Training time=0.45s (20.80%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1291|ppo_ep: 1|act_loss: -0.00913238525390625|cri_loss: 0.00107574462890625|unsuper_loss: 0.0 +average reward score: 4.83984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.71%) |Training time=0.45s (20.72%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1292|ppo_ep: 1|act_loss: 0.02349853515625|cri_loss: 0.01522064208984375|unsuper_loss: 0.0 +average reward score: 4.265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.94%) |Training time=0.44s (20.49%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1293|ppo_ep: 1|act_loss: -0.00255584716796875|cri_loss: 0.004985809326171875|unsuper_loss: 0.0 +average reward score: 4.85546875 +------------------------------------------------------------------------------------- +|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.63s (71.22%) |Training time=0.44s (19.20%) |Others=0.22 (9.58%)|CurSamplesPerSec=14.00 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1294|ppo_ep: 1|act_loss: 0.004199981689453125|cri_loss: 0.0028781890869140625|unsuper_loss: 0.0 +average reward score: 4.55078125 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.29%) |Training time=0.46s (20.78%) |Others=0.13 (5.93%)|CurSamplesPerSec=14.45 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1295|ppo_ep: 1|act_loss: -0.0011138916015625|cri_loss: 0.0023860931396484375|unsuper_loss: 0.0 +average reward score: 4.6328125 +------------------------------------------------------------------------------------- +|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.69s (75.54%) |Training time=0.44s (19.73%) |Others=0.11 (4.73%)|CurSamplesPerSec=14.29 |AvgSamplesPerSec=14.42 +[2023-04-14 09:35:21,992] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 4096, reducing to 2048 +[2023-04-14 09:35:22,078] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384 +epoch: 0|step: 1296|ppo_ep: 1|act_loss: -0.000152587890625|cri_loss: 0.0015926361083984375|unsuper_loss: 0.0 +average reward score: 4.52734375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.64s (76.39%) |Training time=0.42s (19.43%) |Others=0.09 (4.18%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1297|ppo_ep: 1|act_loss: -0.1217041015625|cri_loss: -0.04266357421875|unsuper_loss: 0.0 +average reward score: 4.83203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.76%) |Training time=0.45s (20.68%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1298|ppo_ep: 1|act_loss: -0.1729736328125|cri_loss: -0.057708740234375|unsuper_loss: 0.0 +average reward score: 3.98046875 +------------------------------------------------------------------------------------- +|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.65s (69.72%) |Training time=0.44s (18.58%) |Others=0.28 (11.70%)|CurSamplesPerSec=13.53 |AvgSamplesPerSec=14.42 +[2023-04-14 09:35:28,767] [INFO] [logging.py:96:log_dist] [Rank 0] step=1300, skipped=18, lr=[9.158963278694846e-06, 9.158963278694846e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:35:28,785] [INFO] [timer.py:199:stop] epoch=0/micro_step=1300/global_step=1300, RunningAvgSamplesPerSec=110.0024475916536, CurrSamplesPerSec=127.61274741908309, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:35:28,879] [INFO] [logging.py:96:log_dist] [Rank 0] step=1300, skipped=20, lr=[4.746422244731743e-06, 4.746422244731743e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1299|ppo_ep: 1|act_loss: 0.052490234375|cri_loss: 0.03717041015625|unsuper_loss: 0.0 +average reward score: 5.03125 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.75s (77.32%) |Training time=0.41s (18.29%) |Others=0.10 (4.38%)|CurSamplesPerSec=14.15 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1300|ppo_ep: 1|act_loss: 0.082763671875|cri_loss: 0.052001953125|unsuper_loss: 0.0 +average reward score: 3.94921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.73%) |Training time=0.45s (20.73%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1301|ppo_ep: 1|act_loss: 0.08441162109375|cri_loss: 0.04736328125|unsuper_loss: 0.0 +average reward score: 4.875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.97%) |Training time=0.44s (20.47%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1302|ppo_ep: 1|act_loss: 0.0318603515625|cri_loss: 0.0181427001953125|unsuper_loss: 0.0 +average reward score: 4.953125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.76%) |Training time=0.45s (20.60%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1303|ppo_ep: 1|act_loss: 0.031951904296875|cri_loss: 0.020355224609375|unsuper_loss: 0.0 +average reward score: 4.77734375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.87%) |Training time=0.45s (20.58%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1304|ppo_ep: 1|act_loss: -0.00981903076171875|cri_loss: -0.0034885406494140625|unsuper_loss: 0.0 +average reward score: 4.796875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.75%) |Training time=0.45s (20.67%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1305|ppo_ep: 1|act_loss: -0.0030975341796875|cri_loss: -0.0007276535034179688|unsuper_loss: 0.0 +average reward score: 4.73828125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.02%) |Training time=0.44s (20.42%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1306|ppo_ep: 1|act_loss: 0.209228515625|cri_loss: 0.11376953125|unsuper_loss: 0.0 +average reward score: 4.63671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.80%) |Training time=0.45s (20.65%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1307|ppo_ep: 1|act_loss: 2.288818359375e-05|cri_loss: 0.0047454833984375|unsuper_loss: 0.0 +average reward score: 4.25 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.84%) |Training time=0.44s (20.58%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1308|ppo_ep: 1|act_loss: -0.1138916015625|cri_loss: -0.0352783203125|unsuper_loss: 0.0 +average reward score: 4.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.95%) |Training time=0.44s (20.48%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42 +[2023-04-14 09:35:50,565] [INFO] [logging.py:96:log_dist] [Rank 0] step=1310, skipped=18, lr=[9.150766485988878e-06, 9.150766485988878e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:35:50,950] [INFO] [timer.py:199:stop] epoch=0/micro_step=1310/global_step=1310, RunningAvgSamplesPerSec=109.92256089659742, CurrSamplesPerSec=49.722884509588546, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:35:51,044] [INFO] [logging.py:96:log_dist] [Rank 0] step=1310, skipped=20, lr=[4.742181853831721e-06, 4.742181853831721e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1309|ppo_ep: 1|act_loss: -0.1129150390625|cri_loss: -0.009521484375|unsuper_loss: 0.0 +average reward score: 4.7578125 +------------------------------------------------------------------------------------- +|E2E latency=2.69s |Gather latency=0.00s (0.00%) |Generate time=1.79s (66.42%) |Training time=0.81s (29.92%) |Others=0.10 (3.66%)|CurSamplesPerSec=11.88 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1310|ppo_ep: 1|act_loss: -0.14404296875|cri_loss: -0.050750732421875|unsuper_loss: 0.0 +average reward score: 5.12890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.92%) |Training time=0.44s (20.53%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1311|ppo_ep: 1|act_loss: 0.021026611328125|cri_loss: 0.028533935546875|unsuper_loss: 0.0 +average reward score: 5.12109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.80%) |Training time=0.45s (20.64%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1312|ppo_ep: 1|act_loss: 0.08160400390625|cri_loss: 0.04974365234375|unsuper_loss: 0.0 +average reward score: 4.71875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.04%) |Training time=0.44s (20.40%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1313|ppo_ep: 1|act_loss: 0.031280517578125|cri_loss: 0.0166015625|unsuper_loss: 0.0 +average reward score: 4.6484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.81%) |Training time=0.45s (20.64%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1314|ppo_ep: 1|act_loss: 0.0399169921875|cri_loss: 0.0246429443359375|unsuper_loss: 0.0 +average reward score: 4.23828125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.48%) |Training time=0.43s (19.95%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1315|ppo_ep: 1|act_loss: -0.033721923828125|cri_loss: -0.014495849609375|unsuper_loss: 0.0 +average reward score: 4.4609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.71%) |Training time=0.45s (20.73%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1316|ppo_ep: 1|act_loss: 0.016845703125|cri_loss: 0.015380859375|unsuper_loss: 0.0 +average reward score: 4.87109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.61%) |Training time=0.43s (19.78%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1317|ppo_ep: 1|act_loss: -0.05865478515625|cri_loss: -0.0282440185546875|unsuper_loss: 0.0 +average reward score: 4.81640625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.60%) |Training time=0.43s (19.83%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1318|ppo_ep: 1|act_loss: -0.00882720947265625|cri_loss: -0.00371551513671875|unsuper_loss: 0.0 +average reward score: 4.765625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.46%) |Training time=0.43s (19.98%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.42 +[2023-04-14 09:36:12,565] [INFO] [logging.py:96:log_dist] [Rank 0] step=1320, skipped=18, lr=[9.142505574988243e-06, 9.142505574988243e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:36:12,583] [INFO] [timer.py:199:stop] epoch=0/micro_step=1320/global_step=1320, RunningAvgSamplesPerSec=109.96346053463445, CurrSamplesPerSec=112.99348394896265, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:36:12,676] [INFO] [logging.py:96:log_dist] [Rank 0] step=1320, skipped=20, lr=[4.737908228387656e-06, 4.737908228387656e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1319|ppo_ep: 1|act_loss: 0.0806884765625|cri_loss: 0.045318603515625|unsuper_loss: 0.0 +average reward score: 4.796875 +------------------------------------------------------------------------------------- +|E2E latency=2.42s |Gather latency=0.00s (0.00%) |Generate time=1.62s (67.12%) |Training time=0.45s (18.45%) |Others=0.35 (14.44%)|CurSamplesPerSec=13.24 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1320|ppo_ep: 1|act_loss: 0.0528564453125|cri_loss: 0.027801513671875|unsuper_loss: 0.0 +average reward score: 4.83203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.92%) |Training time=0.44s (20.52%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1321|ppo_ep: 1|act_loss: 0.047027587890625|cri_loss: 0.02423095703125|unsuper_loss: 0.0 +average reward score: 5.3046875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.88%) |Training time=0.44s (20.56%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1322|ppo_ep: 1|act_loss: 0.0877685546875|cri_loss: 0.050018310546875|unsuper_loss: 0.0 +average reward score: 5.2265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.92%) |Training time=0.44s (20.53%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1323|ppo_ep: 1|act_loss: 0.02587890625|cri_loss: 0.01342010498046875|unsuper_loss: 0.0 +average reward score: 4.7421875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.74%) |Training time=0.44s (20.08%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1324|ppo_ep: 1|act_loss: 0.005809783935546875|cri_loss: 0.00439453125|unsuper_loss: 0.0 +average reward score: 5.30078125 +------------------------------------------------------------------------------------- +|E2E latency=2.62s |Gather latency=0.00s (0.00%) |Generate time=1.80s (68.66%) |Training time=0.44s (16.72%) |Others=0.38 (14.63%)|CurSamplesPerSec=12.22 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1325|ppo_ep: 1|act_loss: 0.014617919921875|cri_loss: 0.0078277587890625|unsuper_loss: 0.0 +average reward score: 4.8046875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.12%) |Training time=0.44s (20.29%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1326|ppo_ep: 1|act_loss: 0.04364013671875|cri_loss: 0.0245361328125|unsuper_loss: 0.0 +average reward score: 4.88671875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.52%) |Training time=0.45s (20.56%) |Others=0.11 (4.92%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1327|ppo_ep: 1|act_loss: -0.030120849609375|cri_loss: -0.01197052001953125|unsuper_loss: 0.0 +average reward score: 4.8828125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.13%) |Training time=0.44s (20.34%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1328|ppo_ep: 1|act_loss: 0.016876220703125|cri_loss: 0.01165771484375|unsuper_loss: 0.0 +average reward score: 4.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.74s (76.74%) |Training time=0.43s (18.87%) |Others=0.10 (4.39%)|CurSamplesPerSec=14.14 |AvgSamplesPerSec=14.42 +[2023-04-14 09:36:35,048] [INFO] [logging.py:96:log_dist] [Rank 0] step=1330, skipped=18, lr=[9.134180668139572e-06, 9.134180668139572e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:36:35,574] [INFO] [timer.py:199:stop] epoch=0/micro_step=1330/global_step=1330, RunningAvgSamplesPerSec=109.86232216762367, CurrSamplesPerSec=41.83281054421599, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:36:35,667] [INFO] [logging.py:96:log_dist] [Rank 0] step=1330, skipped=20, lr=[4.733601431744987e-06, 4.733601431744987e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1329|ppo_ep: 1|act_loss: 0.00733184814453125|cri_loss: 0.004749298095703125|unsuper_loss: 0.0 +average reward score: 4.1796875 +------------------------------------------------------------------------------------- +|E2E latency=2.67s |Gather latency=0.00s (0.00%) |Generate time=1.64s (61.50%) |Training time=0.93s (34.79%) |Others=0.10 (3.71%)|CurSamplesPerSec=12.00 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1330|ppo_ep: 1|act_loss: 0.0728759765625|cri_loss: 0.03802490234375|unsuper_loss: 0.0 +average reward score: 5.4375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.93%) |Training time=0.44s (20.51%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1331|ppo_ep: 1|act_loss: 0.013946533203125|cri_loss: 0.00748443603515625|unsuper_loss: 0.0 +average reward score: 4.7109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.00%) |Training time=0.44s (20.44%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1332|ppo_ep: 1|act_loss: -0.0016345977783203125|cri_loss: 0.0003871917724609375|unsuper_loss: 0.0 +average reward score: 4.7421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.33%) |Training time=0.43s (20.11%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1333|ppo_ep: 1|act_loss: -0.047210693359375|cri_loss: -0.0224609375|unsuper_loss: 0.0 +average reward score: 4.4765625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.00%) |Training time=0.44s (20.38%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1334|ppo_ep: 1|act_loss: -0.0223236083984375|cri_loss: -0.0106201171875|unsuper_loss: 0.0 +average reward score: 4.67578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.76%) |Training time=0.45s (20.69%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1335|ppo_ep: 1|act_loss: -0.031768798828125|cri_loss: -0.0149383544921875|unsuper_loss: 0.0 +average reward score: 4.6484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.94%) |Training time=0.44s (20.49%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1336|ppo_ep: 1|act_loss: -0.0111541748046875|cri_loss: -0.005096435546875|unsuper_loss: 0.0 +average reward score: 4.22265625 +------------------------------------------------------------------------------------- +|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.62s (68.90%) |Training time=0.44s (18.63%) |Others=0.29 (12.47%)|CurSamplesPerSec=13.58 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1337|ppo_ep: 1|act_loss: 0.04083251953125|cri_loss: 0.023773193359375|unsuper_loss: 0.0 +average reward score: 4.734375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.96%) |Training time=0.44s (20.47%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1338|ppo_ep: 1|act_loss: 0.0168609619140625|cri_loss: 0.009246826171875|unsuper_loss: 0.0 +average reward score: 4.48828125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.96%) |Training time=0.44s (20.48%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42 +[2023-04-14 09:36:57,425] [INFO] [logging.py:96:log_dist] [Rank 0] step=1340, skipped=18, lr=[9.125791888838067e-06, 9.125791888838067e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:36:57,437] [INFO] [timer.py:199:stop] epoch=0/micro_step=1340/global_step=1340, RunningAvgSamplesPerSec=109.8848984580583, CurrSamplesPerSec=100.39286272603175, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:36:57,549] [INFO] [logging.py:96:log_dist] [Rank 0] step=1340, skipped=20, lr=[4.729261527740829e-06, 4.729261527740829e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1339|ppo_ep: 1|act_loss: 0.03173828125|cri_loss: 0.01629638671875|unsuper_loss: 0.0 +average reward score: 4.37890625 +------------------------------------------------------------------------------------- +|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.09%) |Training time=0.49s (22.06%) |Others=0.11 (4.85%)|CurSamplesPerSec=14.35 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1340|ppo_ep: 1|act_loss: 0.06378173828125|cri_loss: 0.037933349609375|unsuper_loss: 0.0 +average reward score: 4.14453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.80%) |Training time=0.45s (20.65%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1341|ppo_ep: 1|act_loss: 0.00018310546875|cri_loss: 0.0010709762573242188|unsuper_loss: 0.0 +average reward score: 4.68359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.92%) |Training time=0.44s (20.53%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1342|ppo_ep: 1|act_loss: 0.042938232421875|cri_loss: 0.0228118896484375|unsuper_loss: 0.0 +average reward score: 5.01171875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.00%) |Training time=0.44s (20.46%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1343|ppo_ep: 1|act_loss: -0.014617919921875|cri_loss: -0.00555419921875|unsuper_loss: 0.0 +average reward score: 3.982421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.58%) |Training time=0.43s (19.86%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1344|ppo_ep: 1|act_loss: 0.02569580078125|cri_loss: 0.014404296875|unsuper_loss: 0.0 +average reward score: 4.734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.67s (76.90%) |Training time=0.40s (18.48%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1345|ppo_ep: 1|act_loss: 0.1107177734375|cri_loss: 0.06866455078125|unsuper_loss: 0.0 +average reward score: 4.34375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.83%) |Training time=0.45s (20.60%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1346|ppo_ep: 1|act_loss: -0.05108642578125|cri_loss: -0.02252197265625|unsuper_loss: 0.0 +average reward score: 4.9453125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.98%) |Training time=0.47s (21.53%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1347|ppo_ep: 1|act_loss: -0.043212890625|cri_loss: -0.020538330078125|unsuper_loss: 0.0 +average reward score: 5.09375 +------------------------------------------------------------------------------------- +|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.62s (76.96%) |Training time=0.39s (18.37%) |Others=0.10 (4.68%)|CurSamplesPerSec=15.20 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1348|ppo_ep: 1|act_loss: 0.0045166015625|cri_loss: 0.00312042236328125|unsuper_loss: 0.0 +average reward score: 4.6953125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.55%) |Training time=0.45s (20.92%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.42 +[2023-04-14 09:37:19,283] [INFO] [logging.py:96:log_dist] [Rank 0] step=1350, skipped=18, lr=[9.117339361425675e-06, 9.117339361425675e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:37:19,301] [INFO] [timer.py:199:stop] epoch=0/micro_step=1350/global_step=1350, RunningAvgSamplesPerSec=109.93319858905161, CurrSamplesPerSec=114.07213302011552, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:37:19,394] [INFO] [logging.py:96:log_dist] [Rank 0] step=1350, skipped=20, lr=[4.72488858070303e-06, 4.72488858070303e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1349|ppo_ep: 1|act_loss: -0.035736083984375|cri_loss: -0.01666259765625|unsuper_loss: 0.0 +average reward score: 4.421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.11%) |Training time=0.44s (20.37%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1350|ppo_ep: 1|act_loss: -0.00806427001953125|cri_loss: -0.0030517578125|unsuper_loss: 0.0 +average reward score: 4.8515625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.02%) |Training time=0.45s (20.48%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1351|ppo_ep: 1|act_loss: -0.015472412109375|cri_loss: -0.00713348388671875|unsuper_loss: 0.0 +average reward score: 5.0859375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.71%) |Training time=0.46s (20.82%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1352|ppo_ep: 1|act_loss: 0.00513458251953125|cri_loss: 0.0028324127197265625|unsuper_loss: 0.0 +average reward score: 4.546875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.37%) |Training time=0.46s (21.11%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1353|ppo_ep: 1|act_loss: -0.00102996826171875|cri_loss: 0.0009441375732421875|unsuper_loss: 0.0 +average reward score: 3.984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.71%) |Training time=0.45s (20.75%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1354|ppo_ep: 1|act_loss: -0.0282135009765625|cri_loss: -0.01287841796875|unsuper_loss: 0.0 +average reward score: 4.58984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.74%) |Training time=0.43s (19.70%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1355|ppo_ep: 1|act_loss: 0.0009326934814453125|cri_loss: 0.0012836456298828125|unsuper_loss: 0.0 +average reward score: 4.5546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.45%) |Training time=0.43s (20.01%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1356|ppo_ep: 1|act_loss: -0.025543212890625|cri_loss: -0.0117034912109375|unsuper_loss: 0.0 +average reward score: 4.8828125 +------------------------------------------------------------------------------------- +|E2E latency=3.70s |Gather latency=0.00s (0.00%) |Generate time=1.81s (49.06%) |Training time=0.44s (11.88%) |Others=1.44 (39.06%)|CurSamplesPerSec=8.65 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1357|ppo_ep: 1|act_loss: 0.0008974075317382812|cri_loss: 0.001956939697265625|unsuper_loss: 0.0 +average reward score: 5.24609375 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.73s (76.27%) |Training time=0.44s (19.26%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.09 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1358|ppo_ep: 1|act_loss: 0.018463134765625|cri_loss: 0.010040283203125|unsuper_loss: 0.0 +average reward score: 4.72265625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.81%) |Training time=0.43s (19.63%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.42 +[2023-04-14 09:37:42,662] [INFO] [logging.py:96:log_dist] [Rank 0] step=1360, skipped=18, lr=[9.10882321118924e-06, 9.10882321118924e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:37:42,680] [INFO] [timer.py:199:stop] epoch=0/micro_step=1360/global_step=1360, RunningAvgSamplesPerSec=109.96246013017377, CurrSamplesPerSec=110.81549098318833, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:37:42,773] [INFO] [logging.py:96:log_dist] [Rank 0] step=1360, skipped=20, lr=[4.720482655449212e-06, 4.720482655449212e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1359|ppo_ep: 1|act_loss: 0.0360107421875|cri_loss: 0.0187225341796875|unsuper_loss: 0.0 +average reward score: 4.9921875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.74%) |Training time=0.45s (20.74%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1360|ppo_ep: 1|act_loss: 0.0202484130859375|cri_loss: 0.0105438232421875|unsuper_loss: 0.0 +average reward score: 5.25 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.40%) |Training time=0.46s (21.07%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1361|ppo_ep: 1|act_loss: 0.0132904052734375|cri_loss: 0.007068634033203125|unsuper_loss: 0.0 +average reward score: 4.32421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.58%) |Training time=0.45s (20.86%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1362|ppo_ep: 1|act_loss: -0.06787109375|cri_loss: -0.032440185546875|unsuper_loss: 0.0 +average reward score: 5.04296875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.30%) |Training time=0.46s (21.17%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1363|ppo_ep: 1|act_loss: -0.052978515625|cri_loss: -0.02508544921875|unsuper_loss: 0.0 +average reward score: 5.30859375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.46%) |Training time=0.46s (20.91%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1364|ppo_ep: 1|act_loss: -0.09228515625|cri_loss: -0.043792724609375|unsuper_loss: 0.0 +average reward score: 5.0703125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.14%) |Training time=0.47s (21.34%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1365|ppo_ep: 1|act_loss: -0.03424072265625|cri_loss: -0.0164642333984375|unsuper_loss: 0.0 +average reward score: 4.921875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.29%) |Training time=0.46s (21.19%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1366|ppo_ep: 1|act_loss: -0.0079803466796875|cri_loss: -0.0024280548095703125|unsuper_loss: 0.0 +average reward score: 4.9375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.89%) |Training time=0.45s (20.59%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1367|ppo_ep: 1|act_loss: 0.0533447265625|cri_loss: 0.0282745361328125|unsuper_loss: 0.0 +average reward score: 5.24609375 +------------------------------------------------------------------------------------- +|E2E latency=2.47s |Gather latency=0.00s (0.00%) |Generate time=1.62s (65.55%) |Training time=0.46s (18.74%) |Others=0.39 (15.72%)|CurSamplesPerSec=12.95 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1368|ppo_ep: 1|act_loss: -0.009552001953125|cri_loss: -0.0042877197265625|unsuper_loss: 0.0 +average reward score: 5.08984375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.40%) |Training time=0.46s (21.07%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.42 +[2023-04-14 09:38:04,730] [INFO] [logging.py:96:log_dist] [Rank 0] step=1370, skipped=18, lr=[9.100243564358655e-06, 9.100243564358655e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:38:04,748] [INFO] [timer.py:199:stop] epoch=0/micro_step=1370/global_step=1370, RunningAvgSamplesPerSec=109.94952050263434, CurrSamplesPerSec=109.40402084107701, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:38:04,841] [INFO] [logging.py:96:log_dist] [Rank 0] step=1370, skipped=20, lr=[4.716043817285819e-06, 4.716043817285819e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1369|ppo_ep: 1|act_loss: 0.04583740234375|cri_loss: 0.0237579345703125|unsuper_loss: 0.0 +average reward score: 5.40234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.54%) |Training time=0.46s (20.94%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1370|ppo_ep: 1|act_loss: 0.039947509765625|cri_loss: 0.0226593017578125|unsuper_loss: 0.0 +average reward score: 4.58203125 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.57%) |Training time=0.46s (20.94%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1371|ppo_ep: 1|act_loss: 0.01641845703125|cri_loss: 0.0088348388671875|unsuper_loss: 0.0 +average reward score: 5.04296875 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.79s (76.47%) |Training time=0.45s (19.29%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1372|ppo_ep: 1|act_loss: 0.012786865234375|cri_loss: 0.007312774658203125|unsuper_loss: 0.0 +average reward score: 4.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.91%) |Training time=0.47s (21.22%) |Others=0.11 (4.86%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1373|ppo_ep: 1|act_loss: 0.037200927734375|cri_loss: 0.023590087890625|unsuper_loss: 0.0 +average reward score: 4.7421875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.93%) |Training time=0.45s (20.55%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1374|ppo_ep: 1|act_loss: -0.037384033203125|cri_loss: -0.01812744140625|unsuper_loss: 0.0 +average reward score: 5.09375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.33%) |Training time=0.46s (21.15%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1375|ppo_ep: 1|act_loss: -0.021392822265625|cri_loss: -0.00673675537109375|unsuper_loss: 0.0 +average reward score: 4.43359375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.21%) |Training time=0.46s (21.27%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1376|ppo_ep: 1|act_loss: 0.03839111328125|cri_loss: 0.020843505859375|unsuper_loss: 0.0 +average reward score: 4.7578125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.26%) |Training time=0.46s (21.21%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1377|ppo_ep: 1|act_loss: 0.05328369140625|cri_loss: 0.0286865234375|unsuper_loss: 0.0 +average reward score: 4.87890625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.99%) |Training time=0.47s (21.46%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1378|ppo_ep: 1|act_loss: 0.003376007080078125|cri_loss: 0.0021820068359375|unsuper_loss: 0.0 +average reward score: 4.6171875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.39%) |Training time=0.46s (21.09%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.42 +[2023-04-14 09:38:26,722] [INFO] [logging.py:96:log_dist] [Rank 0] step=1380, skipped=18, lr=[9.091600548104982e-06, 9.091600548104982e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:38:26,740] [INFO] [timer.py:199:stop] epoch=0/micro_step=1380/global_step=1380, RunningAvgSamplesPerSec=109.93347342240803, CurrSamplesPerSec=109.94820174058928, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:38:26,833] [INFO] [logging.py:96:log_dist] [Rank 0] step=1380, skipped=20, lr=[4.711572132007139e-06, 4.711572132007139e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1379|ppo_ep: 1|act_loss: 0.05718994140625|cri_loss: 0.0311737060546875|unsuper_loss: 0.0 +average reward score: 4.49609375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.42%) |Training time=0.46s (20.95%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1380|ppo_ep: 1|act_loss: 0.00634765625|cri_loss: 0.0057830810546875|unsuper_loss: 0.0 +average reward score: 4.625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.11%) |Training time=0.47s (21.36%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1381|ppo_ep: 1|act_loss: -0.0004782676696777344|cri_loss: 3.814697265625e-06|unsuper_loss: 0.0 +average reward score: 4.6953125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.61%) |Training time=0.45s (20.88%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1382|ppo_ep: 1|act_loss: -0.0220947265625|cri_loss: -0.0086822509765625|unsuper_loss: 0.0 +average reward score: 4.65625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.37%) |Training time=0.46s (21.11%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1383|ppo_ep: 1|act_loss: 0.0055999755859375|cri_loss: 0.00392913818359375|unsuper_loss: 0.0 +average reward score: 4.79296875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.45%) |Training time=0.46s (21.01%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1384|ppo_ep: 1|act_loss: -0.0077667236328125|cri_loss: -0.003047943115234375|unsuper_loss: 0.0 +average reward score: 4.5703125 +------------------------------------------------------------------------------------- +|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.73s (75.31%) |Training time=0.47s (20.39%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.94 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1385|ppo_ep: 1|act_loss: -0.040283203125|cri_loss: -0.0183868408203125|unsuper_loss: 0.0 +average reward score: 5.046875 +------------------------------------------------------------------------------------- +|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.82s (76.63%) |Training time=0.46s (19.22%) |Others=0.10 (4.15%)|CurSamplesPerSec=13.49 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1386|ppo_ep: 1|act_loss: -0.00937652587890625|cri_loss: -0.00380706787109375|unsuper_loss: 0.0 +average reward score: 4.546875 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.73s (75.67%) |Training time=0.46s (20.02%) |Others=0.10 (4.31%)|CurSamplesPerSec=14.02 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1387|ppo_ep: 1|act_loss: -0.01216888427734375|cri_loss: -0.0052642822265625|unsuper_loss: 0.0 +average reward score: 4.80859375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.95%) |Training time=0.47s (21.50%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1388|ppo_ep: 1|act_loss: 0.021453857421875|cri_loss: 0.0120391845703125|unsuper_loss: 0.0 +average reward score: 4.8046875 +------------------------------------------------------------------------------------- +|E2E latency=2.38s |Gather latency=0.00s (0.00%) |Generate time=1.65s (69.38%) |Training time=0.44s (18.50%) |Others=0.29 (12.11%)|CurSamplesPerSec=13.42 |AvgSamplesPerSec=14.42 +[2023-04-14 09:38:49,163] [INFO] [logging.py:96:log_dist] [Rank 0] step=1390, skipped=18, lr=[9.082894290538575e-06, 9.082894290538575e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:38:49,180] [INFO] [timer.py:199:stop] epoch=0/micro_step=1390/global_step=1390, RunningAvgSamplesPerSec=109.91676030089931, CurrSamplesPerSec=106.04061561798818, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:38:49,273] [INFO] [logging.py:96:log_dist] [Rank 0] step=1390, skipped=20, lr=[4.707067665894335e-06, 4.707067665894335e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1389|ppo_ep: 1|act_loss: 0.0421142578125|cri_loss: 0.0222015380859375|unsuper_loss: 0.0 +average reward score: 4.61328125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.25%) |Training time=0.46s (21.25%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1390|ppo_ep: 1|act_loss: 0.01336669921875|cri_loss: 0.006893157958984375|unsuper_loss: 0.0 +average reward score: 4.62109375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.60%) |Training time=0.46s (20.91%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1391|ppo_ep: 1|act_loss: 0.06072998046875|cri_loss: 0.03216552734375|unsuper_loss: 0.0 +average reward score: 4.89453125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.21%) |Training time=0.47s (21.29%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1392|ppo_ep: 1|act_loss: -0.034271240234375|cri_loss: -0.0163726806640625|unsuper_loss: 0.0 +average reward score: 4.734375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.46%) |Training time=0.46s (21.02%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1393|ppo_ep: 1|act_loss: 0.08160400390625|cri_loss: 0.043060302734375|unsuper_loss: 0.0 +average reward score: 4.61328125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.03%) |Training time=0.45s (20.49%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1394|ppo_ep: 1|act_loss: -0.052520751953125|cri_loss: -0.0250244140625|unsuper_loss: 0.0 +average reward score: 4.671875 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.62s (69.56%) |Training time=0.46s (19.95%) |Others=0.24 (10.49%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1395|ppo_ep: 1|act_loss: -0.0552978515625|cri_loss: -0.024566650390625|unsuper_loss: 0.0 +average reward score: 4.421875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.01%) |Training time=0.47s (21.47%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1396|ppo_ep: 1|act_loss: -0.06494140625|cri_loss: -0.030853271484375|unsuper_loss: 0.0 +average reward score: 4.53125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.22%) |Training time=0.47s (21.28%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1397|ppo_ep: 1|act_loss: -0.051849365234375|cri_loss: -0.0247039794921875|unsuper_loss: 0.0 +average reward score: 5.23828125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.59%) |Training time=0.45s (20.87%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1398|ppo_ep: 1|act_loss: -0.0325927734375|cri_loss: -0.0157318115234375|unsuper_loss: 0.0 +average reward score: 4.72265625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.21%) |Training time=0.42s (19.24%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.42 +[2023-04-14 09:39:11,139] [INFO] [logging.py:96:log_dist] [Rank 0] step=1400, skipped=18, lr=[9.074124920707169e-06, 9.074124920707169e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:39:11,157] [INFO] [timer.py:199:stop] epoch=0/micro_step=1400/global_step=1400, RunningAvgSamplesPerSec=109.92654260577895, CurrSamplesPerSec=134.14327848044334, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:39:11,252] [INFO] [logging.py:96:log_dist] [Rank 0] step=1400, skipped=20, lr=[4.702530485714462e-06, 4.702530485714462e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1399|ppo_ep: 1|act_loss: -0.02215576171875|cri_loss: -0.009674072265625|unsuper_loss: 0.0 +average reward score: 5.1875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.68s (76.91%) |Training time=0.40s (18.43%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1400|ppo_ep: 1|act_loss: 0.0113372802734375|cri_loss: 0.006031036376953125|unsuper_loss: 0.0 +average reward score: 5.1953125 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.66s (74.61%) |Training time=0.46s (20.91%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.39 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1401|ppo_ep: 1|act_loss: 0.0224761962890625|cri_loss: 0.01213836669921875|unsuper_loss: 0.0 +average reward score: 5.0234375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.66s (75.85%) |Training time=0.43s (19.63%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1402|ppo_ep: 1|act_loss: -0.0025653839111328125|cri_loss: -0.000286102294921875|unsuper_loss: 0.0 +average reward score: 5.10546875 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.66s (75.16%) |Training time=0.45s (20.36%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1403|ppo_ep: 1|act_loss: 0.00897216796875|cri_loss: 0.005275726318359375|unsuper_loss: 0.0 +average reward score: 4.92578125 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.66s (75.43%) |Training time=0.44s (20.11%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1404|ppo_ep: 1|act_loss: -0.051849365234375|cri_loss: -0.02508544921875|unsuper_loss: 0.0 +average reward score: 4.6640625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.88%) |Training time=0.43s (19.60%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1405|ppo_ep: 1|act_loss: 0.0389404296875|cri_loss: 0.02093505859375|unsuper_loss: 0.0 +average reward score: 4.6796875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.12%) |Training time=0.44s (20.32%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1406|ppo_ep: 1|act_loss: -0.00616455078125|cri_loss: -0.00276947021484375|unsuper_loss: 0.0 +average reward score: 4.578125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.10%) |Training time=0.42s (19.32%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1407|ppo_ep: 1|act_loss: -0.0345458984375|cri_loss: -0.016998291015625|unsuper_loss: 0.0 +average reward score: 4.8125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.68s (77.05%) |Training time=0.40s (18.42%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1408|ppo_ep: 1|act_loss: -0.016632080078125|cri_loss: -0.00641632080078125|unsuper_loss: 0.0 +average reward score: 4.75390625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.81%) |Training time=0.43s (19.60%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.42 +[2023-04-14 09:39:33,017] [INFO] [logging.py:96:log_dist] [Rank 0] step=1410, skipped=18, lr=[9.065292568593984e-06, 9.065292568593984e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:39:33,035] [INFO] [timer.py:199:stop] epoch=0/micro_step=1410/global_step=1410, RunningAvgSamplesPerSec=109.99575030295641, CurrSamplesPerSec=120.44529078042189, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:39:33,127] [INFO] [logging.py:96:log_dist] [Rank 0] step=1410, skipped=20, lr=[4.697960658719475e-06, 4.697960658719475e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1409|ppo_ep: 1|act_loss: 0.052001953125|cri_loss: 0.027801513671875|unsuper_loss: 0.0 +average reward score: 5.2421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.68%) |Training time=0.43s (19.78%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1410|ppo_ep: 1|act_loss: 0.01788330078125|cri_loss: 0.0096588134765625|unsuper_loss: 0.0 +average reward score: 5.16015625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.56%) |Training time=0.43s (19.92%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1411|ppo_ep: 1|act_loss: 0.0845947265625|cri_loss: 0.04779052734375|unsuper_loss: 0.0 +average reward score: 4.6328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.32%) |Training time=0.44s (20.15%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1412|ppo_ep: 1|act_loss: -0.00656890869140625|cri_loss: -0.0027256011962890625|unsuper_loss: 0.0 +average reward score: 4.71484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.55%) |Training time=0.41s (18.89%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1413|ppo_ep: 1|act_loss: 0.018768310546875|cri_loss: 0.00978851318359375|unsuper_loss: 0.0 +average reward score: 4.515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.15%) |Training time=0.41s (18.95%) |Others=0.11 (4.90%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1414|ppo_ep: 1|act_loss: -0.06195068359375|cri_loss: -0.029815673828125|unsuper_loss: 0.0 +average reward score: 4.76171875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.08%) |Training time=0.49s (22.42%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1415|ppo_ep: 1|act_loss: -0.056732177734375|cri_loss: -0.0274505615234375|unsuper_loss: 0.0 +average reward score: 4.86328125 +------------------------------------------------------------------------------------- +|E2E latency=2.45s |Gather latency=0.00s (0.00%) |Generate time=1.86s (76.14%) |Training time=0.49s (19.85%) |Others=0.10 (4.01%)|CurSamplesPerSec=13.07 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1416|ppo_ep: 1|act_loss: -0.016510009765625|cri_loss: -0.007720947265625|unsuper_loss: 0.0 +average reward score: 5.296875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.30%) |Training time=0.48s (22.19%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1417|ppo_ep: 1|act_loss: 0.018585205078125|cri_loss: 0.01007080078125|unsuper_loss: 0.0 +average reward score: 5.10546875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.31%) |Training time=0.48s (22.19%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1418|ppo_ep: 1|act_loss: 0.04681396484375|cri_loss: 0.0242156982421875|unsuper_loss: 0.0 +average reward score: 5.08984375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.18%) |Training time=0.49s (22.35%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.42 +[2023-04-14 09:39:55,090] [INFO] [logging.py:96:log_dist] [Rank 0] step=1420, skipped=18, lr=[9.056397365115782e-06, 9.056397365115782e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:39:55,108] [INFO] [timer.py:199:stop] epoch=0/micro_step=1420/global_step=1420, RunningAvgSamplesPerSec=109.97072218501401, CurrSamplesPerSec=93.94947837627466, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:39:55,200] [INFO] [logging.py:96:log_dist] [Rank 0] step=1420, skipped=20, lr=[4.693358252645234e-06, 4.693358252645234e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1419|ppo_ep: 1|act_loss: 0.0171661376953125|cri_loss: 0.00890350341796875|unsuper_loss: 0.0 +average reward score: 5.17578125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.59%) |Training time=0.50s (22.96%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1420|ppo_ep: 1|act_loss: -0.01690673828125|cri_loss: -0.00787353515625|unsuper_loss: 0.0 +average reward score: 4.515625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.05%) |Training time=0.47s (21.43%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1421|ppo_ep: 1|act_loss: -0.0090484619140625|cri_loss: -0.003841400146484375|unsuper_loss: 0.0 +average reward score: 4.59375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.37%) |Training time=0.48s (22.14%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1422|ppo_ep: 1|act_loss: -0.03363037109375|cri_loss: -0.01525115966796875|unsuper_loss: 0.0 +average reward score: 4.875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.55%) |Training time=0.48s (21.94%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1423|ppo_ep: 1|act_loss: -0.00421142578125|cri_loss: -0.001033782958984375|unsuper_loss: 0.0 +average reward score: 5.0703125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.37%) |Training time=0.48s (22.13%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1424|ppo_ep: 1|act_loss: 0.040191650390625|cri_loss: 0.020843505859375|unsuper_loss: 0.0 +average reward score: 4.77734375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.41%) |Training time=0.48s (22.07%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1425|ppo_ep: 1|act_loss: 0.0694580078125|cri_loss: 0.03741455078125|unsuper_loss: 0.0 +average reward score: 4.4765625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.88%) |Training time=0.49s (22.61%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1426|ppo_ep: 1|act_loss: -0.0015506744384765625|cri_loss: 0.000782012939453125|unsuper_loss: 0.0 +average reward score: 4.77734375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.80%) |Training time=0.50s (22.71%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1427|ppo_ep: 1|act_loss: -0.0264739990234375|cri_loss: -0.0119781494140625|unsuper_loss: 0.0 +average reward score: 4.70703125 +------------------------------------------------------------------------------------- +|E2E latency=2.44s |Gather latency=0.00s (0.00%) |Generate time=1.58s (64.64%) |Training time=0.50s (20.50%) |Others=0.36 (14.86%)|CurSamplesPerSec=13.11 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1428|ppo_ep: 1|act_loss: -0.052154541015625|cri_loss: -0.024078369140625|unsuper_loss: 0.0 +average reward score: 4.7578125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.42%) |Training time=0.50s (23.06%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.42 +[2023-04-14 09:40:17,160] [INFO] [logging.py:96:log_dist] [Rank 0] step=1430, skipped=18, lr=[9.04743944212094e-06, 9.04743944212094e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:40:17,178] [INFO] [timer.py:199:stop] epoch=0/micro_step=1430/global_step=1430, RunningAvgSamplesPerSec=109.87632875471583, CurrSamplesPerSec=92.99834883909462, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:40:17,305] [INFO] [logging.py:96:log_dist] [Rank 0] step=1430, skipped=20, lr=[4.688723335710501e-06, 4.688723335710501e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1429|ppo_ep: 1|act_loss: 0.0249176025390625|cri_loss: 0.01410675048828125|unsuper_loss: 0.0 +average reward score: 4.578125 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.20%) |Training time=0.51s (22.89%) |Others=0.13 (5.91%)|CurSamplesPerSec=14.45 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1430|ppo_ep: 1|act_loss: 0.01303863525390625|cri_loss: 0.00823974609375|unsuper_loss: 0.0 +average reward score: 5.10546875 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.73s (74.82%) |Training time=0.48s (20.91%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.82 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1431|ppo_ep: 1|act_loss: -0.0116119384765625|cri_loss: -0.0054779052734375|unsuper_loss: 0.0 +average reward score: 4.90625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.72%) |Training time=0.50s (22.78%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1432|ppo_ep: 1|act_loss: 0.0116424560546875|cri_loss: 0.00733184814453125|unsuper_loss: 0.0 +average reward score: 5.52734375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.62%) |Training time=0.48s (21.90%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1433|ppo_ep: 1|act_loss: 0.019805908203125|cri_loss: 0.01030731201171875|unsuper_loss: 0.0 +average reward score: 4.67578125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.72%) |Training time=0.50s (22.78%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1434|ppo_ep: 1|act_loss: 0.0243682861328125|cri_loss: 0.0135650634765625|unsuper_loss: 0.0 +average reward score: 5.1171875 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.72%) |Training time=0.52s (23.55%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.49 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1435|ppo_ep: 1|act_loss: -0.00748443603515625|cri_loss: -0.0032596588134765625|unsuper_loss: 0.0 +average reward score: 5.109375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.50%) |Training time=0.50s (22.77%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1436|ppo_ep: 1|act_loss: 0.019012451171875|cri_loss: 0.01018524169921875|unsuper_loss: 0.0 +average reward score: 5.3984375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.41%) |Training time=0.50s (23.08%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1437|ppo_ep: 1|act_loss: 0.01898193359375|cri_loss: 0.010498046875|unsuper_loss: 0.0 +average reward score: 4.64453125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.64%) |Training time=0.50s (22.85%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1438|ppo_ep: 1|act_loss: 0.0021076202392578125|cri_loss: 0.0012311935424804688|unsuper_loss: 0.0 +average reward score: 5.296875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.67%) |Training time=0.50s (22.84%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.42 +[2023-04-14 09:40:39,193] [INFO] [logging.py:96:log_dist] [Rank 0] step=1440, skipped=18, lr=[9.038418932387486e-06, 9.038418932387486e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:40:39,211] [INFO] [timer.py:199:stop] epoch=0/micro_step=1440/global_step=1440, RunningAvgSamplesPerSec=109.76133992307255, CurrSamplesPerSec=94.28474446729834, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:40:39,304] [INFO] [logging.py:96:log_dist] [Rank 0] step=1440, skipped=20, lr=[4.684055976615924e-06, 4.684055976615924e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1439|ppo_ep: 1|act_loss: -0.04058837890625|cri_loss: -0.0198516845703125|unsuper_loss: 0.0 +average reward score: 5.4609375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.59%) |Training time=0.50s (22.94%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1440|ppo_ep: 1|act_loss: -0.08544921875|cri_loss: -0.04156494140625|unsuper_loss: 0.0 +average reward score: 4.83984375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.56%) |Training time=0.50s (22.86%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1441|ppo_ep: 1|act_loss: 0.008087158203125|cri_loss: 0.005504608154296875|unsuper_loss: 0.0 +average reward score: 5.125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.60%) |Training time=0.50s (22.90%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1442|ppo_ep: 1|act_loss: -0.0049896240234375|cri_loss: -0.001865386962890625|unsuper_loss: 0.0 +average reward score: 5.9921875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.33%) |Training time=0.51s (23.17%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1443|ppo_ep: 1|act_loss: 0.02874755859375|cri_loss: 0.01544189453125|unsuper_loss: 0.0 +average reward score: 5.15625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.61%) |Training time=0.50s (22.89%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1444|ppo_ep: 1|act_loss: 0.0271453857421875|cri_loss: 0.0143585205078125|unsuper_loss: 0.0 +average reward score: 4.92578125 +------------------------------------------------------------------------------------- +|E2E latency=2.45s |Gather latency=0.00s (0.00%) |Generate time=1.78s (72.41%) |Training time=0.58s (23.59%) |Others=0.10 (4.01%)|CurSamplesPerSec=13.04 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1445|ppo_ep: 1|act_loss: -0.0135955810546875|cri_loss: -0.00629425048828125|unsuper_loss: 0.0 +average reward score: 5.1796875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.75%) |Training time=0.50s (22.73%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1446|ppo_ep: 1|act_loss: -0.0104522705078125|cri_loss: -0.00444793701171875|unsuper_loss: 0.0 +average reward score: 5.40625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.35%) |Training time=0.51s (23.17%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1447|ppo_ep: 1|act_loss: -0.009429931640625|cri_loss: -0.00450897216796875|unsuper_loss: 0.0 +average reward score: 5.0625 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.84%) |Training time=0.52s (23.64%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1448|ppo_ep: 1|act_loss: -0.06170654296875|cri_loss: -0.029632568359375|unsuper_loss: 0.0 +average reward score: 5.22265625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.32%) |Training time=0.48s (22.16%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.42 +[2023-04-14 09:41:01,351] [INFO] [logging.py:96:log_dist] [Rank 0] step=1450, skipped=18, lr=[9.029335969621133e-06, 9.029335969621133e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:41:01,370] [INFO] [timer.py:199:stop] epoch=0/micro_step=1450/global_step=1450, RunningAvgSamplesPerSec=109.61442490775492, CurrSamplesPerSec=87.41166563657832, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:41:01,462] [INFO] [logging.py:96:log_dist] [Rank 0] step=1450, skipped=20, lr=[4.679356244543027e-06, 4.679356244543027e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1449|ppo_ep: 1|act_loss: -0.037841796875|cri_loss: -0.0179443359375|unsuper_loss: 0.0 +average reward score: 5.16796875 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.78%) |Training time=0.53s (23.81%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.41 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1450|ppo_ep: 1|act_loss: 0.0478515625|cri_loss: 0.024993896484375|unsuper_loss: 0.0 +average reward score: 5.1953125 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.61s (72.66%) |Training time=0.51s (22.84%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.42 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1451|ppo_ep: 1|act_loss: 0.04669189453125|cri_loss: 0.0257415771484375|unsuper_loss: 0.0 +average reward score: 5.859375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (72.93%) |Training time=0.50s (22.59%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1452|ppo_ep: 1|act_loss: 0.055023193359375|cri_loss: 0.0299072265625|unsuper_loss: 0.0 +average reward score: 4.83984375 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.64s (73.94%) |Training time=0.48s (21.58%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1453|ppo_ep: 1|act_loss: 0.026885986328125|cri_loss: 0.0149993896484375|unsuper_loss: 0.0 +average reward score: 6.34375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.59%) |Training time=0.50s (22.93%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1454|ppo_ep: 1|act_loss: 0.0374755859375|cri_loss: 0.0211029052734375|unsuper_loss: 0.0 +average reward score: 5.625 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.67%) |Training time=0.53s (23.88%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.45 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1455|ppo_ep: 1|act_loss: 0.021514892578125|cri_loss: 0.0114288330078125|unsuper_loss: 0.0 +average reward score: 4.8984375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.60%) |Training time=0.50s (22.93%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1456|ppo_ep: 1|act_loss: -0.03704833984375|cri_loss: -0.017303466796875|unsuper_loss: 0.0 +average reward score: 6.1953125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.59%) |Training time=0.50s (22.87%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1457|ppo_ep: 1|act_loss: -0.0538330078125|cri_loss: -0.0260467529296875|unsuper_loss: 0.0 +average reward score: 5.6328125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.48%) |Training time=0.48s (22.04%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1458|ppo_ep: 1|act_loss: -0.018829345703125|cri_loss: -0.00910186767578125|unsuper_loss: 0.0 +average reward score: 5.4375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.19%) |Training time=0.49s (22.31%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.42 +[2023-04-14 09:41:23,569] [INFO] [logging.py:96:log_dist] [Rank 0] step=1460, skipped=18, lr=[9.020190688453302e-06, 9.020190688453302e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:41:23,671] [INFO] [timer.py:199:stop] epoch=0/micro_step=1460/global_step=1460, RunningAvgSamplesPerSec=109.47243506677798, CurrSamplesPerSec=72.63209936403898, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:41:23,763] [INFO] [logging.py:96:log_dist] [Rank 0] step=1460, skipped=20, lr=[4.674624209153173e-06, 4.674624209153173e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1459|ppo_ep: 1|act_loss: 0.00940704345703125|cri_loss: 0.00525665283203125|unsuper_loss: 0.0 +average reward score: 5.625 +------------------------------------------------------------------------------------- +|E2E latency=2.48s |Gather latency=0.00s (0.00%) |Generate time=1.77s (71.68%) |Training time=0.60s (24.38%) |Others=0.10 (3.95%)|CurSamplesPerSec=12.93 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1460|ppo_ep: 1|act_loss: -0.01226043701171875|cri_loss: -0.00563812255859375|unsuper_loss: 0.0 +average reward score: 5.1171875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.26%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1461|ppo_ep: 1|act_loss: 0.034271240234375|cri_loss: 0.01806640625|unsuper_loss: 0.0 +average reward score: 5.6328125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.88%) |Training time=0.49s (22.61%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1462|ppo_ep: 1|act_loss: 0.079345703125|cri_loss: 0.04486083984375|unsuper_loss: 0.0 +average reward score: 5.62109375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.17%) |Training time=0.49s (22.31%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1463|ppo_ep: 1|act_loss: 0.00299835205078125|cri_loss: 0.0023288726806640625|unsuper_loss: 0.0 +average reward score: 5.2265625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.43%) |Training time=0.48s (22.04%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1464|ppo_ep: 1|act_loss: -0.00670623779296875|cri_loss: -0.00295257568359375|unsuper_loss: 0.0 +average reward score: 5.84375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.74%) |Training time=0.47s (21.75%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1465|ppo_ep: 1|act_loss: -0.027069091796875|cri_loss: -0.0130462646484375|unsuper_loss: 0.0 +average reward score: 4.88671875 +------------------------------------------------------------------------------------- +|E2E latency=2.61s |Gather latency=0.00s (0.00%) |Generate time=1.61s (61.52%) |Training time=0.47s (17.96%) |Others=0.54 (20.52%)|CurSamplesPerSec=12.24 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1466|ppo_ep: 1|act_loss: 0.002590179443359375|cri_loss: 0.001628875732421875|unsuper_loss: 0.0 +average reward score: 5.1328125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.15%) |Training time=0.46s (21.33%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1467|ppo_ep: 1|act_loss: 0.00725555419921875|cri_loss: 0.00450897216796875|unsuper_loss: 0.0 +average reward score: 5.0234375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.04%) |Training time=0.47s (21.45%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1468|ppo_ep: 1|act_loss: 0.01209259033203125|cri_loss: 0.006256103515625|unsuper_loss: 0.0 +average reward score: 5.0625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.06%) |Training time=0.46s (21.34%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.42 +[2023-04-14 09:41:45,881] [INFO] [logging.py:96:log_dist] [Rank 0] step=1470, skipped=18, lr=[9.010983224439122e-06, 9.010983224439122e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:41:45,900] [INFO] [timer.py:199:stop] epoch=0/micro_step=1470/global_step=1470, RunningAvgSamplesPerSec=109.42062658822219, CurrSamplesPerSec=106.38218975695435, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:41:45,992] [INFO] [logging.py:96:log_dist] [Rank 0] step=1470, skipped=20, lr=[4.6698599405865465e-06, 4.6698599405865465e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1469|ppo_ep: 1|act_loss: -0.00179290771484375|cri_loss: 0.0003948211669921875|unsuper_loss: 0.0 +average reward score: 5.37109375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.10%) |Training time=0.46s (21.37%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1470|ppo_ep: 1|act_loss: 0.0103302001953125|cri_loss: 0.006702423095703125|unsuper_loss: 0.0 +average reward score: 5.1875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.59%) |Training time=0.48s (21.90%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1471|ppo_ep: 1|act_loss: 0.02276611328125|cri_loss: 0.0134124755859375|unsuper_loss: 0.0 +average reward score: 5.85546875 +------------------------------------------------------------------------------------- +|E2E latency=2.60s |Gather latency=0.00s (0.00%) |Generate time=1.60s (61.64%) |Training time=0.47s (18.19%) |Others=0.52 (20.17%)|CurSamplesPerSec=12.33 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1472|ppo_ep: 1|act_loss: 0.0002186298370361328|cri_loss: 0.0006475448608398438|unsuper_loss: 0.0 +average reward score: 5.8515625 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.57%) |Training time=0.48s (21.95%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1473|ppo_ep: 1|act_loss: 0.01290130615234375|cri_loss: 0.00920867919921875|unsuper_loss: 0.0 +average reward score: 5.203125 +------------------------------------------------------------------------------------- +|E2E latency=2.43s |Gather latency=0.00s (0.00%) |Generate time=1.85s (76.14%) |Training time=0.48s (19.81%) |Others=0.10 (4.04%)|CurSamplesPerSec=13.16 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1474|ppo_ep: 1|act_loss: -0.0024871826171875|cri_loss: -0.0009250640869140625|unsuper_loss: 0.0 +average reward score: 5.390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.58%) |Training time=0.48s (21.91%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1475|ppo_ep: 1|act_loss: -0.00386810302734375|cri_loss: -0.0015048980712890625|unsuper_loss: 0.0 +average reward score: 5.04296875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.74%) |Training time=0.50s (22.76%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1476|ppo_ep: 1|act_loss: -0.00803375244140625|cri_loss: -0.0038585662841796875|unsuper_loss: 0.0 +average reward score: 5.9375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.91%) |Training time=0.49s (22.57%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1477|ppo_ep: 1|act_loss: 0.034149169921875|cri_loss: 0.0176544189453125|unsuper_loss: 0.0 +average reward score: 5.234375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.10%) |Training time=0.49s (22.38%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1478|ppo_ep: 1|act_loss: 0.000904083251953125|cri_loss: 0.0015773773193359375|unsuper_loss: 0.0 +average reward score: 5.76171875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.70%) |Training time=0.48s (21.80%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.42 +[2023-04-14 09:42:08,344] [INFO] [logging.py:96:log_dist] [Rank 0] step=1480, skipped=18, lr=[9.00171371405542e-06, 9.00171371405542e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:42:08,362] [INFO] [timer.py:199:stop] epoch=0/micro_step=1480/global_step=1480, RunningAvgSamplesPerSec=109.35307940699091, CurrSamplesPerSec=102.23690081816804, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:42:08,454] [INFO] [logging.py:96:log_dist] [Rank 0] step=1480, skipped=20, lr=[4.665063509461098e-06, 4.665063509461098e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1479|ppo_ep: 1|act_loss: 0.053741455078125|cri_loss: 0.030670166015625|unsuper_loss: 0.0 +average reward score: 5.12109375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.60%) |Training time=0.48s (21.90%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1480|ppo_ep: 1|act_loss: 0.021942138671875|cri_loss: 0.01233673095703125|unsuper_loss: 0.0 +average reward score: 5.3203125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.44%) |Training time=0.48s (22.01%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1481|ppo_ep: 1|act_loss: -0.05841064453125|cri_loss: -0.0283660888671875|unsuper_loss: 0.0 +average reward score: 5.2734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.65%) |Training time=0.47s (21.83%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1482|ppo_ep: 1|act_loss: -0.03936767578125|cri_loss: -0.0176849365234375|unsuper_loss: 0.0 +average reward score: 5.96875 +------------------------------------------------------------------------------------- +|E2E latency=2.46s |Gather latency=0.00s (0.00%) |Generate time=1.60s (65.07%) |Training time=0.47s (19.00%) |Others=0.39 (15.93%)|CurSamplesPerSec=12.99 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1483|ppo_ep: 1|act_loss: 0.0780029296875|cri_loss: 0.04107666015625|unsuper_loss: 0.0 +average reward score: 5.66015625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.58%) |Training time=0.48s (21.91%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1484|ppo_ep: 1|act_loss: 0.010711669921875|cri_loss: 0.00705718994140625|unsuper_loss: 0.0 +average reward score: 4.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.98%) |Training time=0.47s (21.50%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1485|ppo_ep: 1|act_loss: 0.022308349609375|cri_loss: 0.01165008544921875|unsuper_loss: 0.0 +average reward score: 5.21875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.90%) |Training time=0.47s (21.57%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1486|ppo_ep: 1|act_loss: -0.0200347900390625|cri_loss: -0.0081634521484375|unsuper_loss: 0.0 +average reward score: 5.078125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.05%) |Training time=0.47s (21.43%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1487|ppo_ep: 1|act_loss: 0.01959228515625|cri_loss: 0.01219940185546875|unsuper_loss: 0.0 +average reward score: 5.16015625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.96%) |Training time=0.47s (21.51%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1488|ppo_ep: 1|act_loss: -0.05877685546875|cri_loss: -0.0276641845703125|unsuper_loss: 0.0 +average reward score: 5.20703125 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.78s (75.88%) |Training time=0.47s (19.92%) |Others=0.10 (4.20%)|CurSamplesPerSec=13.66 |AvgSamplesPerSec=14.42 +[2023-04-14 09:42:30,551] [INFO] [logging.py:96:log_dist] [Rank 0] step=1490, skipped=18, lr=[8.992382294698705e-06, 8.992382294698705e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:42:30,570] [INFO] [timer.py:199:stop] epoch=0/micro_step=1490/global_step=1490, RunningAvgSamplesPerSec=109.31296286894379, CurrSamplesPerSec=103.17163047296637, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:42:30,664] [INFO] [logging.py:96:log_dist] [Rank 0] step=1490, skipped=20, lr=[4.660234986871507e-06, 4.660234986871507e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1489|ppo_ep: 1|act_loss: 0.0299530029296875|cri_loss: 0.0162506103515625|unsuper_loss: 0.0 +average reward score: 5.08984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.67%) |Training time=0.47s (21.74%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1490|ppo_ep: 1|act_loss: -0.02716064453125|cri_loss: -0.01160430908203125|unsuper_loss: 0.0 +average reward score: 4.71484375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.50%) |Training time=0.48s (21.96%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1491|ppo_ep: 1|act_loss: -0.006702423095703125|cri_loss: -0.0006561279296875|unsuper_loss: 0.0 +average reward score: 4.7890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.84%) |Training time=0.47s (21.62%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1492|ppo_ep: 1|act_loss: 0.037200927734375|cri_loss: 0.020538330078125|unsuper_loss: 0.0 +average reward score: 5.14453125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.49%) |Training time=0.48s (21.98%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1493|ppo_ep: 1|act_loss: -0.050750732421875|cri_loss: -0.02349853515625|unsuper_loss: 0.0 +average reward score: 4.421875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.39%) |Training time=0.48s (22.10%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1494|ppo_ep: 1|act_loss: 0.03173828125|cri_loss: 0.0195159912109375|unsuper_loss: 0.0 +average reward score: 5.23828125 +------------------------------------------------------------------------------------- +|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.60s (67.96%) |Training time=0.48s (20.14%) |Others=0.28 (11.90%)|CurSamplesPerSec=13.56 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1495|ppo_ep: 1|act_loss: -0.03515625|cri_loss: -0.0159454345703125|unsuper_loss: 0.0 +average reward score: 5.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.59%) |Training time=0.48s (21.88%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1496|ppo_ep: 1|act_loss: -0.05670166015625|cri_loss: -0.025726318359375|unsuper_loss: 0.0 +average reward score: 4.83203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.50%) |Training time=0.48s (21.98%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.42 +[2023-04-14 09:42:48,235] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 1497|ppo_ep: 1|act_loss: 0.013671875|cri_loss: 0.013336181640625|unsuper_loss: 0.0 +average reward score: 4.99609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.01%) |Training time=0.47s (21.93%) |Others=0.09 (4.05%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42 +[2023-04-14 09:42:50,401] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 1498|ppo_ep: 1|act_loss: 0.046875|cri_loss: 0.0252685546875|unsuper_loss: 0.0 +average reward score: 5.39453125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.68%) |Training time=0.48s (22.20%) |Others=0.09 (4.12%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42 +[2023-04-14 09:42:52,461] [INFO] [logging.py:96:log_dist] [Rank 0] step=1500, skipped=18, lr=[8.982989104683118e-06, 8.982989104683118e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:42:52,480] [INFO] [timer.py:199:stop] epoch=0/micro_step=1500/global_step=1500, RunningAvgSamplesPerSec=109.25825586216985, CurrSamplesPerSec=100.98657928694233, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:42:52,573] [INFO] [logging.py:96:log_dist] [Rank 0] step=1500, skipped=22, lr=[4.656349111024974e-06, 4.656349111024974e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1499|ppo_ep: 1|act_loss: 0.037841796875|cri_loss: 0.023956298828125|unsuper_loss: 0.0 +average reward score: 4.86328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.42%) |Training time=0.48s (22.05%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1500|ppo_ep: 1|act_loss: -0.0390625|cri_loss: -0.0156402587890625|unsuper_loss: 0.0 +average reward score: 4.95703125 +------------------------------------------------------------------------------------- +|E2E latency=2.95s |Gather latency=0.00s (0.00%) |Generate time=1.60s (54.11%) |Training time=0.48s (16.34%) |Others=0.87 (29.55%)|CurSamplesPerSec=10.84 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1501|ppo_ep: 1|act_loss: 0.0438232421875|cri_loss: 0.024444580078125|unsuper_loss: 0.0 +average reward score: 5.41796875 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.98%) |Training time=0.48s (21.58%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1502|ppo_ep: 1|act_loss: 0.054840087890625|cri_loss: 0.037109375|unsuper_loss: 0.0 +average reward score: 4.57421875 +------------------------------------------------------------------------------------- +|E2E latency=2.47s |Gather latency=0.00s (0.00%) |Generate time=1.86s (75.41%) |Training time=0.51s (20.59%) |Others=0.10 (4.00%)|CurSamplesPerSec=12.98 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1503|ppo_ep: 1|act_loss: 0.0931396484375|cri_loss: 0.05364990234375|unsuper_loss: 0.0 +average reward score: 4.46875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.03%) |Training time=0.46s (21.42%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1504|ppo_ep: 1|act_loss: -0.000888824462890625|cri_loss: 0.0021839141845703125|unsuper_loss: 0.0 +average reward score: 5.234375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.11%) |Training time=0.46s (21.34%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1505|ppo_ep: 1|act_loss: -0.1180419921875|cri_loss: -0.049957275390625|unsuper_loss: 0.0 +average reward score: 5.0703125 +------------------------------------------------------------------------------------- +|E2E latency=2.76s |Gather latency=0.00s (0.00%) |Generate time=1.60s (57.98%) |Training time=0.46s (16.70%) |Others=0.70 (25.32%)|CurSamplesPerSec=11.60 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1506|ppo_ep: 1|act_loss: -0.0391845703125|cri_loss: -0.0111236572265625|unsuper_loss: 0.0 +average reward score: 4.72265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.30%) |Training time=0.46s (21.14%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1507|ppo_ep: 1|act_loss: -0.08062744140625|cri_loss: -0.0303497314453125|unsuper_loss: 0.0 +average reward score: 4.83984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.40%) |Training time=0.45s (21.03%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1508|ppo_ep: 1|act_loss: 0.01904296875|cri_loss: 0.0215911865234375|unsuper_loss: 0.0 +average reward score: 4.76171875 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.15%) |Training time=0.52s (23.41%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.44 |AvgSamplesPerSec=14.42 +[2023-04-14 09:43:15,905] [INFO] [logging.py:96:log_dist] [Rank 0] step=1510, skipped=18, lr=[8.973534283238398e-06, 8.973534283238398e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:43:15,923] [INFO] [timer.py:199:stop] epoch=0/micro_step=1510/global_step=1510, RunningAvgSamplesPerSec=109.20020953413868, CurrSamplesPerSec=92.72023189431013, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:43:16,016] [INFO] [logging.py:96:log_dist] [Rank 0] step=1510, skipped=22, lr=[4.651463004476193e-06, 4.651463004476193e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1509|ppo_ep: 1|act_loss: 0.111328125|cri_loss: 0.06182861328125|unsuper_loss: 0.0 +average reward score: 4.7421875 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.58%) |Training time=0.51s (23.00%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1510|ppo_ep: 1|act_loss: 0.0236968994140625|cri_loss: 0.0250091552734375|unsuper_loss: 0.0 +average reward score: 5.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.18%) |Training time=0.49s (22.34%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.42 +[2023-04-14 09:43:20,288] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, but hysteresis is 2. Reducing hysteresis to 1 +[2023-04-14 09:43:20,373] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384 +epoch: 0|step: 1511|ppo_ep: 1|act_loss: 0.0660400390625|cri_loss: 0.04168701171875|unsuper_loss: 0.0 +average reward score: 5.32421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.42%) |Training time=0.46s (21.44%) |Others=0.09 (4.14%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.42 +[2023-04-14 09:43:22,449] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, reducing to 4096 +epoch: 0|step: 1512|ppo_ep: 1|act_loss: 0.1353759765625|cri_loss: 0.08154296875|unsuper_loss: 0.0 +average reward score: 5.49609375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.78%) |Training time=0.47s (21.70%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1513|ppo_ep: 1|act_loss: 0.08203125|cri_loss: 0.04931640625|unsuper_loss: 0.0 +average reward score: 5.71875 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.66%) |Training time=0.48s (21.88%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1514|ppo_ep: 1|act_loss: -0.0709228515625|cri_loss: -0.0137939453125|unsuper_loss: 0.0 +average reward score: 5.1484375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.65%) |Training time=0.48s (21.84%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1515|ppo_ep: 1|act_loss: -0.051605224609375|cri_loss: -0.006378173828125|unsuper_loss: 0.0 +average reward score: 5.2265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.70%) |Training time=0.47s (21.78%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1516|ppo_ep: 1|act_loss: 0.102783203125|cri_loss: 0.0628662109375|unsuper_loss: 0.0 +average reward score: 5.38671875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.42%) |Training time=0.48s (22.09%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1517|ppo_ep: 1|act_loss: 0.07171630859375|cri_loss: 0.06170654296875|unsuper_loss: 0.0 +average reward score: 4.97265625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.24%) |Training time=0.49s (22.26%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.42 +[2023-04-14 09:43:35,676] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 4096, reducing to 2048 +epoch: 0|step: 1518|ppo_ep: 1|act_loss: 0.1497802734375|cri_loss: 0.08740234375|unsuper_loss: 0.0 +average reward score: 5.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.75s (76.08%) |Training time=0.45s (19.66%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.88 |AvgSamplesPerSec=14.42 +[2023-04-14 09:43:37,835] [INFO] [logging.py:96:log_dist] [Rank 0] step=1520, skipped=21, lr=[8.966879312536848e-06, 8.966879312536848e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:43:37,854] [INFO] [timer.py:199:stop] epoch=0/micro_step=1520/global_step=1520, RunningAvgSamplesPerSec=109.15061098093494, CurrSamplesPerSec=101.99248759647922, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:43:37,946] [INFO] [logging.py:96:log_dist] [Rank 0] step=1520, skipped=23, lr=[4.647038240668007e-06, 4.647038240668007e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1519|ppo_ep: 1|act_loss: 0.0712890625|cri_loss: 0.10894775390625|unsuper_loss: 0.0 +average reward score: 4.89453125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.62%) |Training time=0.48s (21.88%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1520|ppo_ep: 1|act_loss: 0.173583984375|cri_loss: 0.12744140625|unsuper_loss: 0.0 +average reward score: 4.77734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.84%) |Training time=0.47s (21.64%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1521|ppo_ep: 1|act_loss: 0.228759765625|cri_loss: 0.144775390625|unsuper_loss: 0.0 +average reward score: 4.7734375 +------------------------------------------------------------------------------------- +|E2E latency=2.75s |Gather latency=0.00s (0.00%) |Generate time=1.60s (58.34%) |Training time=0.48s (17.52%) |Others=0.66 (24.14%)|CurSamplesPerSec=11.65 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1522|ppo_ep: 1|act_loss: 0.2275390625|cri_loss: 0.133056640625|unsuper_loss: 0.0 +average reward score: 4.83984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.27%) |Training time=0.46s (21.15%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41 +[2023-04-14 09:43:47,123] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 2048, reducing to 1024 +epoch: 0|step: 1523|ppo_ep: 1|act_loss: 0.055511474609375|cri_loss: 0.048187255859375|unsuper_loss: 0.0 +average reward score: 4.81640625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.70%) |Training time=0.48s (21.81%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1524|ppo_ep: 1|act_loss: 0.0124359130859375|cri_loss: 0.0382080078125|unsuper_loss: 0.0 +average reward score: 4.65234375 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (72.99%) |Training time=0.50s (22.55%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.46 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1525|ppo_ep: 1|act_loss: -0.3203125|cri_loss: -0.0809326171875|unsuper_loss: 0.0 +average reward score: 4.73828125 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.65s (74.79%) |Training time=0.46s (20.74%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1526|ppo_ep: 1|act_loss: 0.00335693359375|cri_loss: 0.023529052734375|unsuper_loss: 0.0 +average reward score: 4.3359375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.81%) |Training time=0.50s (22.70%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1527|ppo_ep: 1|act_loss: -0.03948974609375|cri_loss: 0.019134521484375|unsuper_loss: 0.0 +average reward score: 4.76953125 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.64%) |Training time=0.48s (21.89%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1528|ppo_ep: 1|act_loss: 0.2244873046875|cri_loss: 0.134521484375|unsuper_loss: 0.0 +average reward score: 4.3984375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.35%) |Training time=0.49s (22.18%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.42 +[2023-04-14 09:44:00,334] [INFO] [logging.py:96:log_dist] [Rank 0] step=1530, skipped=22, lr=[8.958278725693138e-06, 8.958278725693138e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:44:00,352] [INFO] [timer.py:199:stop] epoch=0/micro_step=1530/global_step=1530, RunningAvgSamplesPerSec=109.09060936426617, CurrSamplesPerSec=97.64463647454845, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:44:00,445] [INFO] [logging.py:96:log_dist] [Rank 0] step=1530, skipped=23, lr=[4.642091605675834e-06, 4.642091605675834e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1529|ppo_ep: 1|act_loss: 0.209228515625|cri_loss: 0.13525390625|unsuper_loss: 0.0 +average reward score: 4.234375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.23%) |Training time=0.49s (22.26%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.42 +[2023-04-14 09:44:02,664] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192 +epoch: 0|step: 1530|ppo_ep: 1|act_loss: -0.031982421875|cri_loss: 0.02606201171875|unsuper_loss: 0.0 +average reward score: 4.2578125 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.68s (75.53%) |Training time=0.45s (20.42%) |Others=0.09 (4.05%)|CurSamplesPerSec=14.43 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1531|ppo_ep: 1|act_loss: -0.10589599609375|cri_loss: -0.023681640625|unsuper_loss: 0.0 +average reward score: 4.4765625 +------------------------------------------------------------------------------------- +|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.71s (74.62%) |Training time=0.48s (20.78%) |Others=0.11 (4.59%)|CurSamplesPerSec=13.95 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1532|ppo_ep: 1|act_loss: -0.62890625|cri_loss: -0.153076171875|unsuper_loss: 0.0 +average reward score: 3.93359375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.27%) |Training time=0.49s (22.22%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1533|ppo_ep: 1|act_loss: 0.1341552734375|cri_loss: 0.111083984375|unsuper_loss: 0.0 +average reward score: 4.3671875 +------------------------------------------------------------------------------------- +|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.77s (74.85%) |Training time=0.50s (20.97%) |Others=0.10 (4.18%)|CurSamplesPerSec=13.53 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1534|ppo_ep: 1|act_loss: 0.129638671875|cri_loss: 0.0816650390625|unsuper_loss: 0.0 +average reward score: 3.388671875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.25%) |Training time=0.49s (22.25%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1535|ppo_ep: 1|act_loss: 0.484375|cri_loss: 0.39990234375|unsuper_loss: 0.0 +average reward score: 2.533203125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.18%) |Training time=0.49s (22.32%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1536|ppo_ep: 1|act_loss: 0.16015625|cri_loss: 0.1719970703125|unsuper_loss: 0.0 +average reward score: 2.78515625 +------------------------------------------------------------------------------------- +|E2E latency=2.88s |Gather latency=0.00s (0.00%) |Generate time=1.60s (55.54%) |Training time=0.49s (17.01%) |Others=0.79 (27.45%)|CurSamplesPerSec=11.10 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1537|ppo_ep: 1|act_loss: 0.40625|cri_loss: 0.2578125|unsuper_loss: 0.0 +average reward score: 4.32421875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.83%) |Training time=0.50s (22.67%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1538|ppo_ep: 1|act_loss: 0.3017578125|cri_loss: 0.2041015625|unsuper_loss: 0.0 +average reward score: 2.69140625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.03%) |Training time=0.49s (22.48%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.41 +[2023-04-14 09:44:23,196] [INFO] [logging.py:96:log_dist] [Rank 0] step=1540, skipped=22, lr=[8.948664320677332e-06, 8.948664320677332e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:44:23,214] [INFO] [timer.py:199:stop] epoch=0/micro_step=1540/global_step=1540, RunningAvgSamplesPerSec=109.02335196349398, CurrSamplesPerSec=101.4977854172543, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:44:23,323] [INFO] [logging.py:96:log_dist] [Rank 0] step=1540, skipped=24, lr=[4.637612485008328e-06, 4.637612485008328e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1539|ppo_ep: 1|act_loss: 0.5537109375|cri_loss: 0.375|unsuper_loss: 0.0 +average reward score: 3.1484375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.09%) |Training time=0.48s (21.79%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1540|ppo_ep: 1|act_loss: -0.2861328125|cri_loss: -0.0626220703125|unsuper_loss: 0.0 +average reward score: 3.4921875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.91%) |Training time=0.50s (22.61%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1541|ppo_ep: 1|act_loss: -0.171875|cri_loss: -0.0218505859375|unsuper_loss: 0.0 +average reward score: 4.46875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.28%) |Training time=0.48s (22.15%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1542|ppo_ep: 1|act_loss: 0.02923583984375|cri_loss: 0.050079345703125|unsuper_loss: 0.0 +average reward score: 3.861328125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.72%) |Training time=0.50s (22.77%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1543|ppo_ep: 1|act_loss: 0.27490234375|cri_loss: 0.1748046875|unsuper_loss: 0.0 +average reward score: 3.6015625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.09%) |Training time=0.49s (22.40%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1544|ppo_ep: 1|act_loss: 0.25048828125|cri_loss: 0.1973876953125|unsuper_loss: 0.0 +average reward score: 2.509765625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.87%) |Training time=0.49s (22.64%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1545|ppo_ep: 1|act_loss: 0.181396484375|cri_loss: 0.1395263671875|unsuper_loss: 0.0 +average reward score: 2.671875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.70%) |Training time=0.50s (22.80%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1546|ppo_ep: 1|act_loss: -0.060211181640625|cri_loss: -0.006256103515625|unsuper_loss: 0.0 +average reward score: 3.3046875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.86%) |Training time=0.49s (22.64%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1547|ppo_ep: 1|act_loss: 0.07568359375|cri_loss: 0.0791015625|unsuper_loss: 0.0 +average reward score: 2.16015625 +------------------------------------------------------------------------------------- +|E2E latency=2.43s |Gather latency=0.00s (0.00%) |Generate time=1.59s (65.40%) |Training time=0.55s (22.71%) |Others=0.29 (11.90%)|CurSamplesPerSec=13.19 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1548|ppo_ep: 1|act_loss: -0.0823974609375|cri_loss: 0.028076171875|unsuper_loss: 0.0 +average reward score: 2.818359375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.79%) |Training time=0.50s (22.70%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.41 +[2023-04-14 09:44:45,262] [INFO] [logging.py:96:log_dist] [Rank 0] step=1550, skipped=22, lr=[8.938988793008496e-06, 8.938988793008496e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:44:45,280] [INFO] [timer.py:199:stop] epoch=0/micro_step=1550/global_step=1550, RunningAvgSamplesPerSec=108.92040127458814, CurrSamplesPerSec=96.0678427100437, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:44:45,372] [INFO] [logging.py:96:log_dist] [Rank 0] step=1550, skipped=24, lr=[4.632605586260949e-06, 4.632605586260949e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1549|ppo_ep: 1|act_loss: 0.34619140625|cri_loss: 0.21337890625|unsuper_loss: 0.0 +average reward score: 2.98046875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.73%) |Training time=0.50s (22.78%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1550|ppo_ep: 1|act_loss: 0.369140625|cri_loss: 0.21630859375|unsuper_loss: 0.0 +average reward score: 3.705078125 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.14%) |Training time=0.51s (23.39%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1551|ppo_ep: 1|act_loss: 0.46142578125|cri_loss: 0.289306640625|unsuper_loss: 0.0 +average reward score: 3.84375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.66%) |Training time=0.47s (21.79%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1552|ppo_ep: 1|act_loss: 0.345703125|cri_loss: 0.218994140625|unsuper_loss: 0.0 +average reward score: 3.10546875 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.62%) |Training time=0.53s (23.93%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1553|ppo_ep: 1|act_loss: 0.163330078125|cri_loss: 0.12451171875|unsuper_loss: 0.0 +average reward score: 4.44921875 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.78%) |Training time=0.52s (23.76%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1554|ppo_ep: 1|act_loss: 0.135498046875|cri_loss: 0.0986328125|unsuper_loss: 0.0 +average reward score: 3.1015625 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.65s (74.94%) |Training time=0.45s (20.56%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1555|ppo_ep: 1|act_loss: -0.2354736328125|cri_loss: -0.0045166015625|unsuper_loss: 0.0 +average reward score: 4.90625 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.30%) |Training time=0.49s (22.20%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1556|ppo_ep: 1|act_loss: 0.00152587890625|cri_loss: 0.057708740234375|unsuper_loss: 0.0 +average reward score: 2.7890625 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.18%) |Training time=0.49s (22.34%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1557|ppo_ep: 1|act_loss: 0.43017578125|cri_loss: 0.260009765625|unsuper_loss: 0.0 +average reward score: 2.853515625 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.23%) |Training time=0.49s (22.30%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.51 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1558|ppo_ep: 1|act_loss: 0.15673828125|cri_loss: 0.125244140625|unsuper_loss: 0.0 +average reward score: 2.9765625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.13%) |Training time=0.47s (21.36%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.41 +[2023-04-14 09:45:07,278] [INFO] [logging.py:96:log_dist] [Rank 0] step=1560, skipped=22, lr=[8.929252286101288e-06, 8.929252286101288e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:45:07,296] [INFO] [timer.py:199:stop] epoch=0/micro_step=1560/global_step=1560, RunningAvgSamplesPerSec=108.83748797941826, CurrSamplesPerSec=101.76667574004247, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:45:07,389] [INFO] [logging.py:96:log_dist] [Rank 0] step=1560, skipped=24, lr=[4.62756707715415e-06, 4.62756707715415e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1559|ppo_ep: 1|act_loss: 0.161865234375|cri_loss: 0.1217041015625|unsuper_loss: 0.0 +average reward score: 3.515625 +------------------------------------------------------------------------------------- +|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.66s (74.22%) |Training time=0.48s (21.37%) |Others=0.10 (4.41%)|CurSamplesPerSec=14.34 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1560|ppo_ep: 1|act_loss: 0.08599853515625|cri_loss: 0.06768798828125|unsuper_loss: 0.0 +average reward score: 3.03515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.92%) |Training time=0.45s (20.56%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1561|ppo_ep: 1|act_loss: 0.1705322265625|cri_loss: 0.1212158203125|unsuper_loss: 0.0 +average reward score: 4.92578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.54%) |Training time=0.45s (20.90%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1562|ppo_ep: 1|act_loss: 0.094970703125|cri_loss: 0.07098388671875|unsuper_loss: 0.0 +average reward score: 2.849609375 +------------------------------------------------------------------------------------- +|E2E latency=2.79s |Gather latency=0.00s (0.00%) |Generate time=1.77s (63.41%) |Training time=0.49s (17.69%) |Others=0.53 (18.90%)|CurSamplesPerSec=11.45 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1563|ppo_ep: 1|act_loss: 0.21337890625|cri_loss: 0.126708984375|unsuper_loss: 0.0 +average reward score: 3.6875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.25%) |Training time=0.46s (21.13%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1564|ppo_ep: 1|act_loss: 0.0687255859375|cri_loss: 0.06353759765625|unsuper_loss: 0.0 +average reward score: 4.07421875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.20%) |Training time=0.47s (21.31%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1565|ppo_ep: 1|act_loss: 0.040740966796875|cri_loss: 0.034027099609375|unsuper_loss: 0.0 +average reward score: 4.078125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.59%) |Training time=0.47s (21.59%) |Others=0.11 (4.81%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1566|ppo_ep: 1|act_loss: 0.20751953125|cri_loss: 0.12158203125|unsuper_loss: 0.0 +average reward score: 3.55859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.58%) |Training time=0.48s (21.90%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1567|ppo_ep: 1|act_loss: 0.0308380126953125|cri_loss: 0.02667236328125|unsuper_loss: 0.0 +average reward score: 2.984375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.80%) |Training time=0.48s (21.73%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1568|ppo_ep: 1|act_loss: 0.1075439453125|cri_loss: 0.0609130859375|unsuper_loss: 0.0 +average reward score: 4.5625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.08%) |Training time=0.47s (21.40%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.41 +[2023-04-14 09:45:29,668] [INFO] [logging.py:96:log_dist] [Rank 0] step=1570, skipped=22, lr=[8.919454944274233e-06, 8.919454944274233e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:45:29,686] [INFO] [timer.py:199:stop] epoch=0/micro_step=1570/global_step=1570, RunningAvgSamplesPerSec=108.8116418141218, CurrSamplesPerSec=105.11076914161218, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:45:29,778] [INFO] [logging.py:96:log_dist] [Rank 0] step=1570, skipped=24, lr=[4.622497032370792e-06, 4.622497032370792e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1569|ppo_ep: 1|act_loss: -0.0347900390625|cri_loss: -0.00799560546875|unsuper_loss: 0.0 +average reward score: 3.5234375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.04%) |Training time=0.47s (21.49%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1570|ppo_ep: 1|act_loss: 0.10052490234375|cri_loss: 0.06103515625|unsuper_loss: 0.0 +average reward score: 3.310546875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.01%) |Training time=0.47s (21.48%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1571|ppo_ep: 1|act_loss: -0.04644775390625|cri_loss: -0.01904296875|unsuper_loss: 0.0 +average reward score: 2.890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.39%) |Training time=0.46s (21.06%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1572|ppo_ep: 1|act_loss: -0.6005859375|cri_loss: -0.1934814453125|unsuper_loss: 0.0 +average reward score: 2.80078125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.43%) |Training time=0.48s (22.06%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1573|ppo_ep: 1|act_loss: -0.2471923828125|cri_loss: -0.1044921875|unsuper_loss: 0.0 +average reward score: 2.806640625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.58%) |Training time=0.48s (21.92%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1574|ppo_ep: 1|act_loss: 0.23974609375|cri_loss: 0.1318359375|unsuper_loss: 0.0 +average reward score: 3.423828125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.47%) |Training time=0.48s (22.02%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1575|ppo_ep: 1|act_loss: 0.115478515625|cri_loss: 0.07293701171875|unsuper_loss: 0.0 +average reward score: 3.80859375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.76%) |Training time=0.47s (21.73%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1576|ppo_ep: 1|act_loss: 0.1158447265625|cri_loss: 0.0618896484375|unsuper_loss: 0.0 +average reward score: 3.185546875 +------------------------------------------------------------------------------------- +|E2E latency=2.83s |Gather latency=0.00s (0.00%) |Generate time=1.77s (62.47%) |Training time=0.48s (16.92%) |Others=0.58 (20.61%)|CurSamplesPerSec=11.32 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1577|ppo_ep: 1|act_loss: -0.0975341796875|cri_loss: -0.0289306640625|unsuper_loss: 0.0 +average reward score: 3.537109375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.50%) |Training time=0.48s (22.00%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1578|ppo_ep: 1|act_loss: 0.1142578125|cri_loss: 0.0662841796875|unsuper_loss: 0.0 +average reward score: 3.919921875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.58%) |Training time=0.48s (21.90%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.41 +[2023-04-14 09:45:52,093] [INFO] [logging.py:96:log_dist] [Rank 0] step=1580, skipped=22, lr=[8.909596912747568e-06, 8.909596912747568e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:45:52,111] [INFO] [timer.py:199:stop] epoch=0/micro_step=1580/global_step=1580, RunningAvgSamplesPerSec=108.76884252619337, CurrSamplesPerSec=101.50047189216659, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:45:52,204] [INFO] [logging.py:96:log_dist] [Rank 0] step=1580, skipped=24, lr=[4.617395527061168e-06, 4.617395527061168e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1579|ppo_ep: 1|act_loss: -0.0958251953125|cri_loss: -0.0352783203125|unsuper_loss: 0.0 +average reward score: 4.23046875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.55%) |Training time=0.48s (21.96%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1580|ppo_ep: 1|act_loss: -0.014373779296875|cri_loss: 0.0070953369140625|unsuper_loss: 0.0 +average reward score: 4.1953125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.62%) |Training time=0.47s (21.85%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1581|ppo_ep: 1|act_loss: 0.1207275390625|cri_loss: 0.0650634765625|unsuper_loss: 0.0 +average reward score: 3.24609375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.62%) |Training time=0.47s (21.85%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1582|ppo_ep: 1|act_loss: 0.0110321044921875|cri_loss: 0.020233154296875|unsuper_loss: 0.0 +average reward score: 4.88671875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.51%) |Training time=0.48s (21.97%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1583|ppo_ep: 1|act_loss: -0.049652099609375|cri_loss: -0.022430419921875|unsuper_loss: 0.0 +average reward score: 3.423828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.67%) |Training time=0.47s (21.82%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1584|ppo_ep: 1|act_loss: 0.0467529296875|cri_loss: 0.033203125|unsuper_loss: 0.0 +average reward score: 3.763671875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.70%) |Training time=0.47s (21.80%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1585|ppo_ep: 1|act_loss: -0.01050567626953125|cri_loss: 0.001220703125|unsuper_loss: 0.0 +average reward score: 3.591796875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.59%) |Training time=0.48s (21.80%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1586|ppo_ep: 1|act_loss: 0.107666015625|cri_loss: 0.0589599609375|unsuper_loss: 0.0 +average reward score: 3.427734375 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.29%) |Training time=0.51s (23.19%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.44 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1587|ppo_ep: 1|act_loss: 0.03472900390625|cri_loss: 0.0185546875|unsuper_loss: 0.0 +average reward score: 4.4609375 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.00%) |Training time=0.49s (22.16%) |Others=0.11 (4.84%)|CurSamplesPerSec=14.51 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1588|ppo_ep: 1|act_loss: 0.060333251953125|cri_loss: 0.0380859375|unsuper_loss: 0.0 +average reward score: 3.052734375 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.63s (70.59%) |Training time=0.58s (25.15%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.82 |AvgSamplesPerSec=14.41 +[2023-04-14 09:46:14,048] [INFO] [logging.py:96:log_dist] [Rank 0] step=1590, skipped=22, lr=[8.899678337641102e-06, 8.899678337641102e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:46:14,066] [INFO] [timer.py:199:stop] epoch=0/micro_step=1590/global_step=1590, RunningAvgSamplesPerSec=108.69881169225158, CurrSamplesPerSec=101.52227597704174, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:46:14,159] [INFO] [logging.py:96:log_dist] [Rank 0] step=1590, skipped=24, lr=[4.612262636841895e-06, 4.612262636841895e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1589|ppo_ep: 1|act_loss: -0.0256195068359375|cri_loss: -0.009033203125|unsuper_loss: 0.0 +average reward score: 3.4765625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.52%) |Training time=0.48s (21.97%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1590|ppo_ep: 1|act_loss: 0.00982666015625|cri_loss: 0.0081329345703125|unsuper_loss: 0.0 +average reward score: 3.65234375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.38%) |Training time=0.48s (22.09%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1591|ppo_ep: 1|act_loss: 0.06524658203125|cri_loss: 0.035430908203125|unsuper_loss: 0.0 +average reward score: 3.935546875 +------------------------------------------------------------------------------------- +|E2E latency=2.44s |Gather latency=0.00s (0.00%) |Generate time=1.77s (72.45%) |Training time=0.49s (20.24%) |Others=0.18 (7.31%)|CurSamplesPerSec=13.10 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1592|ppo_ep: 1|act_loss: 0.037811279296875|cri_loss: 0.021453857421875|unsuper_loss: 0.0 +average reward score: 3.34765625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.26%) |Training time=0.48s (22.22%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1593|ppo_ep: 1|act_loss: 0.0085601806640625|cri_loss: 0.006603240966796875|unsuper_loss: 0.0 +average reward score: 4.12890625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.57%) |Training time=0.48s (21.90%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1594|ppo_ep: 1|act_loss: -0.097412109375|cri_loss: -0.033599853515625|unsuper_loss: 0.0 +average reward score: 4.7734375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.39%) |Training time=0.49s (22.13%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1595|ppo_ep: 1|act_loss: -0.043792724609375|cri_loss: -0.01497650146484375|unsuper_loss: 0.0 +average reward score: 3.98046875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.04%) |Training time=0.49s (22.45%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1596|ppo_ep: 1|act_loss: 0.02978515625|cri_loss: 0.022247314453125|unsuper_loss: 0.0 +average reward score: 4.1796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.12%) |Training time=0.46s (21.35%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1597|ppo_ep: 1|act_loss: 0.091796875|cri_loss: 0.0494384765625|unsuper_loss: 0.0 +average reward score: 3.82421875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.29%) |Training time=0.49s (22.21%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1598|ppo_ep: 1|act_loss: 0.023773193359375|cri_loss: 0.0134124755859375|unsuper_loss: 0.0 +average reward score: 5.109375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.49%) |Training time=0.48s (22.00%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.41 +[2023-04-14 09:46:36,122] [INFO] [logging.py:96:log_dist] [Rank 0] step=1600, skipped=22, lr=[8.889699365972046e-06, 8.889699365972046e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:46:36,140] [INFO] [timer.py:199:stop] epoch=0/micro_step=1600/global_step=1600, RunningAvgSamplesPerSec=108.6402557626682, CurrSamplesPerSec=100.86045297044397, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:46:36,232] [INFO] [logging.py:96:log_dist] [Rank 0] step=1600, skipped=24, lr=[4.6070984377947884e-06, 4.6070984377947884e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1599|ppo_ep: 1|act_loss: 0.02203369140625|cri_loss: 0.0122222900390625|unsuper_loss: 0.0 +average reward score: 4.140625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.50%) |Training time=0.48s (22.01%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1600|ppo_ep: 1|act_loss: -0.00275421142578125|cri_loss: 0.000911712646484375|unsuper_loss: 0.0 +average reward score: 4.296875 +------------------------------------------------------------------------------------- +|E2E latency=3.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (50.51%) |Training time=0.48s (15.23%) |Others=1.09 (34.26%)|CurSamplesPerSec=10.08 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1601|ppo_ep: 1|act_loss: -0.01715087890625|cri_loss: -0.007396697998046875|unsuper_loss: 0.0 +average reward score: 3.7578125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.62%) |Training time=0.48s (21.85%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1602|ppo_ep: 1|act_loss: -0.04461669921875|cri_loss: -0.0196533203125|unsuper_loss: 0.0 +average reward score: 3.693359375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.58%) |Training time=0.48s (21.91%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1603|ppo_ep: 1|act_loss: 0.00501251220703125|cri_loss: 0.005153656005859375|unsuper_loss: 0.0 +average reward score: 3.765625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.59%) |Training time=0.48s (21.91%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1604|ppo_ep: 1|act_loss: -0.03546142578125|cri_loss: -0.015838623046875|unsuper_loss: 0.0 +average reward score: 4.3125 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.78%) |Training time=0.47s (21.57%) |Others=0.12 (5.65%)|CurSamplesPerSec=14.54 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1605|ppo_ep: 1|act_loss: 0.10308837890625|cri_loss: 0.058502197265625|unsuper_loss: 0.0 +average reward score: 4.0078125 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.73s (74.31%) |Training time=0.49s (21.15%) |Others=0.11 (4.54%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1606|ppo_ep: 1|act_loss: -0.0546875|cri_loss: -0.02545166015625|unsuper_loss: 0.0 +average reward score: 3.8046875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.69%) |Training time=0.47s (21.79%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1607|ppo_ep: 1|act_loss: 0.0222930908203125|cri_loss: 0.01284027099609375|unsuper_loss: 0.0 +average reward score: 4.12890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.96%) |Training time=0.47s (21.51%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1608|ppo_ep: 1|act_loss: -0.04022216796875|cri_loss: -0.0186004638671875|unsuper_loss: 0.0 +average reward score: 4.2265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.68%) |Training time=0.47s (21.79%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41 +[2023-04-14 09:46:59,030] [INFO] [logging.py:96:log_dist] [Rank 0] step=1610, skipped=22, lr=[8.879660145652832e-06, 8.879660145652832e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:46:59,048] [INFO] [timer.py:199:stop] epoch=0/micro_step=1610/global_step=1610, RunningAvgSamplesPerSec=108.59716815707017, CurrSamplesPerSec=105.74269943889715, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:46:59,141] [INFO] [logging.py:96:log_dist] [Rank 0] step=1610, skipped=24, lr=[4.601903006465734e-06, 4.601903006465734e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1609|ppo_ep: 1|act_loss: -0.0167388916015625|cri_loss: -0.006267547607421875|unsuper_loss: 0.0 +average reward score: 4.0703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.00%) |Training time=0.47s (21.47%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1610|ppo_ep: 1|act_loss: -0.02960205078125|cri_loss: -0.01259613037109375|unsuper_loss: 0.0 +average reward score: 4.08984375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.59%) |Training time=0.48s (21.92%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1611|ppo_ep: 1|act_loss: -0.00969696044921875|cri_loss: -0.00272369384765625|unsuper_loss: 0.0 +average reward score: 5.1171875 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.61s (72.18%) |Training time=0.51s (22.87%) |Others=0.11 (4.94%)|CurSamplesPerSec=14.38 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1612|ppo_ep: 1|act_loss: 0.0380859375|cri_loss: 0.02044677734375|unsuper_loss: 0.0 +average reward score: 3.4609375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.07%) |Training time=0.49s (22.44%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.54 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1613|ppo_ep: 1|act_loss: 0.06634521484375|cri_loss: 0.03521728515625|unsuper_loss: 0.0 +average reward score: 4.12890625 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.66s (75.14%) |Training time=0.45s (20.37%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1614|ppo_ep: 1|act_loss: 0.0743408203125|cri_loss: 0.03887939453125|unsuper_loss: 0.0 +average reward score: 4.20703125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.39%) |Training time=0.49s (22.12%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1615|ppo_ep: 1|act_loss: 0.0175018310546875|cri_loss: 0.01300048828125|unsuper_loss: 0.0 +average reward score: 4.3515625 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.26%) |Training time=0.49s (22.05%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1616|ppo_ep: 1|act_loss: 0.02777099609375|cri_loss: 0.0153656005859375|unsuper_loss: 0.0 +average reward score: 3.30859375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.79%) |Training time=0.48s (21.71%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1617|ppo_ep: 1|act_loss: -0.039794921875|cri_loss: -0.0181884765625|unsuper_loss: 0.0 +average reward score: 4.49609375 +------------------------------------------------------------------------------------- +|E2E latency=2.42s |Gather latency=0.00s (0.00%) |Generate time=1.73s (71.29%) |Training time=0.48s (19.72%) |Others=0.22 (8.99%)|CurSamplesPerSec=13.21 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1618|ppo_ep: 1|act_loss: 0.07086181640625|cri_loss: 0.03765869140625|unsuper_loss: 0.0 +average reward score: 3.755859375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.71%) |Training time=0.45s (20.78%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.41 +[2023-04-14 09:47:21,223] [INFO] [logging.py:96:log_dist] [Rank 0] step=1620, skipped=22, lr=[8.869560825488926e-06, 8.869560825488926e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:47:21,241] [INFO] [timer.py:199:stop] epoch=0/micro_step=1620/global_step=1620, RunningAvgSamplesPerSec=108.55081239645398, CurrSamplesPerSec=106.41888138557795, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:47:21,334] [INFO] [logging.py:96:log_dist] [Rank 0] step=1620, skipped=24, lr=[4.596676419863561e-06, 4.596676419863561e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1619|ppo_ep: 1|act_loss: 0.0615234375|cri_loss: 0.03411865234375|unsuper_loss: 0.0 +average reward score: 4.44140625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.12%) |Training time=0.46s (21.36%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1620|ppo_ep: 1|act_loss: 0.024871826171875|cri_loss: 0.01503753662109375|unsuper_loss: 0.0 +average reward score: 3.728515625 +------------------------------------------------------------------------------------- +|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.77s (74.71%) |Training time=0.50s (21.13%) |Others=0.10 (4.16%)|CurSamplesPerSec=13.50 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1621|ppo_ep: 1|act_loss: -0.0162811279296875|cri_loss: -0.007350921630859375|unsuper_loss: 0.0 +average reward score: 5.1328125 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.84%) |Training time=0.48s (21.69%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1622|ppo_ep: 1|act_loss: -0.02630615234375|cri_loss: -0.01035308837890625|unsuper_loss: 0.0 +average reward score: 4.40625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.77%) |Training time=0.47s (21.71%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1623|ppo_ep: 1|act_loss: 0.03363037109375|cri_loss: 0.0177154541015625|unsuper_loss: 0.0 +average reward score: 3.87109375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.55%) |Training time=0.48s (21.93%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1624|ppo_ep: 1|act_loss: 0.005886077880859375|cri_loss: 0.003627777099609375|unsuper_loss: 0.0 +average reward score: 3.359375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.09%) |Training time=0.49s (22.37%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1625|ppo_ep: 1|act_loss: 0.0280609130859375|cri_loss: 0.01690673828125|unsuper_loss: 0.0 +average reward score: 2.912109375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.24%) |Training time=0.46s (21.22%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1626|ppo_ep: 1|act_loss: -0.050811767578125|cri_loss: -0.021881103515625|unsuper_loss: 0.0 +average reward score: 4.94921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.40%) |Training time=0.48s (22.06%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1627|ppo_ep: 1|act_loss: -0.05517578125|cri_loss: -0.0225067138671875|unsuper_loss: 0.0 +average reward score: 4.9921875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.55%) |Training time=0.48s (21.94%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1628|ppo_ep: 1|act_loss: 0.07183837890625|cri_loss: 0.039794921875|unsuper_loss: 0.0 +average reward score: 4.18359375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.55%) |Training time=0.48s (21.92%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41 +[2023-04-14 09:47:43,177] [INFO] [logging.py:96:log_dist] [Rank 0] step=1630, skipped=22, lr=[8.859401555176615e-06, 8.859401555176615e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:47:43,482] [INFO] [timer.py:199:stop] epoch=0/micro_step=1630/global_step=1630, RunningAvgSamplesPerSec=108.43823317583377, CurrSamplesPerSec=53.26256723411719, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:47:43,575] [INFO] [logging.py:96:log_dist] [Rank 0] step=1630, skipped=24, lr=[4.591418755458887e-06, 4.591418755458887e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1629|ppo_ep: 1|act_loss: 0.09765625|cri_loss: 0.051177978515625|unsuper_loss: 0.0 +average reward score: 3.75390625 +------------------------------------------------------------------------------------- +|E2E latency=2.45s |Gather latency=0.00s (0.00%) |Generate time=1.59s (64.92%) |Training time=0.76s (31.10%) |Others=0.10 (3.99%)|CurSamplesPerSec=13.04 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1630|ppo_ep: 1|act_loss: 0.0201416015625|cri_loss: 0.0125274658203125|unsuper_loss: 0.0 +average reward score: 4.66796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.48s (22.00%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1631|ppo_ep: 1|act_loss: 0.018310546875|cri_loss: 0.0108489990234375|unsuper_loss: 0.0 +average reward score: 4.23046875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.19%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1632|ppo_ep: 1|act_loss: 0.0361328125|cri_loss: 0.019378662109375|unsuper_loss: 0.0 +average reward score: 4.4921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.13%) |Training time=0.48s (22.33%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1633|ppo_ep: 1|act_loss: -0.019989013671875|cri_loss: -0.00919342041015625|unsuper_loss: 0.0 +average reward score: 4.1875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.25%) |Training time=0.48s (22.23%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1634|ppo_ep: 1|act_loss: 0.031402587890625|cri_loss: 0.01763916015625|unsuper_loss: 0.0 +average reward score: 4.09375 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.64s (71.03%) |Training time=0.57s (24.67%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.83 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1635|ppo_ep: 1|act_loss: 0.04095458984375|cri_loss: 0.0223541259765625|unsuper_loss: 0.0 +average reward score: 3.7890625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.86%) |Training time=0.49s (22.63%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1636|ppo_ep: 1|act_loss: -0.003795623779296875|cri_loss: -0.0005779266357421875|unsuper_loss: 0.0 +average reward score: 3.61328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.04%) |Training time=0.49s (22.44%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1637|ppo_ep: 1|act_loss: -0.004100799560546875|cri_loss: -0.0005321502685546875|unsuper_loss: 0.0 +average reward score: 4.2109375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.02%) |Training time=0.49s (22.47%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1638|ppo_ep: 1|act_loss: 0.043060302734375|cri_loss: 0.02545166015625|unsuper_loss: 0.0 +average reward score: 3.30859375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.82%) |Training time=0.49s (22.68%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.41 +[2023-04-14 09:48:05,298] [INFO] [logging.py:96:log_dist] [Rank 0] step=1640, skipped=22, lr=[8.849182485300792e-06, 8.849182485300792e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:48:05,316] [INFO] [timer.py:199:stop] epoch=0/micro_step=1640/global_step=1640, RunningAvgSamplesPerSec=108.37462570038971, CurrSamplesPerSec=107.1831391727385, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:48:05,409] [INFO] [logging.py:96:log_dist] [Rank 0] step=1640, skipped=24, lr=[4.586130091182985e-06, 4.586130091182985e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1639|ppo_ep: 1|act_loss: 0.005695343017578125|cri_loss: 0.00555419921875|unsuper_loss: 0.0 +average reward score: 4.1953125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.90%) |Training time=0.46s (21.53%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1640|ppo_ep: 1|act_loss: -0.004543304443359375|cri_loss: -0.0017652511596679688|unsuper_loss: 0.0 +average reward score: 4.12890625 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.59s (70.44%) |Training time=0.47s (20.68%) |Others=0.20 (8.88%)|CurSamplesPerSec=14.19 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1641|ppo_ep: 1|act_loss: 0.014434814453125|cri_loss: 0.00841522216796875|unsuper_loss: 0.0 +average reward score: 3.341796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.97%) |Training time=0.49s (22.52%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1642|ppo_ep: 1|act_loss: -0.00848388671875|cri_loss: -0.003963470458984375|unsuper_loss: 0.0 +average reward score: 5.80078125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.21%) |Training time=0.48s (22.28%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1643|ppo_ep: 1|act_loss: 0.031829833984375|cri_loss: 0.016815185546875|unsuper_loss: 0.0 +average reward score: 5.17578125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.15%) |Training time=0.48s (22.31%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1644|ppo_ep: 1|act_loss: 0.0106048583984375|cri_loss: 0.00621795654296875|unsuper_loss: 0.0 +average reward score: 4.61328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.23%) |Training time=0.48s (22.24%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1645|ppo_ep: 1|act_loss: 0.038177490234375|cri_loss: 0.0225982666015625|unsuper_loss: 0.0 +average reward score: 3.908203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.64%) |Training time=0.47s (21.81%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1646|ppo_ep: 1|act_loss: -0.0369873046875|cri_loss: -0.017547607421875|unsuper_loss: 0.0 +average reward score: 4.453125 +------------------------------------------------------------------------------------- +|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.71s (74.11%) |Training time=0.49s (21.49%) |Others=0.10 (4.41%)|CurSamplesPerSec=13.89 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1647|ppo_ep: 1|act_loss: -0.051971435546875|cri_loss: -0.0243682861328125|unsuper_loss: 0.0 +average reward score: 3.453125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.06%) |Training time=0.49s (22.41%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1648|ppo_ep: 1|act_loss: 0.01430511474609375|cri_loss: 0.00861358642578125|unsuper_loss: 0.0 +average reward score: 4.2734375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.17%) |Training time=0.49s (22.36%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.41 +[2023-04-14 09:48:27,226] [INFO] [logging.py:96:log_dist] [Rank 0] step=1650, skipped=22, lr=[8.838903767332725e-06, 8.838903767332725e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:48:27,244] [INFO] [timer.py:199:stop] epoch=0/micro_step=1650/global_step=1650, RunningAvgSamplesPerSec=108.32006295141781, CurrSamplesPerSec=100.62015510849723, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:48:27,337] [INFO] [logging.py:96:log_dist] [Rank 0] step=1650, skipped=24, lr=[4.580810505426617e-06, 4.580810505426617e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1649|ppo_ep: 1|act_loss: 0.0341796875|cri_loss: 0.018310546875|unsuper_loss: 0.0 +average reward score: 3.806640625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.26%) |Training time=0.48s (22.22%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1650|ppo_ep: 1|act_loss: 0.0545654296875|cri_loss: 0.029083251953125|unsuper_loss: 0.0 +average reward score: 4.59375 +------------------------------------------------------------------------------------- +|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.12%) |Training time=0.51s (21.64%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.55 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1651|ppo_ep: 1|act_loss: 0.0146026611328125|cri_loss: 0.0078277587890625|unsuper_loss: 0.0 +average reward score: 4.9375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.82%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1652|ppo_ep: 1|act_loss: -0.00402069091796875|cri_loss: -0.0014247894287109375|unsuper_loss: 0.0 +average reward score: 4.61328125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.87%) |Training time=0.49s (22.63%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1653|ppo_ep: 1|act_loss: 0.00786590576171875|cri_loss: 0.00921630859375|unsuper_loss: 0.0 +average reward score: 3.876953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.14%) |Training time=0.48s (22.31%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1654|ppo_ep: 1|act_loss: -0.023162841796875|cri_loss: -0.0103912353515625|unsuper_loss: 0.0 +average reward score: 4.1875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.44%) |Training time=0.47s (22.00%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1655|ppo_ep: 1|act_loss: -0.0096435546875|cri_loss: -0.004184722900390625|unsuper_loss: 0.0 +average reward score: 4.55859375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.16%) |Training time=0.49s (22.25%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1656|ppo_ep: 1|act_loss: 0.018157958984375|cri_loss: 0.011199951171875|unsuper_loss: 0.0 +average reward score: 5.671875 +------------------------------------------------------------------------------------- +|E2E latency=2.46s |Gather latency=0.00s (0.00%) |Generate time=1.59s (64.54%) |Training time=0.45s (18.46%) |Others=0.42 (17.01%)|CurSamplesPerSec=13.00 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1657|ppo_ep: 1|act_loss: 0.024444580078125|cri_loss: 0.01318359375|unsuper_loss: 0.0 +average reward score: 4.3515625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.73%) |Training time=0.46s (21.69%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1658|ppo_ep: 1|act_loss: 0.024383544921875|cri_loss: 0.0128021240234375|unsuper_loss: 0.0 +average reward score: 3.88671875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.81%) |Training time=0.46s (21.62%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.41 +[2023-04-14 09:48:49,312] [INFO] [logging.py:96:log_dist] [Rank 0] step=1660, skipped=22, lr=[8.82856555362781e-06, 8.82856555362781e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:48:49,331] [INFO] [timer.py:199:stop] epoch=0/micro_step=1660/global_step=1660, RunningAvgSamplesPerSec=108.27997592914936, CurrSamplesPerSec=107.1406158574308, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:48:49,423] [INFO] [logging.py:96:log_dist] [Rank 0] step=1660, skipped=24, lr=[4.575460077038877e-06, 4.575460077038877e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1659|ppo_ep: 1|act_loss: 0.0134429931640625|cri_loss: 0.0074615478515625|unsuper_loss: 0.0 +average reward score: 3.8671875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.86%) |Training time=0.46s (21.57%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1660|ppo_ep: 1|act_loss: -0.0192413330078125|cri_loss: -0.009063720703125|unsuper_loss: 0.0 +average reward score: 4.8671875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.71%) |Training time=0.47s (21.71%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1661|ppo_ep: 1|act_loss: -0.00200653076171875|cri_loss: -0.00023174285888671875|unsuper_loss: 0.0 +average reward score: 4.94140625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.91%) |Training time=0.46s (21.51%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1662|ppo_ep: 1|act_loss: -0.0095977783203125|cri_loss: -0.003932952880859375|unsuper_loss: 0.0 +average reward score: 4.2421875 +------------------------------------------------------------------------------------- +|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.58s (69.08%) |Training time=0.46s (20.33%) |Others=0.24 (10.59%)|CurSamplesPerSec=13.99 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1663|ppo_ep: 1|act_loss: 0.002063751220703125|cri_loss: 0.0014085769653320312|unsuper_loss: 0.0 +average reward score: 5.265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.59%) |Training time=0.47s (21.82%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1664|ppo_ep: 1|act_loss: 0.04315185546875|cri_loss: 0.0236358642578125|unsuper_loss: 0.0 +average reward score: 3.734375 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.57s (71.26%) |Training time=0.48s (21.65%) |Others=0.16 (7.09%)|CurSamplesPerSec=14.49 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1665|ppo_ep: 1|act_loss: -0.00046062469482421875|cri_loss: 0.0005645751953125|unsuper_loss: 0.0 +average reward score: 4.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.12%) |Training time=0.47s (21.71%) |Others=0.11 (5.17%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1666|ppo_ep: 1|act_loss: 0.0020160675048828125|cri_loss: 0.0041656494140625|unsuper_loss: 0.0 +average reward score: 3.298828125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.68%) |Training time=0.47s (21.76%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1667|ppo_ep: 1|act_loss: -0.0254974365234375|cri_loss: -0.01202392578125|unsuper_loss: 0.0 +average reward score: 5.234375 +------------------------------------------------------------------------------------- +|E2E latency=2.46s |Gather latency=0.00s (0.00%) |Generate time=1.58s (64.14%) |Training time=0.47s (19.15%) |Others=0.41 (16.71%)|CurSamplesPerSec=13.01 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1668|ppo_ep: 1|act_loss: 0.020050048828125|cri_loss: 0.010650634765625|unsuper_loss: 0.0 +average reward score: 3.779296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.48%) |Training time=0.47s (21.95%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.41 +[2023-04-14 09:49:11,326] [INFO] [logging.py:96:log_dist] [Rank 0] step=1670, skipped=22, lr=[8.818167997423314e-06, 8.818167997423314e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:49:11,344] [INFO] [timer.py:199:stop] epoch=0/micro_step=1670/global_step=1670, RunningAvgSamplesPerSec=108.25653389955217, CurrSamplesPerSec=106.11741265840399, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:49:11,437] [INFO] [logging.py:96:log_dist] [Rank 0] step=1670, skipped=24, lr=[4.5700788853260205e-06, 4.5700788853260205e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1669|ppo_ep: 1|act_loss: -0.01548004150390625|cri_loss: -0.00714111328125|unsuper_loss: 0.0 +average reward score: 4.375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.46s (21.61%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1670|ppo_ep: 1|act_loss: 0.02532958984375|cri_loss: 0.014007568359375|unsuper_loss: 0.0 +average reward score: 4.796875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.14%) |Training time=0.49s (22.23%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1671|ppo_ep: 1|act_loss: -0.00852203369140625|cri_loss: -0.00392913818359375|unsuper_loss: 0.0 +average reward score: 5.30078125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.21%) |Training time=0.48s (22.27%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1672|ppo_ep: 1|act_loss: 0.0172882080078125|cri_loss: 0.0097808837890625|unsuper_loss: 0.0 +average reward score: 4.484375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.97%) |Training time=0.49s (22.54%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1673|ppo_ep: 1|act_loss: 0.00482177734375|cri_loss: 0.003299713134765625|unsuper_loss: 0.0 +average reward score: 4.30078125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.85%) |Training time=0.49s (22.64%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1674|ppo_ep: 1|act_loss: 0.015869140625|cri_loss: 0.00927734375|unsuper_loss: 0.0 +average reward score: 4.89453125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.95%) |Training time=0.49s (22.55%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1675|ppo_ep: 1|act_loss: 0.0238037109375|cri_loss: 0.01232147216796875|unsuper_loss: 0.0 +average reward score: 4.6171875 +------------------------------------------------------------------------------------- +|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.64s (73.03%) |Training time=0.50s (22.54%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.28 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1676|ppo_ep: 1|act_loss: 0.049346923828125|cri_loss: 0.0260009765625|unsuper_loss: 0.0 +average reward score: 3.904296875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.80%) |Training time=0.49s (22.66%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1677|ppo_ep: 1|act_loss: 0.0116119384765625|cri_loss: 0.006591796875|unsuper_loss: 0.0 +average reward score: 4.5390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.73%) |Training time=0.49s (22.76%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1678|ppo_ep: 1|act_loss: 0.05316162109375|cri_loss: 0.027618408203125|unsuper_loss: 0.0 +average reward score: 3.74609375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.91%) |Training time=0.49s (22.54%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.42 +[2023-04-14 09:49:33,142] [INFO] [logging.py:96:log_dist] [Rank 0] step=1680, skipped=22, lr=[8.807711252836105e-06, 8.807711252836105e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:49:33,906] [INFO] [timer.py:199:stop] epoch=0/micro_step=1680/global_step=1680, RunningAvgSamplesPerSec=108.02099512384166, CurrSamplesPerSec=29.835284744846422, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:49:34,000] [INFO] [logging.py:96:log_dist] [Rank 0] step=1680, skipped=24, lr=[4.564667010050288e-06, 4.564667010050288e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1679|ppo_ep: 1|act_loss: -0.0125732421875|cri_loss: -0.0058746337890625|unsuper_loss: 0.0 +average reward score: 5.4296875 +------------------------------------------------------------------------------------- +|E2E latency=2.91s |Gather latency=0.00s (0.00%) |Generate time=1.58s (54.17%) |Training time=1.24s (42.44%) |Others=0.10 (3.39%)|CurSamplesPerSec=10.98 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1680|ppo_ep: 1|act_loss: -0.021759033203125|cri_loss: -0.01035308837890625|unsuper_loss: 0.0 +average reward score: 4.453125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.60%) |Training time=0.45s (20.84%) |Others=0.14 (6.57%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1681|ppo_ep: 1|act_loss: -0.0322265625|cri_loss: -0.0152130126953125|unsuper_loss: 0.0 +average reward score: 3.94921875 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.95%) |Training time=0.47s (20.70%) |Others=0.10 (4.35%)|CurSamplesPerSec=14.12 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1682|ppo_ep: 1|act_loss: -0.0018930435180664062|cri_loss: -3.147125244140625e-05|unsuper_loss: 0.0 +average reward score: 4.015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.49%) |Training time=0.47s (21.94%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1683|ppo_ep: 1|act_loss: 0.02203369140625|cri_loss: 0.0120391845703125|unsuper_loss: 0.0 +average reward score: 3.998046875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.30%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1684|ppo_ep: 1|act_loss: 0.03509521484375|cri_loss: 0.01904296875|unsuper_loss: 0.0 +average reward score: 3.82421875 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.59s (70.07%) |Training time=0.47s (20.92%) |Others=0.20 (9.01%)|CurSamplesPerSec=14.14 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1685|ppo_ep: 1|act_loss: -0.00547027587890625|cri_loss: -0.0011768341064453125|unsuper_loss: 0.0 +average reward score: 4.7265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.48s (22.10%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1686|ppo_ep: 1|act_loss: -0.033355712890625|cri_loss: -0.01560211181640625|unsuper_loss: 0.0 +average reward score: 4.32421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.76%) |Training time=0.49s (22.70%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1687|ppo_ep: 1|act_loss: -0.01319122314453125|cri_loss: -0.005558013916015625|unsuper_loss: 0.0 +average reward score: 4.6328125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.12%) |Training time=0.48s (22.34%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1688|ppo_ep: 1|act_loss: -0.003948211669921875|cri_loss: 0.0012969970703125|unsuper_loss: 0.0 +average reward score: 4.3984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.86%) |Training time=0.49s (22.61%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.41 +[2023-04-14 09:49:55,740] [INFO] [logging.py:96:log_dist] [Rank 0] step=1690, skipped=22, lr=[8.797195474860359e-06, 8.797195474860359e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:49:55,758] [INFO] [timer.py:199:stop] epoch=0/micro_step=1690/global_step=1690, RunningAvgSamplesPerSec=107.97908023515386, CurrSamplesPerSec=98.599461374245, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:49:55,851] [INFO] [logging.py:96:log_dist] [Rank 0] step=1690, skipped=24, lr=[4.559224531428731e-06, 4.559224531428731e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1689|ppo_ep: 1|act_loss: -0.04498291015625|cri_loss: -0.0214385986328125|unsuper_loss: 0.0 +average reward score: 5.28125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.98%) |Training time=0.49s (22.50%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1690|ppo_ep: 1|act_loss: 0.044769287109375|cri_loss: 0.02410888671875|unsuper_loss: 0.0 +average reward score: 4.01171875 +------------------------------------------------------------------------------------- +|E2E latency=3.03s |Gather latency=0.00s (0.00%) |Generate time=1.58s (52.18%) |Training time=0.49s (16.08%) |Others=0.96 (31.73%)|CurSamplesPerSec=10.56 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1691|ppo_ep: 1|act_loss: 0.019256591796875|cri_loss: 0.0107574462890625|unsuper_loss: 0.0 +average reward score: 4.0625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.84%) |Training time=0.49s (22.61%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1692|ppo_ep: 1|act_loss: -0.0185089111328125|cri_loss: -0.007755279541015625|unsuper_loss: 0.0 +average reward score: 4.2109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.04%) |Training time=0.49s (22.42%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1693|ppo_ep: 1|act_loss: -0.00452423095703125|cri_loss: -0.0012645721435546875|unsuper_loss: 0.0 +average reward score: 3.41015625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.48s (22.06%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1694|ppo_ep: 1|act_loss: -0.01233673095703125|cri_loss: -0.00475311279296875|unsuper_loss: 0.0 +average reward score: 3.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.07%) |Training time=0.48s (22.40%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1695|ppo_ep: 1|act_loss: 0.01306915283203125|cri_loss: 0.00788116455078125|unsuper_loss: 0.0 +average reward score: 4.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.07%) |Training time=0.48s (20.66%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1696|ppo_ep: 1|act_loss: 0.01459503173828125|cri_loss: 0.00774383544921875|unsuper_loss: 0.0 +average reward score: 6.890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.86%) |Training time=0.48s (22.24%) |Others=0.11 (4.90%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1697|ppo_ep: 1|act_loss: -0.010650634765625|cri_loss: -0.00344085693359375|unsuper_loss: 0.0 +average reward score: 4.671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.50%) |Training time=0.48s (22.00%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1698|ppo_ep: 1|act_loss: 0.015228271484375|cri_loss: 0.00823211669921875|unsuper_loss: 0.0 +average reward score: 4.71484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.07%) |Training time=0.48s (22.39%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.41 +[2023-04-14 09:50:18,441] [INFO] [logging.py:96:log_dist] [Rank 0] step=1700, skipped=22, lr=[8.786620819365276e-06, 8.786620819365276e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:50:18,460] [INFO] [timer.py:199:stop] epoch=0/micro_step=1700/global_step=1700, RunningAvgSamplesPerSec=107.92857209171187, CurrSamplesPerSec=103.74092618671828, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:50:18,554] [INFO] [logging.py:96:log_dist] [Rank 0] step=1700, skipped=24, lr=[4.553751530132009e-06, 4.553751530132009e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1699|ppo_ep: 1|act_loss: -0.0390625|cri_loss: -0.0186309814453125|unsuper_loss: 0.0 +average reward score: 5.40625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.67%) |Training time=0.47s (21.65%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1700|ppo_ep: 1|act_loss: 0.01053619384765625|cri_loss: 0.005527496337890625|unsuper_loss: 0.0 +average reward score: 4.95703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.33%) |Training time=0.48s (22.14%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1701|ppo_ep: 1|act_loss: 0.0002722740173339844|cri_loss: 0.0005764961242675781|unsuper_loss: 0.0 +average reward score: 4.328125 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.59s (69.91%) |Training time=0.49s (21.56%) |Others=0.19 (8.53%)|CurSamplesPerSec=14.10 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1702|ppo_ep: 1|act_loss: -0.002727508544921875|cri_loss: -0.0006961822509765625|unsuper_loss: 0.0 +average reward score: 4.84375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.21%) |Training time=0.48s (22.23%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1703|ppo_ep: 1|act_loss: -0.0056304931640625|cri_loss: -0.0018415451049804688|unsuper_loss: 0.0 +average reward score: 3.80078125 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.03%) |Training time=0.48s (21.91%) |Others=0.11 (5.06%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1704|ppo_ep: 1|act_loss: 0.0157623291015625|cri_loss: 0.00850677490234375|unsuper_loss: 0.0 +average reward score: 5.21484375 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.39%) |Training time=0.48s (21.28%) |Others=0.10 (4.32%)|CurSamplesPerSec=14.11 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1705|ppo_ep: 1|act_loss: 0.0118255615234375|cri_loss: 0.006561279296875|unsuper_loss: 0.0 +average reward score: 4.46484375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.19%) |Training time=0.48s (22.27%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1706|ppo_ep: 1|act_loss: -0.0498046875|cri_loss: -0.0225372314453125|unsuper_loss: 0.0 +average reward score: 4.34375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.24%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1707|ppo_ep: 1|act_loss: -0.0137176513671875|cri_loss: -0.00522613525390625|unsuper_loss: 0.0 +average reward score: 4.85546875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.48s (22.04%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1708|ppo_ep: 1|act_loss: -0.01203155517578125|cri_loss: -0.00525665283203125|unsuper_loss: 0.0 +average reward score: 3.890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.85%) |Training time=0.49s (22.62%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41 +[2023-04-14 09:50:40,357] [INFO] [logging.py:96:log_dist] [Rank 0] step=1710, skipped=22, lr=[8.77598744309276e-06, 8.77598744309276e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:50:40,375] [INFO] [timer.py:199:stop] epoch=0/micro_step=1710/global_step=1710, RunningAvgSamplesPerSec=107.88094685691051, CurrSamplesPerSec=101.33945819997479, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:50:40,517] [INFO] [logging.py:96:log_dist] [Rank 0] step=1710, skipped=24, lr=[4.548248087283204e-06, 4.548248087283204e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1709|ppo_ep: 1|act_loss: -0.0095977783203125|cri_loss: -0.0038585662841796875|unsuper_loss: 0.0 +average reward score: 4.04296875 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.82%) |Training time=0.48s (21.60%) |Others=0.15 (6.58%)|CurSamplesPerSec=14.44 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1710|ppo_ep: 1|act_loss: -0.0081634521484375|cri_loss: -0.003696441650390625|unsuper_loss: 0.0 +average reward score: 3.7890625 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.41%) |Training time=0.49s (22.04%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1711|ppo_ep: 1|act_loss: 0.00742340087890625|cri_loss: 0.0043792724609375|unsuper_loss: 0.0 +average reward score: 4.5625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.93%) |Training time=0.49s (22.55%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1712|ppo_ep: 1|act_loss: 0.0270843505859375|cri_loss: 0.01422882080078125|unsuper_loss: 0.0 +average reward score: 3.84375 +------------------------------------------------------------------------------------- +|E2E latency=2.07s |Gather latency=0.00s (0.00%) |Generate time=1.59s (76.61%) |Training time=0.39s (18.64%) |Others=0.10 (4.75%)|CurSamplesPerSec=15.43 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1713|ppo_ep: 1|act_loss: -0.00589752197265625|cri_loss: -0.002044677734375|unsuper_loss: 0.0 +average reward score: 4.82421875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.71%) |Training time=0.47s (21.71%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1714|ppo_ep: 1|act_loss: 0.00591278076171875|cri_loss: 0.003887176513671875|unsuper_loss: 0.0 +average reward score: 4.26171875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.66%) |Training time=0.47s (21.77%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1715|ppo_ep: 1|act_loss: -0.02520751953125|cri_loss: -0.0118408203125|unsuper_loss: 0.0 +average reward score: 5.046875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.77%) |Training time=0.47s (21.67%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1716|ppo_ep: 1|act_loss: -0.03692626953125|cri_loss: -0.01666259765625|unsuper_loss: 0.0 +average reward score: 3.564453125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.46s (21.37%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1717|ppo_ep: 1|act_loss: -0.049041748046875|cri_loss: -0.02178955078125|unsuper_loss: 0.0 +average reward score: 3.392578125 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.15%) |Training time=0.45s (21.24%) |Others=0.10 (4.61%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1718|ppo_ep: 1|act_loss: -0.0164794921875|cri_loss: -0.00760650634765625|unsuper_loss: 0.0 +average reward score: 4.078125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.03%) |Training time=0.46s (21.38%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.42 +[2023-04-14 09:51:02,716] [INFO] [logging.py:96:log_dist] [Rank 0] step=1720, skipped=22, lr=[8.765295503655101e-06, 8.765295503655101e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:51:02,734] [INFO] [timer.py:199:stop] epoch=0/micro_step=1720/global_step=1720, RunningAvgSamplesPerSec=107.88049260354408, CurrSamplesPerSec=107.629738450978, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:51:02,827] [INFO] [logging.py:96:log_dist] [Rank 0] step=1720, skipped=24, lr=[4.542714284456616e-06, 4.542714284456616e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1719|ppo_ep: 1|act_loss: 0.03533935546875|cri_loss: 0.0183563232421875|unsuper_loss: 0.0 +average reward score: 5.19140625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.95%) |Training time=0.46s (21.48%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1720|ppo_ep: 1|act_loss: 0.064453125|cri_loss: 0.03436279296875|unsuper_loss: 0.0 +average reward score: 3.904296875 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.06%) |Training time=0.45s (21.31%) |Others=0.10 (4.63%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1721|ppo_ep: 1|act_loss: 0.016632080078125|cri_loss: 0.011627197265625|unsuper_loss: 0.0 +average reward score: 4.26171875 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.13%) |Training time=0.45s (21.26%) |Others=0.10 (4.60%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1722|ppo_ep: 1|act_loss: 0.05401611328125|cri_loss: 0.0280609130859375|unsuper_loss: 0.0 +average reward score: 5.17578125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.85%) |Training time=0.46s (21.56%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1723|ppo_ep: 1|act_loss: 0.00507354736328125|cri_loss: 0.0029201507568359375|unsuper_loss: 0.0 +average reward score: 4.26953125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.72%) |Training time=0.46s (21.68%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1724|ppo_ep: 1|act_loss: 0.01146697998046875|cri_loss: 0.006473541259765625|unsuper_loss: 0.0 +average reward score: 4.16015625 +------------------------------------------------------------------------------------- +|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.74s (75.64%) |Training time=0.46s (20.09%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.91 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1725|ppo_ep: 1|act_loss: -0.07354736328125|cri_loss: -0.032379150390625|unsuper_loss: 0.0 +average reward score: 3.578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.22%) |Training time=0.48s (22.22%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1726|ppo_ep: 1|act_loss: -0.0413818359375|cri_loss: -0.0194549560546875|unsuper_loss: 0.0 +average reward score: 3.080078125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.69%) |Training time=0.46s (21.71%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1727|ppo_ep: 1|act_loss: 0.0097808837890625|cri_loss: 0.007587432861328125|unsuper_loss: 0.0 +average reward score: 5.30859375 +------------------------------------------------------------------------------------- +|E2E latency=2.91s |Gather latency=0.00s (0.00%) |Generate time=1.58s (54.26%) |Training time=0.46s (15.74%) |Others=0.87 (30.00%)|CurSamplesPerSec=11.02 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1728|ppo_ep: 1|act_loss: -0.0242767333984375|cri_loss: -0.010833740234375|unsuper_loss: 0.0 +average reward score: 4.1875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.70%) |Training time=0.46s (21.69%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.42 +[2023-04-14 09:51:25,354] [INFO] [logging.py:96:log_dist] [Rank 0] step=1730, skipped=22, lr=[8.754545159532632e-06, 8.754545159532632e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:51:25,370] [INFO] [timer.py:199:stop] epoch=0/micro_step=1730/global_step=1730, RunningAvgSamplesPerSec=107.87975778832639, CurrSamplesPerSec=117.8119436052013, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:51:25,466] [INFO] [logging.py:96:log_dist] [Rank 0] step=1730, skipped=24, lr=[4.537150203676553e-06, 4.537150203676553e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1729|ppo_ep: 1|act_loss: 0.035400390625|cri_loss: 0.01837158203125|unsuper_loss: 0.0 +average reward score: 4.88671875 +------------------------------------------------------------------------------------- +|E2E latency=2.46s |Gather latency=0.00s (0.00%) |Generate time=1.87s (75.81%) |Training time=0.49s (20.01%) |Others=0.10 (4.18%)|CurSamplesPerSec=12.98 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1730|ppo_ep: 1|act_loss: 0.0250091552734375|cri_loss: 0.0130767822265625|unsuper_loss: 0.0 +average reward score: 4.09765625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.73%) |Training time=0.47s (21.68%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1731|ppo_ep: 1|act_loss: 0.03399658203125|cri_loss: 0.0174713134765625|unsuper_loss: 0.0 +average reward score: 4.6171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.89%) |Training time=0.49s (22.41%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1732|ppo_ep: 1|act_loss: 0.00628662109375|cri_loss: 0.004150390625|unsuper_loss: 0.0 +average reward score: 4.796875 +------------------------------------------------------------------------------------- +|E2E latency=2.75s |Gather latency=0.00s (0.00%) |Generate time=1.59s (58.04%) |Training time=0.51s (18.54%) |Others=0.64 (23.42%)|CurSamplesPerSec=11.65 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1733|ppo_ep: 1|act_loss: 0.03253173828125|cri_loss: 0.016815185546875|unsuper_loss: 0.0 +average reward score: 5.34765625 +------------------------------------------------------------------------------------- +|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.68s (73.06%) |Training time=0.52s (22.67%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.90 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1734|ppo_ep: 1|act_loss: 0.0027866363525390625|cri_loss: 0.0016613006591796875|unsuper_loss: 0.0 +average reward score: 4.13671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.30%) |Training time=0.48s (22.17%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1735|ppo_ep: 1|act_loss: -0.01568603515625|cri_loss: -0.007293701171875|unsuper_loss: 0.0 +average reward score: 3.78125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.80%) |Training time=0.49s (22.67%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1736|ppo_ep: 1|act_loss: -0.0134735107421875|cri_loss: -0.00504302978515625|unsuper_loss: 0.0 +average reward score: 3.9375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.96%) |Training time=0.49s (22.50%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1737|ppo_ep: 1|act_loss: -0.029541015625|cri_loss: -0.01422882080078125|unsuper_loss: 0.0 +average reward score: 4.79296875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.16%) |Training time=0.48s (22.30%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1738|ppo_ep: 1|act_loss: -0.004878997802734375|cri_loss: -0.0018644332885742188|unsuper_loss: 0.0 +average reward score: 3.884765625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.08%) |Training time=0.48s (22.37%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.41 +[2023-04-14 09:51:47,899] [INFO] [logging.py:96:log_dist] [Rank 0] step=1740, skipped=22, lr=[8.743736570071387e-06, 8.743736570071387e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:51:47,917] [INFO] [timer.py:199:stop] epoch=0/micro_step=1740/global_step=1740, RunningAvgSamplesPerSec=107.81197459690361, CurrSamplesPerSec=93.59621982583081, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:51:48,010] [INFO] [logging.py:96:log_dist] [Rank 0] step=1740, skipped=24, lr=[4.531555927416115e-06, 4.531555927416115e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1739|ppo_ep: 1|act_loss: 0.01534271240234375|cri_loss: 0.00815582275390625|unsuper_loss: 0.0 +average reward score: 4.015625 +------------------------------------------------------------------------------------- +|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.32%) |Training time=0.50s (21.48%) |Others=0.10 (4.19%)|CurSamplesPerSec=13.63 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1740|ppo_ep: 1|act_loss: 0.041717529296875|cri_loss: 0.021728515625|unsuper_loss: 0.0 +average reward score: 4.62890625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.59%) |Training time=0.50s (22.90%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1741|ppo_ep: 1|act_loss: 0.017333984375|cri_loss: 0.0090484619140625|unsuper_loss: 0.0 +average reward score: 3.990234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.78%) |Training time=0.49s (22.70%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1742|ppo_ep: 1|act_loss: -0.0078582763671875|cri_loss: -0.003448486328125|unsuper_loss: 0.0 +average reward score: 4.50390625 +------------------------------------------------------------------------------------- +|E2E latency=2.46s |Gather latency=0.00s (0.00%) |Generate time=1.58s (64.12%) |Training time=0.49s (19.89%) |Others=0.39 (15.99%)|CurSamplesPerSec=13.02 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1743|ppo_ep: 1|act_loss: 0.00637054443359375|cri_loss: 0.0039215087890625|unsuper_loss: 0.0 +average reward score: 4.7421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.00%) |Training time=0.49s (22.46%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1744|ppo_ep: 1|act_loss: -0.00989532470703125|cri_loss: -0.00441741943359375|unsuper_loss: 0.0 +average reward score: 3.30859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.98%) |Training time=0.49s (22.48%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1745|ppo_ep: 1|act_loss: -0.0804443359375|cri_loss: -0.03790283203125|unsuper_loss: 0.0 +average reward score: 3.40234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.18%) |Training time=0.46s (21.26%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1746|ppo_ep: 1|act_loss: -0.03265380859375|cri_loss: -0.0139923095703125|unsuper_loss: 0.0 +average reward score: 4.05078125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.41%) |Training time=0.45s (21.02%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1747|ppo_ep: 1|act_loss: -0.0177001953125|cri_loss: -0.0078887939453125|unsuper_loss: 0.0 +average reward score: 4.625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.25%) |Training time=0.46s (21.18%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1748|ppo_ep: 1|act_loss: -0.0044708251953125|cri_loss: -0.00197601318359375|unsuper_loss: 0.0 +average reward score: 5.6015625 +------------------------------------------------------------------------------------- +|E2E latency=2.45s |Gather latency=0.00s (0.00%) |Generate time=1.59s (65.05%) |Training time=0.46s (18.74%) |Others=0.40 (16.21%)|CurSamplesPerSec=13.05 |AvgSamplesPerSec=14.41 +[2023-04-14 09:52:10,107] [INFO] [logging.py:96:log_dist] [Rank 0] step=1750, skipped=22, lr=[8.732869895480736e-06, 8.732869895480736e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:52:10,126] [INFO] [timer.py:199:stop] epoch=0/micro_step=1750/global_step=1750, RunningAvgSamplesPerSec=107.78236145408847, CurrSamplesPerSec=109.62930097272944, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:52:10,218] [INFO] [logging.py:96:log_dist] [Rank 0] step=1750, skipped=24, lr=[4.525931538595969e-06, 4.525931538595969e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1749|ppo_ep: 1|act_loss: 0.007076263427734375|cri_loss: 0.00386810302734375|unsuper_loss: 0.0 +average reward score: 3.205078125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.32%) |Training time=0.45s (21.13%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1750|ppo_ep: 1|act_loss: 0.0384521484375|cri_loss: 0.02001953125|unsuper_loss: 0.0 +average reward score: 4.59765625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.31%) |Training time=0.45s (21.12%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1751|ppo_ep: 1|act_loss: 0.07427978515625|cri_loss: 0.040679931640625|unsuper_loss: 0.0 +average reward score: 4.08984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.02%) |Training time=0.46s (21.42%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1752|ppo_ep: 1|act_loss: 0.0146026611328125|cri_loss: 0.0081329345703125|unsuper_loss: 0.0 +average reward score: 4.93359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.16%) |Training time=0.46s (21.27%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1753|ppo_ep: 1|act_loss: -0.022735595703125|cri_loss: -0.01081085205078125|unsuper_loss: 0.0 +average reward score: 4.875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.02%) |Training time=0.46s (21.41%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1754|ppo_ep: 1|act_loss: -0.03564453125|cri_loss: -0.01702880859375|unsuper_loss: 0.0 +average reward score: 5.19921875 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.88%) |Training time=0.46s (19.87%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1755|ppo_ep: 1|act_loss: -0.0101776123046875|cri_loss: -0.004772186279296875|unsuper_loss: 0.0 +average reward score: 4.0 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.97%) |Training time=0.46s (21.46%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1756|ppo_ep: 1|act_loss: -0.01068878173828125|cri_loss: -0.00504302978515625|unsuper_loss: 0.0 +average reward score: 4.4296875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.05%) |Training time=0.46s (21.40%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1757|ppo_ep: 1|act_loss: 0.00408935546875|cri_loss: 0.003238677978515625|unsuper_loss: 0.0 +average reward score: 4.30859375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.21%) |Training time=0.46s (21.22%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1758|ppo_ep: 1|act_loss: 0.0113983154296875|cri_loss: 0.006969451904296875|unsuper_loss: 0.0 +average reward score: 3.5625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.47s (21.65%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.41 +[2023-04-14 09:52:31,837] [INFO] [logging.py:96:log_dist] [Rank 0] step=1760, skipped=22, lr=[8.72194529683101e-06, 8.72194529683101e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:52:31,856] [INFO] [timer.py:199:stop] epoch=0/micro_step=1760/global_step=1760, RunningAvgSamplesPerSec=107.78163376115772, CurrSamplesPerSec=105.12303566505426, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:52:31,949] [INFO] [logging.py:96:log_dist] [Rank 0] step=1760, skipped=24, lr=[4.5202771205831286e-06, 4.5202771205831286e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1759|ppo_ep: 1|act_loss: 0.01348876953125|cri_loss: 0.007396697998046875|unsuper_loss: 0.0 +average reward score: 3.6328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.94%) |Training time=0.47s (21.51%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1760|ppo_ep: 1|act_loss: 0.11712646484375|cri_loss: 0.06634521484375|unsuper_loss: 0.0 +average reward score: 4.421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.05%) |Training time=0.46s (21.38%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1761|ppo_ep: 1|act_loss: -0.01331329345703125|cri_loss: -0.00519561767578125|unsuper_loss: 0.0 +average reward score: 5.4609375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.42%) |Training time=0.46s (21.06%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1762|ppo_ep: 1|act_loss: 0.0191802978515625|cri_loss: 0.00983428955078125|unsuper_loss: 0.0 +average reward score: 4.02734375 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.41%) |Training time=0.46s (20.24%) |Others=0.10 (4.36%)|CurSamplesPerSec=14.19 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1763|ppo_ep: 1|act_loss: -0.0606689453125|cri_loss: -0.0264739990234375|unsuper_loss: 0.0 +average reward score: 3.58203125 +------------------------------------------------------------------------------------- +|E2E latency=2.74s |Gather latency=0.00s (0.00%) |Generate time=1.60s (58.39%) |Training time=0.46s (16.65%) |Others=0.68 (24.95%)|CurSamplesPerSec=11.68 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1764|ppo_ep: 1|act_loss: 0.034393310546875|cri_loss: 0.01776123046875|unsuper_loss: 0.0 +average reward score: 4.23046875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.15%) |Training time=0.46s (21.27%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1765|ppo_ep: 1|act_loss: -0.0011138916015625|cri_loss: -0.00027751922607421875|unsuper_loss: 0.0 +average reward score: 4.03125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.11%) |Training time=0.46s (21.33%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1766|ppo_ep: 1|act_loss: -0.0205230712890625|cri_loss: -0.0092010498046875|unsuper_loss: 0.0 +average reward score: 4.3046875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.46s (21.15%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1767|ppo_ep: 1|act_loss: -0.01461029052734375|cri_loss: -0.00675201416015625|unsuper_loss: 0.0 +average reward score: 3.173828125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.22%) |Training time=0.46s (21.18%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1768|ppo_ep: 1|act_loss: -0.00934600830078125|cri_loss: -0.004116058349609375|unsuper_loss: 0.0 +average reward score: 3.951171875 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.92%) |Training time=0.46s (19.85%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.75 |AvgSamplesPerSec=14.41 +[2023-04-14 09:52:54,244] [INFO] [logging.py:96:log_dist] [Rank 0] step=1770, skipped=22, lr=[8.710962936051119e-06, 8.710962936051119e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:52:54,263] [INFO] [timer.py:199:stop] epoch=0/micro_step=1770/global_step=1770, RunningAvgSamplesPerSec=107.78735625931982, CurrSamplesPerSec=109.66692295813189, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:52:54,355] [INFO] [logging.py:96:log_dist] [Rank 0] step=1770, skipped=24, lr=[4.514592757189707e-06, 4.514592757189707e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1769|ppo_ep: 1|act_loss: -0.000946044921875|cri_loss: 0.00171661376953125|unsuper_loss: 0.0 +average reward score: 4.08984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.31%) |Training time=0.45s (21.14%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1770|ppo_ep: 1|act_loss: -0.017913818359375|cri_loss: -0.0079498291015625|unsuper_loss: 0.0 +average reward score: 4.16796875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.19%) |Training time=0.46s (21.25%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1771|ppo_ep: 1|act_loss: 0.0276641845703125|cri_loss: 0.0149993896484375|unsuper_loss: 0.0 +average reward score: 4.296875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.97%) |Training time=0.46s (21.46%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1772|ppo_ep: 1|act_loss: 0.02630615234375|cri_loss: 0.014068603515625|unsuper_loss: 0.0 +average reward score: 4.80078125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.10%) |Training time=0.46s (21.34%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1773|ppo_ep: 1|act_loss: 0.0014629364013671875|cri_loss: 0.0013713836669921875|unsuper_loss: 0.0 +average reward score: 5.171875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.88%) |Training time=0.47s (21.56%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1774|ppo_ep: 1|act_loss: -0.009490966796875|cri_loss: -0.004062652587890625|unsuper_loss: 0.0 +average reward score: 4.27734375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.46s (21.17%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1775|ppo_ep: 1|act_loss: -0.06005859375|cri_loss: -0.0272369384765625|unsuper_loss: 0.0 +average reward score: 3.927734375 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.71%) |Training time=0.44s (20.68%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1776|ppo_ep: 1|act_loss: -0.0263824462890625|cri_loss: -0.0122528076171875|unsuper_loss: 0.0 +average reward score: 4.84375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.64%) |Training time=0.44s (20.76%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1777|ppo_ep: 1|act_loss: 0.0029964447021484375|cri_loss: 0.0020046234130859375|unsuper_loss: 0.0 +average reward score: 4.97265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.51%) |Training time=0.45s (20.83%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1778|ppo_ep: 1|act_loss: 0.005260467529296875|cri_loss: 0.003143310546875|unsuper_loss: 0.0 +average reward score: 4.4765625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.32%) |Training time=0.45s (21.07%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.42 +[2023-04-14 09:53:15,699] [INFO] [logging.py:96:log_dist] [Rank 0] step=1780, skipped=22, lr=[8.699922975926139e-06, 8.699922975926139e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:53:15,718] [INFO] [timer.py:199:stop] epoch=0/micro_step=1780/global_step=1780, RunningAvgSamplesPerSec=107.80113421242028, CurrSamplesPerSec=113.88078375605919, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:53:15,810] [INFO] [logging.py:96:log_dist] [Rank 0] step=1780, skipped=24, lr=[4.508878532671684e-06, 4.508878532671684e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1779|ppo_ep: 1|act_loss: 0.04669189453125|cri_loss: 0.02532958984375|unsuper_loss: 0.0 +average reward score: 4.890625 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.64%) |Training time=0.44s (20.78%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1780|ppo_ep: 1|act_loss: 0.0433349609375|cri_loss: 0.0230712890625|unsuper_loss: 0.0 +average reward score: 4.015625 +------------------------------------------------------------------------------------- +|E2E latency=2.47s |Gather latency=0.00s (0.00%) |Generate time=1.59s (64.40%) |Training time=0.46s (18.43%) |Others=0.42 (17.17%)|CurSamplesPerSec=12.95 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1781|ppo_ep: 1|act_loss: 0.03173828125|cri_loss: 0.018829345703125|unsuper_loss: 0.0 +average reward score: 4.25 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.41%) |Training time=0.45s (21.02%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1782|ppo_ep: 1|act_loss: 0.00551605224609375|cri_loss: 0.002964019775390625|unsuper_loss: 0.0 +average reward score: 4.20703125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.67%) |Training time=0.44s (20.74%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1783|ppo_ep: 1|act_loss: 0.0203857421875|cri_loss: 0.01043701171875|unsuper_loss: 0.0 +average reward score: 4.46484375 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.85%) |Training time=0.46s (20.52%) |Others=0.17 (7.63%)|CurSamplesPerSec=14.42 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1784|ppo_ep: 1|act_loss: -0.0440673828125|cri_loss: -0.020599365234375|unsuper_loss: 0.0 +average reward score: 3.3828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.97%) |Training time=0.44s (20.32%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1785|ppo_ep: 1|act_loss: -0.0390625|cri_loss: -0.0182037353515625|unsuper_loss: 0.0 +average reward score: 4.4453125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.70%) |Training time=0.44s (20.71%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1786|ppo_ep: 1|act_loss: -0.032318115234375|cri_loss: -0.01308441162109375|unsuper_loss: 0.0 +average reward score: 4.69921875 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.06%) |Training time=0.43s (20.33%) |Others=0.10 (4.61%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1787|ppo_ep: 1|act_loss: 0.0012836456298828125|cri_loss: 0.0012454986572265625|unsuper_loss: 0.0 +average reward score: 4.52734375 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.93%) |Training time=0.44s (20.46%) |Others=0.10 (4.61%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1788|ppo_ep: 1|act_loss: -0.00051116943359375|cri_loss: 0.0003037452697753906|unsuper_loss: 0.0 +average reward score: 4.265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.02%) |Training time=0.44s (20.38%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.42 +[2023-04-14 09:53:37,537] [INFO] [logging.py:96:log_dist] [Rank 0] step=1790, skipped=22, lr=[8.688825580094914e-06, 8.688825580094914e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:53:37,556] [INFO] [timer.py:199:stop] epoch=0/micro_step=1790/global_step=1790, RunningAvgSamplesPerSec=107.830801602799, CurrSamplesPerSec=110.82939976714037, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:53:37,649] [INFO] [logging.py:96:log_dist] [Rank 0] step=1790, skipped=24, lr=[4.503134531727652e-06, 4.503134531727652e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1789|ppo_ep: 1|act_loss: 0.0289306640625|cri_loss: 0.01531219482421875|unsuper_loss: 0.0 +average reward score: 4.75 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.40%) |Training time=0.45s (21.00%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1790|ppo_ep: 1|act_loss: 0.0858154296875|cri_loss: 0.04718017578125|unsuper_loss: 0.0 +average reward score: 4.03125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.19%) |Training time=0.46s (21.26%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1791|ppo_ep: 1|act_loss: 0.1064453125|cri_loss: 0.05609130859375|unsuper_loss: 0.0 +average reward score: 4.26953125 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.51%) |Training time=0.49s (22.04%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1792|ppo_ep: 1|act_loss: 0.0220184326171875|cri_loss: 0.01183319091796875|unsuper_loss: 0.0 +average reward score: 4.40625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.46s (21.16%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1793|ppo_ep: 1|act_loss: -0.00402069091796875|cri_loss: -0.001773834228515625|unsuper_loss: 0.0 +average reward score: 4.29296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.01%) |Training time=0.46s (21.42%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1794|ppo_ep: 1|act_loss: -0.00018310546875|cri_loss: 0.0029506683349609375|unsuper_loss: 0.0 +average reward score: 3.982421875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.47s (21.61%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1795|ppo_ep: 1|act_loss: -0.01824951171875|cri_loss: -0.00859832763671875|unsuper_loss: 0.0 +average reward score: 4.01953125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.94%) |Training time=0.46s (21.49%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1796|ppo_ep: 1|act_loss: 0.038330078125|cri_loss: 0.0207977294921875|unsuper_loss: 0.0 +average reward score: 4.4296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.47s (21.62%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1797|ppo_ep: 1|act_loss: -0.05828857421875|cri_loss: -0.0277099609375|unsuper_loss: 0.0 +average reward score: 5.10546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.77%) |Training time=0.47s (21.68%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1798|ppo_ep: 1|act_loss: -0.0136566162109375|cri_loss: -0.006031036376953125|unsuper_loss: 0.0 +average reward score: 3.41015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.47s (21.66%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.42 +[2023-04-14 09:53:59,319] [INFO] [logging.py:96:log_dist] [Rank 0] step=1800, skipped=22, lr=[8.677670913047617e-06, 8.677670913047617e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:53:59,587] [INFO] [timer.py:199:stop] epoch=0/micro_step=1800/global_step=1800, RunningAvgSamplesPerSec=107.77048253976814, CurrSamplesPerSec=57.6945053886944, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:53:59,680] [INFO] [logging.py:96:log_dist] [Rank 0] step=1800, skipped=24, lr=[4.49736083949756e-06, 4.49736083949756e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1799|ppo_ep: 1|act_loss: -0.002838134765625|cri_loss: -0.000732421875|unsuper_loss: 0.0 +average reward score: 4.3203125 +------------------------------------------------------------------------------------- +|E2E latency=2.57s |Gather latency=0.00s (0.00%) |Generate time=1.76s (68.28%) |Training time=0.72s (27.90%) |Others=0.10 (3.82%)|CurSamplesPerSec=12.45 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1800|ppo_ep: 1|act_loss: -0.004962921142578125|cri_loss: -0.002277374267578125|unsuper_loss: 0.0 +average reward score: 5.18359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.48s (22.07%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1801|ppo_ep: 1|act_loss: 0.00911712646484375|cri_loss: 0.00502777099609375|unsuper_loss: 0.0 +average reward score: 4.32421875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.90%) |Training time=0.46s (21.54%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1802|ppo_ep: 1|act_loss: 0.0194091796875|cri_loss: 0.0101318359375|unsuper_loss: 0.0 +average reward score: 5.296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.77%) |Training time=0.47s (21.67%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1803|ppo_ep: 1|act_loss: -0.004100799560546875|cri_loss: -0.0012693405151367188|unsuper_loss: 0.0 +average reward score: 5.61328125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.69%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1804|ppo_ep: 1|act_loss: 0.005481719970703125|cri_loss: 0.00354766845703125|unsuper_loss: 0.0 +average reward score: 4.5625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.30%) |Training time=0.46s (21.12%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1805|ppo_ep: 1|act_loss: 0.009002685546875|cri_loss: 0.0046844482421875|unsuper_loss: 0.0 +average reward score: 4.77734375 +------------------------------------------------------------------------------------- +|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.60s (67.59%) |Training time=0.46s (19.33%) |Others=0.31 (13.08%)|CurSamplesPerSec=13.55 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1806|ppo_ep: 1|act_loss: -0.016754150390625|cri_loss: -0.00707244873046875|unsuper_loss: 0.0 +average reward score: 4.13671875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.89%) |Training time=0.46s (21.54%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1807|ppo_ep: 1|act_loss: -0.021514892578125|cri_loss: -0.01013946533203125|unsuper_loss: 0.0 +average reward score: 3.890625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.74%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1808|ppo_ep: 1|act_loss: 0.022613525390625|cri_loss: 0.01165008544921875|unsuper_loss: 0.0 +average reward score: 4.69140625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.88%) |Training time=0.46s (21.56%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.42 +[2023-04-14 09:54:21,316] [INFO] [logging.py:96:log_dist] [Rank 0] step=1810, skipped=22, lr=[8.66645914012333e-06, 8.66645914012333e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:54:21,334] [INFO] [timer.py:199:stop] epoch=0/micro_step=1810/global_step=1810, RunningAvgSamplesPerSec=107.75986855875877, CurrSamplesPerSec=105.9602488394859, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:54:21,427] [INFO] [logging.py:96:log_dist] [Rank 0] step=1810, skipped=24, lr=[4.491557541561456e-06, 4.491557541561456e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1809|ppo_ep: 1|act_loss: 0.03271484375|cri_loss: 0.0170440673828125|unsuper_loss: 0.0 +average reward score: 4.0234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.80%) |Training time=0.46s (21.65%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1810|ppo_ep: 1|act_loss: -0.0049285888671875|cri_loss: -0.002193450927734375|unsuper_loss: 0.0 +average reward score: 4.4375 +------------------------------------------------------------------------------------- +|E2E latency=2.88s |Gather latency=0.00s (0.00%) |Generate time=1.59s (55.11%) |Training time=0.47s (16.32%) |Others=0.82 (28.58%)|CurSamplesPerSec=11.10 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1811|ppo_ep: 1|act_loss: -0.0049896240234375|cri_loss: -0.002170562744140625|unsuper_loss: 0.0 +average reward score: 5.1484375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.05%) |Training time=0.46s (21.39%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1812|ppo_ep: 1|act_loss: -0.0087127685546875|cri_loss: -0.0037517547607421875|unsuper_loss: 0.0 +average reward score: 4.2421875 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.81%) |Training time=0.47s (21.34%) |Others=0.15 (6.85%)|CurSamplesPerSec=14.51 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1813|ppo_ep: 1|act_loss: 0.01837158203125|cri_loss: 0.00933837890625|unsuper_loss: 0.0 +average reward score: 5.1875 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.82%) |Training time=0.47s (20.84%) |Others=0.10 (4.34%)|CurSamplesPerSec=14.14 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1814|ppo_ep: 1|act_loss: -0.0235595703125|cri_loss: -0.01139068603515625|unsuper_loss: 0.0 +average reward score: 4.48828125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.05%) |Training time=0.46s (21.39%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1815|ppo_ep: 1|act_loss: -0.002483367919921875|cri_loss: -0.0010623931884765625|unsuper_loss: 0.0 +average reward score: 3.763671875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.95%) |Training time=0.46s (21.48%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1816|ppo_ep: 1|act_loss: 0.027923583984375|cri_loss: 0.01465606689453125|unsuper_loss: 0.0 +average reward score: 4.19140625 +------------------------------------------------------------------------------------- +|E2E latency=3.37s |Gather latency=0.00s (0.00%) |Generate time=1.59s (47.29%) |Training time=0.45s (13.42%) |Others=1.32 (39.28%)|CurSamplesPerSec=9.49 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1817|ppo_ep: 1|act_loss: 0.03106689453125|cri_loss: 0.0168914794921875|unsuper_loss: 0.0 +average reward score: 3.7109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.46s (21.38%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1818|ppo_ep: 1|act_loss: -0.027679443359375|cri_loss: -0.01276397705078125|unsuper_loss: 0.0 +average reward score: 4.33984375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.17%) |Training time=0.45s (21.22%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.42 +[2023-04-14 09:54:44,979] [INFO] [logging.py:96:log_dist] [Rank 0] step=1820, skipped=22, lr=[8.65519042750757e-06, 8.65519042750757e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:54:44,998] [INFO] [timer.py:199:stop] epoch=0/micro_step=1820/global_step=1820, RunningAvgSamplesPerSec=107.75628443014152, CurrSamplesPerSec=109.50247245462815, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:54:45,090] [INFO] [logging.py:96:log_dist] [Rank 0] step=1820, skipped=24, lr=[4.485724723938215e-06, 4.485724723938215e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1819|ppo_ep: 1|act_loss: 0.0186767578125|cri_loss: 0.0098114013671875|unsuper_loss: 0.0 +average reward score: 5.24609375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.65%) |Training time=0.45s (20.86%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1820|ppo_ep: 1|act_loss: 0.01434326171875|cri_loss: 0.007396697998046875|unsuper_loss: 0.0 +average reward score: 4.30078125 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.68s (74.57%) |Training time=0.47s (21.07%) |Others=0.10 (4.36%)|CurSamplesPerSec=14.22 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1821|ppo_ep: 1|act_loss: 0.00457763671875|cri_loss: 0.0029048919677734375|unsuper_loss: 0.0 +average reward score: 4.6875 +------------------------------------------------------------------------------------- +|E2E latency=2.78s |Gather latency=0.00s (0.00%) |Generate time=1.58s (56.71%) |Training time=0.47s (17.01%) |Others=0.73 (26.28%)|CurSamplesPerSec=11.49 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1822|ppo_ep: 1|act_loss: -0.027496337890625|cri_loss: -0.012420654296875|unsuper_loss: 0.0 +average reward score: 4.42578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.26%) |Training time=0.48s (22.18%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1823|ppo_ep: 1|act_loss: -0.0218505859375|cri_loss: -0.0094757080078125|unsuper_loss: 0.0 +average reward score: 4.5390625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.48%) |Training time=0.47s (21.95%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1824|ppo_ep: 1|act_loss: -0.0355224609375|cri_loss: -0.015899658203125|unsuper_loss: 0.0 +average reward score: 4.5234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.19%) |Training time=0.48s (22.25%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1825|ppo_ep: 1|act_loss: -0.0250396728515625|cri_loss: -0.01177978515625|unsuper_loss: 0.0 +average reward score: 4.6953125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.57%) |Training time=0.47s (21.84%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1826|ppo_ep: 1|act_loss: -0.0089111328125|cri_loss: -0.0037994384765625|unsuper_loss: 0.0 +average reward score: 4.3671875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.48s (22.07%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1827|ppo_ep: 1|act_loss: -0.00405120849609375|cri_loss: -0.000476837158203125|unsuper_loss: 0.0 +average reward score: 5.2734375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.33%) |Training time=0.47s (22.10%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1828|ppo_ep: 1|act_loss: 0.00782012939453125|cri_loss: 0.0044708251953125|unsuper_loss: 0.0 +average reward score: 3.943359375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.57s (71.99%) |Training time=0.51s (23.51%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.41 +[2023-04-14 09:55:07,257] [INFO] [logging.py:96:log_dist] [Rank 0] step=1830, skipped=22, lr=[8.643864942229842e-06, 8.643864942229842e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:55:07,276] [INFO] [timer.py:199:stop] epoch=0/micro_step=1830/global_step=1830, RunningAvgSamplesPerSec=107.72570916256083, CurrSamplesPerSec=101.29953304215451, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:55:07,369] [INFO] [logging.py:96:log_dist] [Rank 0] step=1830, skipped=24, lr=[4.479862473084266e-06, 4.479862473084266e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1829|ppo_ep: 1|act_loss: 0.04193115234375|cri_loss: 0.0218658447265625|unsuper_loss: 0.0 +average reward score: 5.3828125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.27%) |Training time=0.48s (22.18%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.41 +epoch: 0|step: 1830|ppo_ep: 1|act_loss: -0.0260772705078125|cri_loss: -0.01226806640625|unsuper_loss: 0.0 +average reward score: 4.5859375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.31%) |Training time=0.48s (22.12%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.42 +[2023-04-14 09:55:11,655] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 1831|ppo_ep: 1|act_loss: -0.002475738525390625|cri_loss: 0.0001316070556640625|unsuper_loss: 0.0 +average reward score: 4.53125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.90%) |Training time=0.47s (22.01%) |Others=0.09 (4.10%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.42 +[2023-04-14 09:55:13,796] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 1832|ppo_ep: 1|act_loss: -0.0205841064453125|cri_loss: -0.007701873779296875|unsuper_loss: 0.0 +average reward score: 4.4921875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.71%) |Training time=0.48s (22.19%) |Others=0.09 (4.09%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1833|ppo_ep: 1|act_loss: -0.0280303955078125|cri_loss: -0.0123748779296875|unsuper_loss: 0.0 +average reward score: 3.974609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.79%) |Training time=0.47s (21.64%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1834|ppo_ep: 1|act_loss: 0.022491455078125|cri_loss: 0.01218414306640625|unsuper_loss: 0.0 +average reward score: 4.30859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.07%) |Training time=0.49s (22.39%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1835|ppo_ep: 1|act_loss: 0.027008056640625|cri_loss: 0.014068603515625|unsuper_loss: 0.0 +average reward score: 3.689453125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.00%) |Training time=0.46s (21.40%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1836|ppo_ep: 1|act_loss: 0.0245513916015625|cri_loss: 0.0125579833984375|unsuper_loss: 0.0 +average reward score: 4.1015625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.48%) |Training time=0.47s (21.93%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1837|ppo_ep: 1|act_loss: 0.0110015869140625|cri_loss: 0.00743865966796875|unsuper_loss: 0.0 +average reward score: 4.21875 +------------------------------------------------------------------------------------- +|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.63s (76.87%) |Training time=0.39s (18.40%) |Others=0.10 (4.73%)|CurSamplesPerSec=15.12 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1838|ppo_ep: 1|act_loss: 0.00502777099609375|cri_loss: 0.0027179718017578125|unsuper_loss: 0.0 +average reward score: 4.6328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.67s (76.86%) |Training time=0.40s (18.48%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.42 +[2023-04-14 09:55:28,958] [INFO] [logging.py:96:log_dist] [Rank 0] step=1840, skipped=22, lr=[8.632482852161159e-06, 8.632482852161159e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:55:28,976] [INFO] [timer.py:199:stop] epoch=0/micro_step=1840/global_step=1840, RunningAvgSamplesPerSec=107.73700504107984, CurrSamplesPerSec=111.84251454093962, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:55:29,068] [INFO] [logging.py:96:log_dist] [Rank 0] step=1840, skipped=26, lr=[4.475151538852264e-06, 4.475151538852264e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1839|ppo_ep: 1|act_loss: -0.0207672119140625|cri_loss: -0.00972747802734375|unsuper_loss: 0.0 +average reward score: 4.3359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.57%) |Training time=0.45s (20.86%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1840|ppo_ep: 1|act_loss: -0.021087646484375|cri_loss: -0.009552001953125|unsuper_loss: 0.0 +average reward score: 3.8671875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.38%) |Training time=0.45s (21.04%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1841|ppo_ep: 1|act_loss: -0.023468017578125|cri_loss: -0.010345458984375|unsuper_loss: 0.0 +average reward score: 3.470703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.92%) |Training time=0.44s (20.50%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1842|ppo_ep: 1|act_loss: -0.05645751953125|cri_loss: -0.026336669921875|unsuper_loss: 0.0 +average reward score: 4.5703125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.65%) |Training time=0.45s (20.72%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1843|ppo_ep: 1|act_loss: 0.034271240234375|cri_loss: 0.0200042724609375|unsuper_loss: 0.0 +average reward score: 3.44140625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.52%) |Training time=0.43s (19.89%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1844|ppo_ep: 1|act_loss: -0.0029296875|cri_loss: -0.0012416839599609375|unsuper_loss: 0.0 +average reward score: 4.8671875 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.79s (76.96%) |Training time=0.44s (18.77%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.79 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1845|ppo_ep: 1|act_loss: 0.023406982421875|cri_loss: 0.01213836669921875|unsuper_loss: 0.0 +average reward score: 4.25390625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.21%) |Training time=0.46s (21.22%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1846|ppo_ep: 1|act_loss: -0.0084228515625|cri_loss: -0.002788543701171875|unsuper_loss: 0.0 +average reward score: 4.7890625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.14%) |Training time=0.46s (21.31%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1847|ppo_ep: 1|act_loss: -0.06622314453125|cri_loss: -0.0291748046875|unsuper_loss: 0.0 +average reward score: 4.046875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.35%) |Training time=0.45s (21.07%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1848|ppo_ep: 1|act_loss: 0.03302001953125|cri_loss: 0.017364501953125|unsuper_loss: 0.0 +average reward score: 3.33984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.18%) |Training time=0.46s (21.30%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42 +[2023-04-14 09:55:50,807] [INFO] [logging.py:96:log_dist] [Rank 0] step=1850, skipped=22, lr=[8.621044326011558e-06, 8.621044326011558e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:55:50,825] [INFO] [timer.py:199:stop] epoch=0/micro_step=1850/global_step=1850, RunningAvgSamplesPerSec=107.7581995961723, CurrSamplesPerSec=109.30806060828172, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:55:50,918] [INFO] [logging.py:96:log_dist] [Rank 0] step=1850, skipped=26, lr=[4.469236527444994e-06, 4.469236527444994e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1849|ppo_ep: 1|act_loss: -0.00634765625|cri_loss: 0.00135040283203125|unsuper_loss: 0.0 +average reward score: 3.236328125 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.73s (75.76%) |Training time=0.46s (19.95%) |Others=0.10 (4.29%)|CurSamplesPerSec=14.01 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1850|ppo_ep: 1|act_loss: 0.01006317138671875|cri_loss: 0.010528564453125|unsuper_loss: 0.0 +average reward score: 3.015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.47s (21.64%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1851|ppo_ep: 1|act_loss: 0.032867431640625|cri_loss: 0.0192413330078125|unsuper_loss: 0.0 +average reward score: 3.171875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.28%) |Training time=0.45s (21.15%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1852|ppo_ep: 1|act_loss: 0.02581787109375|cri_loss: 0.0165252685546875|unsuper_loss: 0.0 +average reward score: 3.73046875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.50%) |Training time=0.45s (20.93%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1853|ppo_ep: 1|act_loss: -0.04046630859375|cri_loss: -0.01678466796875|unsuper_loss: 0.0 +average reward score: 3.263671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.07%) |Training time=0.46s (21.36%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1854|ppo_ep: 1|act_loss: -0.1556396484375|cri_loss: -0.061798095703125|unsuper_loss: 0.0 +average reward score: 3.73046875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.19%) |Training time=0.46s (21.26%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1855|ppo_ep: 1|act_loss: 0.0204010009765625|cri_loss: 0.01122283935546875|unsuper_loss: 0.0 +average reward score: 3.962890625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.13%) |Training time=0.46s (21.30%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1856|ppo_ep: 1|act_loss: 0.038818359375|cri_loss: 0.0240478515625|unsuper_loss: 0.0 +average reward score: 2.8671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.04%) |Training time=0.46s (21.40%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1857|ppo_ep: 1|act_loss: -0.041259765625|cri_loss: -0.0198516845703125|unsuper_loss: 0.0 +average reward score: 4.76953125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.49%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1858|ppo_ep: 1|act_loss: 0.1640625|cri_loss: 0.1064453125|unsuper_loss: 0.0 +average reward score: 3.146484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.88%) |Training time=0.46s (21.56%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.42 +[2023-04-14 09:56:12,515] [INFO] [logging.py:96:log_dist] [Rank 0] step=1860, skipped=22, lr=[8.609549533327585e-06, 8.609549533327585e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:56:12,534] [INFO] [timer.py:199:stop] epoch=0/micro_step=1860/global_step=1860, RunningAvgSamplesPerSec=107.75624465773492, CurrSamplesPerSec=104.92407924083149, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:56:12,628] [INFO] [logging.py:96:log_dist] [Rank 0] step=1860, skipped=26, lr=[4.463292327201862e-06, 4.463292327201862e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1859|ppo_ep: 1|act_loss: -0.017120361328125|cri_loss: -0.00815582275390625|unsuper_loss: 0.0 +average reward score: 3.7578125 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.57%) |Training time=0.47s (20.21%) |Others=0.10 (4.22%)|CurSamplesPerSec=13.77 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1860|ppo_ep: 1|act_loss: 0.0186614990234375|cri_loss: 0.00991058349609375|unsuper_loss: 0.0 +average reward score: 3.82421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.74%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1861|ppo_ep: 1|act_loss: 0.031982421875|cri_loss: 0.0178070068359375|unsuper_loss: 0.0 +average reward score: 4.62109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.83%) |Training time=0.46s (21.60%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1862|ppo_ep: 1|act_loss: 0.03680419921875|cri_loss: 0.020660400390625|unsuper_loss: 0.0 +average reward score: 4.64453125 +------------------------------------------------------------------------------------- +|E2E latency=2.58s |Gather latency=0.00s (0.00%) |Generate time=1.58s (61.54%) |Training time=0.47s (18.13%) |Others=0.52 (20.32%)|CurSamplesPerSec=12.43 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1863|ppo_ep: 1|act_loss: 0.053924560546875|cri_loss: 0.027862548828125|unsuper_loss: 0.0 +average reward score: 5.0390625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.88%) |Training time=0.47s (21.56%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42 +[2023-04-14 09:56:23,836] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384 +epoch: 0|step: 1864|ppo_ep: 1|act_loss: 0.0142364501953125|cri_loss: 0.0084381103515625|unsuper_loss: 0.0 +average reward score: 4.68359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.09%) |Training time=0.47s (21.75%) |Others=0.09 (4.16%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1865|ppo_ep: 1|act_loss: 0.005218505859375|cri_loss: 0.003391265869140625|unsuper_loss: 0.0 +average reward score: 4.0546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.07%) |Training time=0.47s (21.80%) |Others=0.11 (5.13%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1866|ppo_ep: 1|act_loss: 0.020599365234375|cri_loss: 0.01151275634765625|unsuper_loss: 0.0 +average reward score: 4.07421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.67%) |Training time=0.47s (21.80%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1867|ppo_ep: 1|act_loss: -0.00788116455078125|cri_loss: -0.00058746337890625|unsuper_loss: 0.0 +average reward score: 4.55078125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.98%) |Training time=0.46s (21.45%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1868|ppo_ep: 1|act_loss: 0.010833740234375|cri_loss: 0.00745391845703125|unsuper_loss: 0.0 +average reward score: 4.60546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.98%) |Training time=0.46s (21.45%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.42 +[2023-04-14 09:56:34,515] [INFO] [logging.py:96:log_dist] [Rank 0] step=1870, skipped=22, lr=[8.597998644489801e-06, 8.597998644489801e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:56:34,533] [INFO] [timer.py:199:stop] epoch=0/micro_step=1870/global_step=1870, RunningAvgSamplesPerSec=107.7423744536757, CurrSamplesPerSec=106.6015344781027, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:56:34,626] [INFO] [logging.py:96:log_dist] [Rank 0] step=1870, skipped=27, lr=[4.45791766334022e-06, 4.45791766334022e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1869|ppo_ep: 1|act_loss: -0.01727294921875|cri_loss: -0.00601959228515625|unsuper_loss: 0.0 +average reward score: 4.2421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.01%) |Training time=0.46s (21.46%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1870|ppo_ep: 1|act_loss: -0.072021484375|cri_loss: -0.029998779296875|unsuper_loss: 0.0 +average reward score: 5.0234375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.26%) |Training time=0.46s (21.18%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1871|ppo_ep: 1|act_loss: 0.0263671875|cri_loss: 0.014129638671875|unsuper_loss: 0.0 +average reward score: 4.63671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.47s (21.68%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1872|ppo_ep: 1|act_loss: -0.03436279296875|cri_loss: -0.015869140625|unsuper_loss: 0.0 +average reward score: 4.3671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.72%) |Training time=0.47s (21.74%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1873|ppo_ep: 1|act_loss: 0.00041961669921875|cri_loss: 0.001628875732421875|unsuper_loss: 0.0 +average reward score: 4.16796875 +------------------------------------------------------------------------------------- +|E2E latency=2.49s |Gather latency=0.00s (0.00%) |Generate time=1.76s (70.73%) |Training time=0.47s (19.01%) |Others=0.26 (10.27%)|CurSamplesPerSec=12.86 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1874|ppo_ep: 1|act_loss: -0.018707275390625|cri_loss: -0.00836181640625|unsuper_loss: 0.0 +average reward score: 5.48046875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.45%) |Training time=0.47s (21.99%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1875|ppo_ep: 1|act_loss: 0.03057861328125|cri_loss: 0.0168304443359375|unsuper_loss: 0.0 +average reward score: 4.69140625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.97%) |Training time=0.46s (21.45%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1876|ppo_ep: 1|act_loss: 0.0498046875|cri_loss: 0.0269012451171875|unsuper_loss: 0.0 +average reward score: 4.11328125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.74%) |Training time=0.47s (21.68%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1877|ppo_ep: 1|act_loss: 0.09320068359375|cri_loss: 0.048553466796875|unsuper_loss: 0.0 +average reward score: 4.85546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.46s (21.56%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1878|ppo_ep: 1|act_loss: -0.0460205078125|cri_loss: -0.02252197265625|unsuper_loss: 0.0 +average reward score: 4.9921875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.47%) |Training time=0.48s (22.04%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.42 +[2023-04-14 09:56:56,563] [INFO] [logging.py:96:log_dist] [Rank 0] step=1880, skipped=22, lr=[8.58639183071024e-06, 8.58639183071024e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:56:56,581] [INFO] [timer.py:199:stop] epoch=0/micro_step=1880/global_step=1880, RunningAvgSamplesPerSec=107.72234280157105, CurrSamplesPerSec=100.53431047965461, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:56:56,674] [INFO] [logging.py:96:log_dist] [Rank 0] step=1880, skipped=27, lr=[4.451918247401336e-06, 4.451918247401336e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1879|ppo_ep: 1|act_loss: -0.04168701171875|cri_loss: -0.0201873779296875|unsuper_loss: 0.0 +average reward score: 4.00390625 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.57%) |Training time=0.48s (21.13%) |Others=0.10 (4.30%)|CurSamplesPerSec=14.06 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1880|ppo_ep: 1|act_loss: -0.01303863525390625|cri_loss: -0.0061492919921875|unsuper_loss: 0.0 +average reward score: 4.73828125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.85%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1881|ppo_ep: 1|act_loss: 0.073974609375|cri_loss: 0.039947509765625|unsuper_loss: 0.0 +average reward score: 4.140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.46%) |Training time=0.47s (21.98%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1882|ppo_ep: 1|act_loss: 0.02935791015625|cri_loss: 0.019622802734375|unsuper_loss: 0.0 +average reward score: 4.78125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.92%) |Training time=0.46s (21.50%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1883|ppo_ep: 1|act_loss: 0.012115478515625|cri_loss: 0.00738525390625|unsuper_loss: 0.0 +average reward score: 4.953125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.53%) |Training time=0.47s (21.91%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1884|ppo_ep: 1|act_loss: 0.0086669921875|cri_loss: 0.00626373291015625|unsuper_loss: 0.0 +average reward score: 4.87109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.56%) |Training time=0.47s (21.88%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1885|ppo_ep: 1|act_loss: -0.06195068359375|cri_loss: -0.027618408203125|unsuper_loss: 0.0 +average reward score: 4.78515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.53%) |Training time=0.47s (21.91%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1886|ppo_ep: 1|act_loss: -0.0176544189453125|cri_loss: -0.006420135498046875|unsuper_loss: 0.0 +average reward score: 4.3359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.70%) |Training time=0.47s (21.72%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1887|ppo_ep: 1|act_loss: -0.0256195068359375|cri_loss: -0.010986328125|unsuper_loss: 0.0 +average reward score: 4.16796875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.55%) |Training time=0.47s (21.89%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1888|ppo_ep: 1|act_loss: 0.05511474609375|cri_loss: 0.02960205078125|unsuper_loss: 0.0 +average reward score: 4.63671875 +------------------------------------------------------------------------------------- +|E2E latency=2.82s |Gather latency=0.00s (0.00%) |Generate time=1.76s (62.23%) |Training time=0.47s (16.67%) |Others=0.60 (21.09%)|CurSamplesPerSec=11.34 |AvgSamplesPerSec=14.42 +[2023-04-14 09:57:18,761] [INFO] [logging.py:96:log_dist] [Rank 0] step=1890, skipped=22, lr=[8.574729264029886e-06, 8.574729264029886e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:57:18,779] [INFO] [timer.py:199:stop] epoch=0/micro_step=1890/global_step=1890, RunningAvgSamplesPerSec=107.70136193271539, CurrSamplesPerSec=103.31760270160629, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:57:18,872] [INFO] [logging.py:96:log_dist] [Rank 0] step=1890, skipped=27, lr=[4.44588989932528e-06, 4.44588989932528e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1889|ppo_ep: 1|act_loss: 0.056121826171875|cri_loss: 0.02935791015625|unsuper_loss: 0.0 +average reward score: 4.796875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.42%) |Training time=0.47s (22.00%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1890|ppo_ep: 1|act_loss: -0.0290985107421875|cri_loss: -0.0125274658203125|unsuper_loss: 0.0 +average reward score: 4.3515625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.13%) |Training time=0.48s (22.31%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1891|ppo_ep: 1|act_loss: 0.17236328125|cri_loss: 0.0926513671875|unsuper_loss: 0.0 +average reward score: 4.29296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.25%) |Training time=0.48s (22.18%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1892|ppo_ep: 1|act_loss: 0.011810302734375|cri_loss: 0.00740814208984375|unsuper_loss: 0.0 +average reward score: 4.796875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.65%) |Training time=0.47s (21.77%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1893|ppo_ep: 1|act_loss: 0.0084075927734375|cri_loss: 0.00481414794921875|unsuper_loss: 0.0 +average reward score: 4.796875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.18%) |Training time=0.49s (22.25%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1894|ppo_ep: 1|act_loss: 0.087890625|cri_loss: 0.047149658203125|unsuper_loss: 0.0 +average reward score: 4.5859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.37%) |Training time=0.48s (22.07%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1895|ppo_ep: 1|act_loss: -0.0259246826171875|cri_loss: -0.012298583984375|unsuper_loss: 0.0 +average reward score: 5.16015625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.15%) |Training time=0.48s (22.32%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1896|ppo_ep: 1|act_loss: -0.013397216796875|cri_loss: -0.00591278076171875|unsuper_loss: 0.0 +average reward score: 5.09765625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.78%) |Training time=0.49s (22.68%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1897|ppo_ep: 1|act_loss: -0.08074951171875|cri_loss: -0.038360595703125|unsuper_loss: 0.0 +average reward score: 5.0703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.75%) |Training time=0.49s (22.68%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1898|ppo_ep: 1|act_loss: -0.0638427734375|cri_loss: -0.029876708984375|unsuper_loss: 0.0 +average reward score: 5.47265625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.53%) |Training time=0.50s (22.97%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.42 +[2023-04-14 09:57:40,428] [INFO] [logging.py:96:log_dist] [Rank 0] step=1900, skipped=22, lr=[8.563011117316109e-06, 8.563011117316109e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:57:40,446] [INFO] [timer.py:199:stop] epoch=0/micro_step=1900/global_step=1900, RunningAvgSamplesPerSec=107.65234667115939, CurrSamplesPerSec=96.84231002618438, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:57:40,796] [INFO] [logging.py:96:log_dist] [Rank 0] step=1900, skipped=27, lr=[4.439832708466712e-06, 4.439832708466712e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1899|ppo_ep: 1|act_loss: -0.0599365234375|cri_loss: -0.029296875|unsuper_loss: 0.0 +average reward score: 4.40234375 +------------------------------------------------------------------------------------- +|E2E latency=2.43s |Gather latency=0.00s (0.00%) |Generate time=1.58s (65.12%) |Training time=0.75s (30.80%) |Others=0.10 (4.09%)|CurSamplesPerSec=13.16 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1900|ppo_ep: 1|act_loss: 0.00838470458984375|cri_loss: 0.00495147705078125|unsuper_loss: 0.0 +average reward score: 5.15625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.84%) |Training time=0.50s (22.66%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1901|ppo_ep: 1|act_loss: 0.01554107666015625|cri_loss: 0.00830841064453125|unsuper_loss: 0.0 +average reward score: 4.25 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.47%) |Training time=0.46s (20.99%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1902|ppo_ep: 1|act_loss: 0.082763671875|cri_loss: 0.04510498046875|unsuper_loss: 0.0 +average reward score: 4.671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.23%) |Training time=0.46s (21.23%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1903|ppo_ep: 1|act_loss: 0.08160400390625|cri_loss: 0.04412841796875|unsuper_loss: 0.0 +average reward score: 4.390625 +------------------------------------------------------------------------------------- +|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.78s (75.03%) |Training time=0.49s (20.81%) |Others=0.10 (4.16%)|CurSamplesPerSec=13.48 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1904|ppo_ep: 1|act_loss: 0.004364013671875|cri_loss: 0.0028858184814453125|unsuper_loss: 0.0 +average reward score: 4.6640625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.16%) |Training time=0.46s (21.26%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1905|ppo_ep: 1|act_loss: 0.008758544921875|cri_loss: 0.005126953125|unsuper_loss: 0.0 +average reward score: 4.859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.83%) |Training time=0.45s (20.64%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1906|ppo_ep: 1|act_loss: -0.04571533203125|cri_loss: -0.0203399658203125|unsuper_loss: 0.0 +average reward score: 4.5390625 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.63s (70.66%) |Training time=0.45s (19.45%) |Others=0.23 (9.89%)|CurSamplesPerSec=13.85 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1907|ppo_ep: 1|act_loss: -0.0204315185546875|cri_loss: -0.00977325439453125|unsuper_loss: 0.0 +average reward score: 4.7734375 +------------------------------------------------------------------------------------- +|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.66s (74.35%) |Training time=0.47s (21.17%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.35 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1908|ppo_ep: 1|act_loss: -0.0234527587890625|cri_loss: -0.011016845703125|unsuper_loss: 0.0 +average reward score: 4.91015625 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.73s (75.05%) |Training time=0.48s (20.64%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.88 |AvgSamplesPerSec=14.42 +[2023-04-14 09:58:02,983] [INFO] [logging.py:96:log_dist] [Rank 0] step=1910, skipped=22, lr=[8.551237564260112e-06, 8.551237564260112e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:58:03,001] [INFO] [timer.py:199:stop] epoch=0/micro_step=1910/global_step=1910, RunningAvgSamplesPerSec=107.64097658565429, CurrSamplesPerSec=115.11625720882522, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:58:03,094] [INFO] [logging.py:96:log_dist] [Rank 0] step=1910, skipped=27, lr=[4.433746764607812e-06, 4.433746764607812e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1909|ppo_ep: 1|act_loss: -0.018829345703125|cri_loss: -0.00876617431640625|unsuper_loss: 0.0 +average reward score: 4.64453125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.14%) |Training time=0.44s (20.34%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1910|ppo_ep: 1|act_loss: -0.02069091796875|cri_loss: -0.00942230224609375|unsuper_loss: 0.0 +average reward score: 4.26953125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.48%) |Training time=0.48s (22.00%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1911|ppo_ep: 1|act_loss: 0.02587890625|cri_loss: 0.0135345458984375|unsuper_loss: 0.0 +average reward score: 4.5625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.89%) |Training time=0.47s (21.57%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1912|ppo_ep: 1|act_loss: 0.01739501953125|cri_loss: 0.0093841552734375|unsuper_loss: 0.0 +average reward score: 4.296875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.27%) |Training time=0.48s (22.20%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1913|ppo_ep: 1|act_loss: 0.05316162109375|cri_loss: 0.0273284912109375|unsuper_loss: 0.0 +average reward score: 3.9609375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.60%) |Training time=0.47s (21.87%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1914|ppo_ep: 1|act_loss: 0.0217742919921875|cri_loss: 0.01145172119140625|unsuper_loss: 0.0 +average reward score: 4.5546875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.77%) |Training time=0.47s (21.72%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1915|ppo_ep: 1|act_loss: -0.00406646728515625|cri_loss: -0.0015735626220703125|unsuper_loss: 0.0 +average reward score: 4.796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.95%) |Training time=0.47s (21.52%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1916|ppo_ep: 1|act_loss: 0.0229339599609375|cri_loss: 0.01293182373046875|unsuper_loss: 0.0 +average reward score: 4.30859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.59%) |Training time=0.47s (21.86%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1917|ppo_ep: 1|act_loss: -0.03863525390625|cri_loss: -0.01763916015625|unsuper_loss: 0.0 +average reward score: 4.296875 +------------------------------------------------------------------------------------- +|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.17%) |Training time=0.51s (21.66%) |Others=0.10 (4.17%)|CurSamplesPerSec=13.53 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1918|ppo_ep: 1|act_loss: 0.020843505859375|cri_loss: 0.0109405517578125|unsuper_loss: 0.0 +average reward score: 4.45703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.28%) |Training time=0.48s (22.18%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.42 +[2023-04-14 09:58:24,899] [INFO] [logging.py:96:log_dist] [Rank 0] step=1920, skipped=22, lr=[8.539408779374354e-06, 8.539408779374354e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:58:24,917] [INFO] [timer.py:199:stop] epoch=0/micro_step=1920/global_step=1920, RunningAvgSamplesPerSec=107.6049387338173, CurrSamplesPerSec=101.45643165445873, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:58:25,010] [INFO] [logging.py:96:log_dist] [Rank 0] step=1920, skipped=27, lr=[4.427632157956951e-06, 4.427632157956951e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1919|ppo_ep: 1|act_loss: -0.022369384765625|cri_loss: -0.01059722900390625|unsuper_loss: 0.0 +average reward score: 4.63671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.48s (22.10%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1920|ppo_ep: 1|act_loss: -0.0789794921875|cri_loss: -0.0374755859375|unsuper_loss: 0.0 +average reward score: 4.06640625 +------------------------------------------------------------------------------------- +|E2E latency=2.51s |Gather latency=0.00s (0.00%) |Generate time=1.61s (64.09%) |Training time=0.49s (19.47%) |Others=0.41 (16.44%)|CurSamplesPerSec=12.77 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1921|ppo_ep: 1|act_loss: -0.027984619140625|cri_loss: -0.0128326416015625|unsuper_loss: 0.0 +average reward score: 4.7109375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.48s (22.00%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1922|ppo_ep: 1|act_loss: 0.013946533203125|cri_loss: 0.0087738037109375|unsuper_loss: 0.0 +average reward score: 4.6796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.48s (22.03%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1923|ppo_ep: 1|act_loss: 0.020965576171875|cri_loss: 0.011932373046875|unsuper_loss: 0.0 +average reward score: 4.68359375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.60%) |Training time=0.48s (21.88%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1924|ppo_ep: 1|act_loss: 0.0313720703125|cri_loss: 0.021392822265625|unsuper_loss: 0.0 +average reward score: 4.8984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.12%) |Training time=0.46s (21.33%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1925|ppo_ep: 1|act_loss: 0.02484130859375|cri_loss: 0.01285552978515625|unsuper_loss: 0.0 +average reward score: 4.859375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.51%) |Training time=0.47s (21.92%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1926|ppo_ep: 1|act_loss: 0.09423828125|cri_loss: 0.052276611328125|unsuper_loss: 0.0 +average reward score: 4.55859375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.68%) |Training time=0.47s (21.76%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1927|ppo_ep: 1|act_loss: -0.0212249755859375|cri_loss: -0.0092926025390625|unsuper_loss: 0.0 +average reward score: 4.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.42%) |Training time=0.48s (22.03%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1928|ppo_ep: 1|act_loss: -0.0222015380859375|cri_loss: -0.01053619384765625|unsuper_loss: 0.0 +average reward score: 4.4765625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.73%) |Training time=0.47s (21.70%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.42 +[2023-04-14 09:58:46,890] [INFO] [logging.py:96:log_dist] [Rank 0] step=1930, skipped=22, lr=[8.527524937989964e-06, 8.527524937989964e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:58:46,909] [INFO] [timer.py:199:stop] epoch=0/micro_step=1930/global_step=1930, RunningAvgSamplesPerSec=107.57664485953372, CurrSamplesPerSec=101.2196243603908, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:58:47,001] [INFO] [logging.py:96:log_dist] [Rank 0] step=1930, skipped=27, lr=[4.421488979147349e-06, 4.421488979147349e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1929|ppo_ep: 1|act_loss: -0.042266845703125|cri_loss: -0.0194244384765625|unsuper_loss: 0.0 +average reward score: 4.67578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.32%) |Training time=0.48s (22.14%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1930|ppo_ep: 1|act_loss: -0.01319122314453125|cri_loss: -0.00580596923828125|unsuper_loss: 0.0 +average reward score: 4.4921875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.81%) |Training time=0.47s (21.64%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1931|ppo_ep: 1|act_loss: -0.0440673828125|cri_loss: -0.019989013671875|unsuper_loss: 0.0 +average reward score: 4.90234375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.88%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1932|ppo_ep: 1|act_loss: -0.04925537109375|cri_loss: -0.0204315185546875|unsuper_loss: 0.0 +average reward score: 4.1171875 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.38%) |Training time=0.47s (20.38%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.78 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1933|ppo_ep: 1|act_loss: 0.0242156982421875|cri_loss: 0.01245880126953125|unsuper_loss: 0.0 +average reward score: 4.515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.34%) |Training time=0.48s (22.10%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1934|ppo_ep: 1|act_loss: 0.057586669921875|cri_loss: 0.030181884765625|unsuper_loss: 0.0 +average reward score: 4.71484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.40%) |Training time=0.48s (22.04%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1935|ppo_ep: 1|act_loss: 0.0548095703125|cri_loss: 0.0284881591796875|unsuper_loss: 0.0 +average reward score: 4.77734375 +------------------------------------------------------------------------------------- +|E2E latency=2.07s |Gather latency=0.00s (0.00%) |Generate time=1.58s (76.53%) |Training time=0.39s (18.69%) |Others=0.10 (4.77%)|CurSamplesPerSec=15.49 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1936|ppo_ep: 1|act_loss: 0.04010009765625|cri_loss: 0.02294921875|unsuper_loss: 0.0 +average reward score: 4.83203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.56%) |Training time=0.48s (21.92%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1937|ppo_ep: 1|act_loss: 0.0172882080078125|cri_loss: 0.009521484375|unsuper_loss: 0.0 +average reward score: 4.47265625 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.67%) |Training time=0.47s (20.98%) |Others=0.10 (4.35%)|CurSamplesPerSec=14.16 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1938|ppo_ep: 1|act_loss: -0.036041259765625|cri_loss: -0.0171966552734375|unsuper_loss: 0.0 +average reward score: 4.4453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.35%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.42 +[2023-04-14 09:59:08,767] [INFO] [logging.py:96:log_dist] [Rank 0] step=1940, skipped=22, lr=[8.515586216254137e-06, 8.515586216254137e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:59:08,785] [INFO] [timer.py:199:stop] epoch=0/micro_step=1940/global_step=1940, RunningAvgSamplesPerSec=107.57414139164379, CurrSamplesPerSec=109.40669624006952, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:59:08,878] [INFO] [logging.py:96:log_dist] [Rank 0] step=1940, skipped=27, lr=[4.4153173192357375e-06, 4.4153173192357375e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1939|ppo_ep: 1|act_loss: -0.020263671875|cri_loss: -0.0081634521484375|unsuper_loss: 0.0 +average reward score: 4.7109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.36%) |Training time=0.46s (21.09%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1940|ppo_ep: 1|act_loss: 0.0053558349609375|cri_loss: 0.003795623779296875|unsuper_loss: 0.0 +average reward score: 4.44140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.89%) |Training time=0.46s (21.55%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43 +epoch: 0|step: 1941|ppo_ep: 1|act_loss: -0.0266265869140625|cri_loss: -0.0120849609375|unsuper_loss: 0.0 +average reward score: 4.31640625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.59%) |Training time=0.47s (21.85%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.43 +epoch: 0|step: 1942|ppo_ep: 1|act_loss: 0.00569915771484375|cri_loss: 0.003177642822265625|unsuper_loss: 0.0 +average reward score: 4.33984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.95%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.43 +epoch: 0|step: 1943|ppo_ep: 1|act_loss: 0.00244903564453125|cri_loss: 0.0018787384033203125|unsuper_loss: 0.0 +average reward score: 4.98828125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.35%) |Training time=0.48s (22.08%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43 +epoch: 0|step: 1944|ppo_ep: 1|act_loss: 0.02032470703125|cri_loss: 0.01052093505859375|unsuper_loss: 0.0 +average reward score: 4.91015625 +------------------------------------------------------------------------------------- +|E2E latency=2.46s |Gather latency=0.00s (0.00%) |Generate time=1.59s (64.58%) |Training time=0.48s (19.36%) |Others=0.39 (16.06%)|CurSamplesPerSec=13.03 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1945|ppo_ep: 1|act_loss: 0.0038471221923828125|cri_loss: 0.0023193359375|unsuper_loss: 0.0 +average reward score: 4.13671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.22%) |Training time=0.48s (22.21%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.43 +epoch: 0|step: 1946|ppo_ep: 1|act_loss: -0.00514984130859375|cri_loss: -0.002071380615234375|unsuper_loss: 0.0 +average reward score: 4.83984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.42%) |Training time=0.47s (22.02%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.43 +epoch: 0|step: 1947|ppo_ep: 1|act_loss: -0.0303497314453125|cri_loss: -0.0145416259765625|unsuper_loss: 0.0 +average reward score: 4.73828125 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.12%) |Training time=0.48s (20.65%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.74 |AvgSamplesPerSec=14.43 +epoch: 0|step: 1948|ppo_ep: 1|act_loss: -0.0226287841796875|cri_loss: -0.0107574462890625|unsuper_loss: 0.0 +average reward score: 4.5859375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.19%) |Training time=0.48s (22.23%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.43 +[2023-04-14 09:59:30,807] [INFO] [logging.py:96:log_dist] [Rank 0] step=1950, skipped=22, lr=[8.50359279112753e-06, 8.50359279112753e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:59:30,826] [INFO] [timer.py:199:stop] epoch=0/micro_step=1950/global_step=1950, RunningAvgSamplesPerSec=107.54580213604925, CurrSamplesPerSec=102.16569360734277, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:59:30,918] [INFO] [logging.py:96:log_dist] [Rank 0] step=1950, skipped=27, lr=[4.4091172697010025e-06, 4.4091172697010025e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1949|ppo_ep: 1|act_loss: -0.0047607421875|cri_loss: -0.001953125|unsuper_loss: 0.0 +average reward score: 4.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.40%) |Training time=0.48s (22.07%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.43 +epoch: 0|step: 1950|ppo_ep: 1|act_loss: 0.0032978057861328125|cri_loss: 0.002613067626953125|unsuper_loss: 0.0 +average reward score: 4.453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.29%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.43 +epoch: 0|step: 1951|ppo_ep: 1|act_loss: -0.009979248046875|cri_loss: -0.00467681884765625|unsuper_loss: 0.0 +average reward score: 5.2734375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.30%) |Training time=0.48s (22.15%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43 +epoch: 0|step: 1952|ppo_ep: 1|act_loss: 0.033477783203125|cri_loss: 0.017303466796875|unsuper_loss: 0.0 +average reward score: 5.07421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.40%) |Training time=0.48s (22.04%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.43 +epoch: 0|step: 1953|ppo_ep: 1|act_loss: -0.0017824172973632812|cri_loss: -0.000713348388671875|unsuper_loss: 0.0 +average reward score: 4.69921875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.85%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.43 +epoch: 0|step: 1954|ppo_ep: 1|act_loss: 0.042510986328125|cri_loss: 0.022857666015625|unsuper_loss: 0.0 +average reward score: 4.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.48s (21.96%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.43 +epoch: 0|step: 1955|ppo_ep: 1|act_loss: 0.01194000244140625|cri_loss: 0.0062408447265625|unsuper_loss: 0.0 +average reward score: 4.44140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.03%) |Training time=0.48s (22.42%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.43 +epoch: 0|step: 1956|ppo_ep: 1|act_loss: -0.025299072265625|cri_loss: -0.0088348388671875|unsuper_loss: 0.0 +average reward score: 3.568359375 +------------------------------------------------------------------------------------- +|E2E latency=2.89s |Gather latency=0.00s (0.00%) |Generate time=1.58s (54.65%) |Training time=0.48s (16.54%) |Others=0.83 (28.80%)|CurSamplesPerSec=11.08 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1957|ppo_ep: 1|act_loss: -0.0439453125|cri_loss: -0.02105712890625|unsuper_loss: 0.0 +average reward score: 4.96484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.86%) |Training time=0.49s (22.59%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1958|ppo_ep: 1|act_loss: -0.02569580078125|cri_loss: -0.0119781494140625|unsuper_loss: 0.0 +average reward score: 4.5 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.85%) |Training time=0.49s (22.60%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.42 +[2023-04-14 09:59:53,126] [INFO] [logging.py:96:log_dist] [Rank 0] step=1960, skipped=22, lr=[8.491544840381637e-06, 8.491544840381637e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 09:59:53,144] [INFO] [timer.py:199:stop] epoch=0/micro_step=1960/global_step=1960, RunningAvgSamplesPerSec=107.5075454772204, CurrSamplesPerSec=98.59728841847918, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 09:59:53,237] [INFO] [logging.py:96:log_dist] [Rank 0] step=1960, skipped=27, lr=[4.4028889224428365e-06, 4.4028889224428365e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1959|ppo_ep: 1|act_loss: -0.0106353759765625|cri_loss: -0.004428863525390625|unsuper_loss: 0.0 +average reward score: 5.08203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.79%) |Training time=0.49s (22.67%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1960|ppo_ep: 1|act_loss: -0.01139068603515625|cri_loss: -0.005153656005859375|unsuper_loss: 0.0 +average reward score: 4.3515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.68%) |Training time=0.49s (22.77%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.43 +epoch: 0|step: 1961|ppo_ep: 1|act_loss: 0.00475311279296875|cri_loss: 0.0025424957275390625|unsuper_loss: 0.0 +average reward score: 4.65625 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.74s (74.80%) |Training time=0.49s (20.97%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.79 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1962|ppo_ep: 1|act_loss: 0.01065826416015625|cri_loss: 0.007080078125|unsuper_loss: 0.0 +average reward score: 4.80078125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.96%) |Training time=0.49s (22.50%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.43 +epoch: 0|step: 1963|ppo_ep: 1|act_loss: -0.0230560302734375|cri_loss: -0.0100250244140625|unsuper_loss: 0.0 +average reward score: 4.3203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.91%) |Training time=0.49s (22.54%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43 +epoch: 0|step: 1964|ppo_ep: 1|act_loss: -0.007442474365234375|cri_loss: -0.0033111572265625|unsuper_loss: 0.0 +average reward score: 4.89453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.69%) |Training time=0.49s (22.46%) |Others=0.10 (4.85%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.43 +epoch: 0|step: 1965|ppo_ep: 1|act_loss: -0.0241851806640625|cri_loss: -0.0106658935546875|unsuper_loss: 0.0 +average reward score: 4.6171875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.12%) |Training time=0.49s (22.36%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.43 +epoch: 0|step: 1966|ppo_ep: 1|act_loss: 0.024444580078125|cri_loss: 0.0130615234375|unsuper_loss: 0.0 +average reward score: 4.6796875 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.67s (74.12%) |Training time=0.49s (21.54%) |Others=0.10 (4.35%)|CurSamplesPerSec=14.18 |AvgSamplesPerSec=14.43 +epoch: 0|step: 1967|ppo_ep: 1|act_loss: 0.037841796875|cri_loss: 0.02105712890625|unsuper_loss: 0.0 +average reward score: 4.70703125 +------------------------------------------------------------------------------------- +|E2E latency=2.87s |Gather latency=0.00s (0.00%) |Generate time=1.57s (54.79%) |Training time=0.49s (17.24%) |Others=0.80 (27.96%)|CurSamplesPerSec=11.16 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1968|ppo_ep: 1|act_loss: 0.021453857421875|cri_loss: 0.01126861572265625|unsuper_loss: 0.0 +average reward score: 4.43359375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.02%) |Training time=0.47s (21.43%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.42 +[2023-04-14 10:00:15,737] [INFO] [logging.py:96:log_dist] [Rank 0] step=1970, skipped=22, lr=[8.479442542596152e-06, 8.479442542596152e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:00:15,755] [INFO] [timer.py:199:stop] epoch=0/micro_step=1970/global_step=1970, RunningAvgSamplesPerSec=107.4582436204539, CurrSamplesPerSec=94.59606963662851, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:00:15,847] [INFO] [logging.py:96:log_dist] [Rank 0] step=1970, skipped=27, lr=[4.39663236978037e-06, 4.39663236978037e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1969|ppo_ep: 1|act_loss: 0.04022216796875|cri_loss: 0.022247314453125|unsuper_loss: 0.0 +average reward score: 4.65234375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.56%) |Training time=0.50s (22.96%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1970|ppo_ep: 1|act_loss: 0.002838134765625|cri_loss: 0.002742767333984375|unsuper_loss: 0.0 +average reward score: 4.7578125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.26%) |Training time=0.51s (23.23%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1971|ppo_ep: 1|act_loss: 0.0199432373046875|cri_loss: 0.0103759765625|unsuper_loss: 0.0 +average reward score: 4.5 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.22%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1972|ppo_ep: 1|act_loss: 0.04071044921875|cri_loss: 0.0208740234375|unsuper_loss: 0.0 +average reward score: 4.66015625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.94%) |Training time=0.49s (22.53%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1973|ppo_ep: 1|act_loss: -0.02496337890625|cri_loss: -0.011505126953125|unsuper_loss: 0.0 +average reward score: 4.25 +------------------------------------------------------------------------------------- +|E2E latency=2.51s |Gather latency=0.00s (0.00%) |Generate time=1.58s (62.97%) |Training time=0.49s (19.54%) |Others=0.44 (17.49%)|CurSamplesPerSec=12.73 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1974|ppo_ep: 1|act_loss: -0.0089111328125|cri_loss: -0.00240325927734375|unsuper_loss: 0.0 +average reward score: 4.4765625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.12%) |Training time=0.48s (22.33%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1975|ppo_ep: 1|act_loss: 0.035888671875|cri_loss: 0.019439697265625|unsuper_loss: 0.0 +average reward score: 4.53125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.63%) |Training time=0.50s (22.83%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1976|ppo_ep: 1|act_loss: -0.0299835205078125|cri_loss: -0.0144195556640625|unsuper_loss: 0.0 +average reward score: 4.828125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.02%) |Training time=0.48s (22.41%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1977|ppo_ep: 1|act_loss: 0.12469482421875|cri_loss: 0.06622314453125|unsuper_loss: 0.0 +average reward score: 4.9140625 +------------------------------------------------------------------------------------- +|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.34%) |Training time=0.51s (21.49%) |Others=0.10 (4.17%)|CurSamplesPerSec=13.59 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1978|ppo_ep: 1|act_loss: 0.00582122802734375|cri_loss: 0.00420379638671875|unsuper_loss: 0.0 +average reward score: 4.8125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.79%) |Training time=0.49s (22.67%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.42 +[2023-04-14 10:00:37,960] [INFO] [logging.py:96:log_dist] [Rank 0] step=1980, skipped=22, lr=[8.467286077156324e-06, 8.467286077156324e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:00:37,978] [INFO] [timer.py:199:stop] epoch=0/micro_step=1980/global_step=1980, RunningAvgSamplesPerSec=107.40380143627976, CurrSamplesPerSec=102.09924531086331, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:00:38,071] [INFO] [logging.py:96:log_dist] [Rank 0] step=1980, skipped=27, lr=[4.3903477044508066e-06, 4.3903477044508066e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1979|ppo_ep: 1|act_loss: -0.00296783447265625|cri_loss: -0.0011501312255859375|unsuper_loss: 0.0 +average reward score: 4.5234375 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.59s (69.84%) |Training time=0.48s (20.97%) |Others=0.21 (9.19%)|CurSamplesPerSec=14.10 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1980|ppo_ep: 1|act_loss: -0.04632568359375|cri_loss: -0.021453857421875|unsuper_loss: 0.0 +average reward score: 4.54296875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.48s (22.06%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1981|ppo_ep: 1|act_loss: 0.01451873779296875|cri_loss: 0.0079803466796875|unsuper_loss: 0.0 +average reward score: 4.2109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.35%) |Training time=0.48s (22.09%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1982|ppo_ep: 1|act_loss: -0.0458984375|cri_loss: -0.02252197265625|unsuper_loss: 0.0 +average reward score: 4.80859375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.05%) |Training time=0.48s (22.41%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1983|ppo_ep: 1|act_loss: 0.006771087646484375|cri_loss: 0.0038604736328125|unsuper_loss: 0.0 +average reward score: 5.26953125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.57%) |Training time=0.48s (21.90%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1984|ppo_ep: 1|act_loss: 0.028411865234375|cri_loss: 0.0149078369140625|unsuper_loss: 0.0 +average reward score: 4.8515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.30%) |Training time=0.48s (22.15%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1985|ppo_ep: 1|act_loss: -0.0034332275390625|cri_loss: -0.0011224746704101562|unsuper_loss: 0.0 +average reward score: 4.21875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.05%) |Training time=0.48s (22.41%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1986|ppo_ep: 1|act_loss: 0.025604248046875|cri_loss: 0.01398468017578125|unsuper_loss: 0.0 +average reward score: 4.76171875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.25%) |Training time=0.48s (22.18%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1987|ppo_ep: 1|act_loss: 0.01654052734375|cri_loss: 0.00942230224609375|unsuper_loss: 0.0 +average reward score: 4.671875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.24%) |Training time=0.48s (22.20%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1988|ppo_ep: 1|act_loss: -0.04742431640625|cri_loss: -0.0215301513671875|unsuper_loss: 0.0 +average reward score: 5.08984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.08%) |Training time=0.48s (22.36%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.42 +[2023-04-14 10:00:59,653] [INFO] [logging.py:96:log_dist] [Rank 0] step=1990, skipped=22, lr=[8.455075624250293e-06, 8.455075624250293e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:00:59,671] [INFO] [timer.py:199:stop] epoch=0/micro_step=1990/global_step=1990, RunningAvgSamplesPerSec=107.3690434626594, CurrSamplesPerSec=99.88764347649415, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:00:59,764] [INFO] [logging.py:96:log_dist] [Rank 0] step=1990, skipped=27, lr=[4.3840350196080485e-06, 4.3840350196080485e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1989|ppo_ep: 1|act_loss: 0.0305023193359375|cri_loss: 0.017486572265625|unsuper_loss: 0.0 +average reward score: 4.38671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.05%) |Training time=0.48s (22.40%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43 +epoch: 0|step: 1990|ppo_ep: 1|act_loss: 0.03271484375|cri_loss: 0.017822265625|unsuper_loss: 0.0 +average reward score: 4.95703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.48s (22.24%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.43 +epoch: 0|step: 1991|ppo_ep: 1|act_loss: 0.031494140625|cri_loss: 0.0171051025390625|unsuper_loss: 0.0 +average reward score: 4.25390625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.78%) |Training time=0.49s (22.66%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.43 +epoch: 0|step: 1992|ppo_ep: 1|act_loss: -0.0189361572265625|cri_loss: -0.00396728515625|unsuper_loss: 0.0 +average reward score: 4.7265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.89%) |Training time=0.49s (22.55%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.43 +epoch: 0|step: 1993|ppo_ep: 1|act_loss: -0.026123046875|cri_loss: -0.011444091796875|unsuper_loss: 0.0 +average reward score: 4.34765625 +------------------------------------------------------------------------------------- +|E2E latency=2.57s |Gather latency=0.00s (0.00%) |Generate time=1.74s (67.83%) |Training time=0.49s (19.00%) |Others=0.34 (13.16%)|CurSamplesPerSec=12.47 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1994|ppo_ep: 1|act_loss: 0.00157928466796875|cri_loss: 0.0015621185302734375|unsuper_loss: 0.0 +average reward score: 4.7890625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.02%) |Training time=0.49s (22.46%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1995|ppo_ep: 1|act_loss: 0.04425048828125|cri_loss: 0.022979736328125|unsuper_loss: 0.0 +average reward score: 4.734375 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.68s (74.22%) |Training time=0.48s (21.39%) |Others=0.10 (4.39%)|CurSamplesPerSec=14.12 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1996|ppo_ep: 1|act_loss: 0.025238037109375|cri_loss: 0.01305389404296875|unsuper_loss: 0.0 +average reward score: 4.8046875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.04%) |Training time=0.48s (22.40%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1997|ppo_ep: 1|act_loss: 0.02642822265625|cri_loss: 0.01448822021484375|unsuper_loss: 0.0 +average reward score: 4.53125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.11%) |Training time=0.48s (22.33%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.42 +epoch: 0|step: 1998|ppo_ep: 1|act_loss: 0.01488494873046875|cri_loss: 0.0079498291015625|unsuper_loss: 0.0 +average reward score: 4.7734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.91%) |Training time=0.47s (21.54%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.43 +[2023-04-14 10:01:21,774] [INFO] [logging.py:96:log_dist] [Rank 0] step=2000, skipped=22, lr=[8.442811364866433e-06, 8.442811364866433e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:01:21,793] [INFO] [timer.py:199:stop] epoch=0/micro_step=2000/global_step=2000, RunningAvgSamplesPerSec=107.33059698092995, CurrSamplesPerSec=98.97223170601313, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:01:21,885] [INFO] [logging.py:96:log_dist] [Rank 0] step=2000, skipped=27, lr=[4.3776944088213124e-06, 4.3776944088213124e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 1999|ppo_ep: 1|act_loss: -0.0130462646484375|cri_loss: -0.005878448486328125|unsuper_loss: 0.0 +average reward score: 4.984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.02%) |Training time=0.49s (22.45%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2000|ppo_ep: 1|act_loss: 0.0110015869140625|cri_loss: 0.00705718994140625|unsuper_loss: 0.0 +average reward score: 4.8046875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.08%) |Training time=0.48s (22.37%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2001|ppo_ep: 1|act_loss: -0.0264434814453125|cri_loss: -0.012451171875|unsuper_loss: 0.0 +average reward score: 4.8671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.26%) |Training time=0.48s (22.19%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2002|ppo_ep: 1|act_loss: -0.00392913818359375|cri_loss: 7.2479248046875e-05|unsuper_loss: 0.0 +average reward score: 5.265625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.20%) |Training time=0.49s (22.28%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2003|ppo_ep: 1|act_loss: -0.0116119384765625|cri_loss: -0.00412750244140625|unsuper_loss: 0.0 +average reward score: 4.71875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.08%) |Training time=0.48s (22.37%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2004|ppo_ep: 1|act_loss: 0.0240478515625|cri_loss: 0.01318359375|unsuper_loss: 0.0 +average reward score: 5.05078125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.48s (22.24%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2005|ppo_ep: 1|act_loss: -0.0154266357421875|cri_loss: -0.005596160888671875|unsuper_loss: 0.0 +average reward score: 5.5 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.97%) |Training time=0.48s (22.49%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2006|ppo_ep: 1|act_loss: -0.040557861328125|cri_loss: -0.0189056396484375|unsuper_loss: 0.0 +average reward score: 5.0 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.83%) |Training time=0.49s (22.62%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.43 +[2023-04-14 10:01:39,335] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 2007|ppo_ep: 1|act_loss: -0.044219970703125|cri_loss: -0.0193023681640625|unsuper_loss: 0.0 +average reward score: 5.51171875 +------------------------------------------------------------------------------------- +|E2E latency=2.43s |Gather latency=0.00s (0.00%) |Generate time=1.63s (67.23%) |Training time=0.59s (24.41%) |Others=0.20 (8.35%)|CurSamplesPerSec=13.17 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2008|ppo_ep: 1|act_loss: -0.019622802734375|cri_loss: -0.0090789794921875|unsuper_loss: 0.0 +average reward score: 5.20703125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.03%) |Training time=0.48s (22.41%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.43 +[2023-04-14 10:01:43,655] [INFO] [logging.py:96:log_dist] [Rank 0] step=2010, skipped=22, lr=[8.43049348079065e-06, 8.43049348079065e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:01:43,673] [INFO] [timer.py:199:stop] epoch=0/micro_step=2010/global_step=2010, RunningAvgSamplesPerSec=107.27663857336522, CurrSamplesPerSec=97.40335191638643, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:01:43,766] [INFO] [logging.py:96:log_dist] [Rank 0] step=2010, skipped=28, lr=[4.37196406009969e-06, 4.37196406009969e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2009|ppo_ep: 1|act_loss: -0.005809783935546875|cri_loss: -0.000865936279296875|unsuper_loss: 0.0 +average reward score: 5.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.76%) |Training time=0.49s (22.70%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2010|ppo_ep: 1|act_loss: -0.00772857666015625|cri_loss: -0.00311279296875|unsuper_loss: 0.0 +average reward score: 5.61328125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.48s (22.23%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2011|ppo_ep: 1|act_loss: -0.00501251220703125|cri_loss: -0.0017919540405273438|unsuper_loss: 0.0 +average reward score: 5.13671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.82%) |Training time=0.49s (22.63%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2012|ppo_ep: 1|act_loss: 0.0738525390625|cri_loss: 0.038116455078125|unsuper_loss: 0.0 +average reward score: 5.47265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.04%) |Training time=0.48s (22.40%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2013|ppo_ep: 1|act_loss: 0.0192413330078125|cri_loss: 0.0116424560546875|unsuper_loss: 0.0 +average reward score: 4.9765625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.33%) |Training time=0.51s (23.15%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2014|ppo_ep: 1|act_loss: 0.0540771484375|cri_loss: 0.029754638671875|unsuper_loss: 0.0 +average reward score: 5.171875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.17%) |Training time=0.51s (23.33%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2015|ppo_ep: 1|act_loss: -0.0133514404296875|cri_loss: -0.00567626953125|unsuper_loss: 0.0 +average reward score: 5.25 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.29%) |Training time=0.45s (20.97%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2016|ppo_ep: 1|act_loss: 0.014251708984375|cri_loss: 0.00818634033203125|unsuper_loss: 0.0 +average reward score: 5.46875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.17%) |Training time=0.42s (19.22%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2017|ppo_ep: 1|act_loss: -0.0249481201171875|cri_loss: -0.0024261474609375|unsuper_loss: 0.0 +average reward score: 5.4453125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.59%) |Training time=0.46s (20.89%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2018|ppo_ep: 1|act_loss: 0.0255126953125|cri_loss: 0.013580322265625|unsuper_loss: 0.0 +average reward score: 5.70703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.35%) |Training time=0.46s (21.09%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.43 +[2023-04-14 10:02:05,348] [INFO] [logging.py:96:log_dist] [Rank 0] step=2020, skipped=22, lr=[8.418122154603703e-06, 8.418122154603703e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:02:05,367] [INFO] [timer.py:199:stop] epoch=0/micro_step=2020/global_step=2020, RunningAvgSamplesPerSec=107.26067687682459, CurrSamplesPerSec=111.90816404803911, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:02:05,459] [INFO] [logging.py:96:log_dist] [Rank 0] step=2020, skipped=28, lr=[4.36557064928396e-06, 4.36557064928396e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2019|ppo_ep: 1|act_loss: 0.04864501953125|cri_loss: 0.0251312255859375|unsuper_loss: 0.0 +average reward score: 5.21875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.70%) |Training time=0.45s (20.75%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2020|ppo_ep: 1|act_loss: -0.04766845703125|cri_loss: -0.0232086181640625|unsuper_loss: 0.0 +average reward score: 5.12890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.18%) |Training time=0.46s (21.27%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2021|ppo_ep: 1|act_loss: 0.019683837890625|cri_loss: 0.011077880859375|unsuper_loss: 0.0 +average reward score: 4.96484375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.69%) |Training time=0.45s (20.62%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2022|ppo_ep: 1|act_loss: 0.02667236328125|cri_loss: 0.018280029296875|unsuper_loss: 0.0 +average reward score: 5.0859375 +------------------------------------------------------------------------------------- +|E2E latency=2.46s |Gather latency=0.00s (0.00%) |Generate time=1.78s (72.36%) |Training time=0.46s (18.90%) |Others=0.21 (8.74%)|CurSamplesPerSec=13.01 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2023|ppo_ep: 1|act_loss: -0.0203704833984375|cri_loss: -0.00766754150390625|unsuper_loss: 0.0 +average reward score: 4.828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.67%) |Training time=0.47s (21.77%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2024|ppo_ep: 1|act_loss: -0.0394287109375|cri_loss: -0.018646240234375|unsuper_loss: 0.0 +average reward score: 5.32421875 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.62s (72.14%) |Training time=0.53s (23.49%) |Others=0.10 (4.37%)|CurSamplesPerSec=14.25 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2025|ppo_ep: 1|act_loss: 0.00598907470703125|cri_loss: 0.00673675537109375|unsuper_loss: 0.0 +average reward score: 5.546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.05%) |Training time=0.46s (21.39%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2026|ppo_ep: 1|act_loss: -0.01861572265625|cri_loss: -0.0078582763671875|unsuper_loss: 0.0 +average reward score: 5.20703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.08%) |Training time=0.46s (21.32%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2027|ppo_ep: 1|act_loss: 0.064453125|cri_loss: 0.033477783203125|unsuper_loss: 0.0 +average reward score: 5.52734375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.28%) |Training time=0.45s (21.10%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2028|ppo_ep: 1|act_loss: 0.01007080078125|cri_loss: 0.00605010986328125|unsuper_loss: 0.0 +average reward score: 5.69921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.27%) |Training time=0.48s (22.21%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.43 +[2023-04-14 10:02:27,376] [INFO] [logging.py:96:log_dist] [Rank 0] step=2030, skipped=22, lr=[8.405697569678487e-06, 8.405697569678487e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:02:27,394] [INFO] [timer.py:199:stop] epoch=0/micro_step=2030/global_step=2030, RunningAvgSamplesPerSec=107.2414902830951, CurrSamplesPerSec=93.43053466253548, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:02:27,487] [INFO] [logging.py:96:log_dist] [Rank 0] step=2030, skipped=28, lr=[4.3591495862107625e-06, 4.3591495862107625e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2029|ppo_ep: 1|act_loss: 0.07958984375|cri_loss: 0.04119873046875|unsuper_loss: 0.0 +average reward score: 4.9921875 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.56%) |Training time=0.51s (22.99%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2030|ppo_ep: 1|act_loss: 0.0701904296875|cri_loss: 0.036529541015625|unsuper_loss: 0.0 +average reward score: 4.5390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.47%) |Training time=0.48s (22.00%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2031|ppo_ep: 1|act_loss: 0.03338623046875|cri_loss: 0.017608642578125|unsuper_loss: 0.0 +average reward score: 5.14453125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.79%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2032|ppo_ep: 1|act_loss: -0.0008573532104492188|cri_loss: -0.00025463104248046875|unsuper_loss: 0.0 +average reward score: 4.62890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.48s (22.03%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2033|ppo_ep: 1|act_loss: -0.0310211181640625|cri_loss: -0.0144195556640625|unsuper_loss: 0.0 +average reward score: 5.296875 +------------------------------------------------------------------------------------- +|E2E latency=2.08s |Gather latency=0.00s (0.00%) |Generate time=1.59s (76.65%) |Training time=0.39s (18.61%) |Others=0.10 (4.75%)|CurSamplesPerSec=15.42 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2034|ppo_ep: 1|act_loss: -0.03662109375|cri_loss: -0.0159759521484375|unsuper_loss: 0.0 +average reward score: 5.23828125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.28%) |Training time=0.48s (22.17%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2035|ppo_ep: 1|act_loss: -0.04473876953125|cri_loss: -0.021026611328125|unsuper_loss: 0.0 +average reward score: 5.50390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.04%) |Training time=0.49s (22.42%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2036|ppo_ep: 1|act_loss: -0.072265625|cri_loss: -0.034637451171875|unsuper_loss: 0.0 +average reward score: 4.8984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.90%) |Training time=0.49s (22.58%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2037|ppo_ep: 1|act_loss: 0.021484375|cri_loss: 0.01324462890625|unsuper_loss: 0.0 +average reward score: 5.16796875 +------------------------------------------------------------------------------------- +|E2E latency=2.94s |Gather latency=0.00s (0.00%) |Generate time=1.58s (53.92%) |Training time=0.59s (20.15%) |Others=0.76 (25.93%)|CurSamplesPerSec=10.90 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2038|ppo_ep: 1|act_loss: 0.164794921875|cri_loss: 0.093994140625|unsuper_loss: 0.0 +average reward score: 4.7421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.92%) |Training time=0.49s (22.54%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.43 +[2023-04-14 10:02:50,207] [INFO] [logging.py:96:log_dist] [Rank 0] step=2040, skipped=22, lr=[8.393219910177327e-06, 8.393219910177327e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:02:50,225] [INFO] [timer.py:199:stop] epoch=0/micro_step=2040/global_step=2040, RunningAvgSamplesPerSec=107.20764921501167, CurrSamplesPerSec=99.54234960114154, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:02:50,317] [INFO] [logging.py:96:log_dist] [Rank 0] step=2040, skipped=28, lr=[4.352700966055743e-06, 4.352700966055743e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2039|ppo_ep: 1|act_loss: -0.006927490234375|cri_loss: -0.003032684326171875|unsuper_loss: 0.0 +average reward score: 5.71875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.12%) |Training time=0.48s (22.36%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2040|ppo_ep: 1|act_loss: 0.061065673828125|cri_loss: 0.032318115234375|unsuper_loss: 0.0 +average reward score: 5.36328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.03%) |Training time=0.49s (22.43%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2041|ppo_ep: 1|act_loss: 0.09600830078125|cri_loss: 0.05084228515625|unsuper_loss: 0.0 +average reward score: 5.33984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.07%) |Training time=0.49s (22.41%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2042|ppo_ep: 1|act_loss: 0.067626953125|cri_loss: 0.037261962890625|unsuper_loss: 0.0 +average reward score: 5.03515625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.85%) |Training time=0.49s (22.64%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2043|ppo_ep: 1|act_loss: 0.01378631591796875|cri_loss: 0.0082244873046875|unsuper_loss: 0.0 +average reward score: 5.65625 +------------------------------------------------------------------------------------- +|E2E latency=2.73s |Gather latency=0.00s (0.00%) |Generate time=1.60s (58.73%) |Training time=0.49s (17.81%) |Others=0.64 (23.46%)|CurSamplesPerSec=11.73 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2044|ppo_ep: 1|act_loss: -0.0100250244140625|cri_loss: -0.004634857177734375|unsuper_loss: 0.0 +average reward score: 5.12109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.37%) |Training time=0.47s (22.02%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2045|ppo_ep: 1|act_loss: -0.02764892578125|cri_loss: -0.0125579833984375|unsuper_loss: 0.0 +average reward score: 5.59765625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.45%) |Training time=0.50s (23.05%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2046|ppo_ep: 1|act_loss: -0.019775390625|cri_loss: -0.0078277587890625|unsuper_loss: 0.0 +average reward score: 5.625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.60%) |Training time=0.50s (22.80%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2047|ppo_ep: 1|act_loss: -0.0533447265625|cri_loss: -0.0249481201171875|unsuper_loss: 0.0 +average reward score: 4.87109375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.77%) |Training time=0.50s (22.74%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2048|ppo_ep: 1|act_loss: -0.0435791015625|cri_loss: -0.020751953125|unsuper_loss: 0.0 +average reward score: 5.45703125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.92%) |Training time=0.49s (22.57%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.43 +[2023-04-14 10:03:12,500] [INFO] [logging.py:96:log_dist] [Rank 0] step=2050, skipped=22, lr=[8.380689361049238e-06, 8.380689361049238e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:03:12,724] [INFO] [timer.py:199:stop] epoch=0/micro_step=2050/global_step=2050, RunningAvgSamplesPerSec=107.1224647769258, CurrSamplesPerSec=61.993959389620294, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:03:12,816] [INFO] [logging.py:96:log_dist] [Rank 0] step=2050, skipped=28, lr=[4.34622488440301e-06, 4.34622488440301e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2049|ppo_ep: 1|act_loss: -0.03973388671875|cri_loss: -0.0189056396484375|unsuper_loss: 0.0 +average reward score: 5.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.59s (67.24%) |Training time=0.68s (28.64%) |Others=0.10 (4.13%)|CurSamplesPerSec=13.50 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2050|ppo_ep: 1|act_loss: 0.01849365234375|cri_loss: 0.0101165771484375|unsuper_loss: 0.0 +average reward score: 5.21875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.36%) |Training time=0.48s (22.10%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2051|ppo_ep: 1|act_loss: 0.038970947265625|cri_loss: 0.020233154296875|unsuper_loss: 0.0 +average reward score: 5.55859375 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.81%) |Training time=0.49s (21.00%) |Others=0.10 (4.19%)|CurSamplesPerSec=13.65 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2052|ppo_ep: 1|act_loss: 0.0194244384765625|cri_loss: 0.010223388671875|unsuper_loss: 0.0 +average reward score: 4.875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.52%) |Training time=0.48s (21.99%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2053|ppo_ep: 1|act_loss: 0.0357666015625|cri_loss: 0.0200347900390625|unsuper_loss: 0.0 +average reward score: 5.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.60%) |Training time=0.48s (21.05%) |Others=0.10 (4.35%)|CurSamplesPerSec=14.17 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2054|ppo_ep: 1|act_loss: 0.01934814453125|cri_loss: 0.01020050048828125|unsuper_loss: 0.0 +average reward score: 5.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.58s (67.31%) |Training time=0.46s (19.47%) |Others=0.31 (13.22%)|CurSamplesPerSec=13.60 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2055|ppo_ep: 1|act_loss: -0.04913330078125|cri_loss: -0.020721435546875|unsuper_loss: 0.0 +average reward score: 5.26953125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.95%) |Training time=0.46s (21.47%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2056|ppo_ep: 1|act_loss: -0.0174407958984375|cri_loss: -0.0081787109375|unsuper_loss: 0.0 +average reward score: 5.45703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.87%) |Training time=0.47s (21.58%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2057|ppo_ep: 1|act_loss: -0.0110015869140625|cri_loss: -0.004688262939453125|unsuper_loss: 0.0 +average reward score: 5.203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.62%) |Training time=0.47s (21.80%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2058|ppo_ep: 1|act_loss: -0.01165771484375|cri_loss: -0.00550079345703125|unsuper_loss: 0.0 +average reward score: 5.21875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.93%) |Training time=0.46s (21.52%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.43 +[2023-04-14 10:03:34,775] [INFO] [logging.py:96:log_dist] [Rank 0] step=2060, skipped=22, lr=[8.368106108027184e-06, 8.368106108027184e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:03:34,794] [INFO] [timer.py:199:stop] epoch=0/micro_step=2060/global_step=2060, RunningAvgSamplesPerSec=107.10558702661527, CurrSamplesPerSec=106.73938194768076, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:03:34,886] [INFO] [logging.py:96:log_dist] [Rank 0] step=2060, skipped=28, lr=[4.339721437243713e-06, 4.339721437243713e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2059|ppo_ep: 1|act_loss: 0.025543212890625|cri_loss: 0.01482391357421875|unsuper_loss: 0.0 +average reward score: 5.56640625 +------------------------------------------------------------------------------------- +|E2E latency=2.70s |Gather latency=0.00s (0.00%) |Generate time=1.58s (58.77%) |Training time=0.46s (17.16%) |Others=0.65 (24.07%)|CurSamplesPerSec=11.87 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2060|ppo_ep: 1|act_loss: -0.03955078125|cri_loss: -0.0190887451171875|unsuper_loss: 0.0 +average reward score: 4.8671875 +------------------------------------------------------------------------------------- +|E2E latency=2.06s |Gather latency=0.00s (0.00%) |Generate time=1.58s (76.50%) |Training time=0.39s (18.73%) |Others=0.10 (4.77%)|CurSamplesPerSec=15.50 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2061|ppo_ep: 1|act_loss: 0.002399444580078125|cri_loss: 0.0017910003662109375|unsuper_loss: 0.0 +average reward score: 5.2578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.02%) |Training time=0.46s (21.41%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2062|ppo_ep: 1|act_loss: -0.0010166168212890625|cri_loss: -0.0002918243408203125|unsuper_loss: 0.0 +average reward score: 5.12109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.74%) |Training time=0.47s (21.69%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2063|ppo_ep: 1|act_loss: -0.055419921875|cri_loss: -0.0272369384765625|unsuper_loss: 0.0 +average reward score: 5.3046875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.76%) |Training time=0.47s (21.69%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2064|ppo_ep: 1|act_loss: 0.029693603515625|cri_loss: 0.0154266357421875|unsuper_loss: 0.0 +average reward score: 5.48828125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.69%) |Training time=0.47s (21.75%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2065|ppo_ep: 1|act_loss: 0.0760498046875|cri_loss: 0.03997802734375|unsuper_loss: 0.0 +average reward score: 4.6796875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.55%) |Training time=0.47s (21.89%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2066|ppo_ep: 1|act_loss: 0.00406646728515625|cri_loss: 0.00244140625|unsuper_loss: 0.0 +average reward score: 5.515625 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.59%) |Training time=0.47s (20.17%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.82 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2067|ppo_ep: 1|act_loss: 0.0028285980224609375|cri_loss: 0.00255584716796875|unsuper_loss: 0.0 +average reward score: 5.421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.51%) |Training time=0.47s (21.94%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2068|ppo_ep: 1|act_loss: -0.01499176025390625|cri_loss: -0.0057525634765625|unsuper_loss: 0.0 +average reward score: 5.29296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.95%) |Training time=0.46s (21.48%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.43 +[2023-04-14 10:03:56,902] [INFO] [logging.py:96:log_dist] [Rank 0] step=2070, skipped=22, lr=[8.35547033762533e-06, 8.35547033762533e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:03:57,081] [INFO] [timer.py:199:stop] epoch=0/micro_step=2070/global_step=2070, RunningAvgSamplesPerSec=107.08235179869443, CurrSamplesPerSec=68.86106831377825, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:03:57,174] [INFO] [logging.py:96:log_dist] [Rank 0] step=2070, skipped=28, lr=[4.333190720974631e-06, 4.333190720974631e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2069|ppo_ep: 1|act_loss: -0.04583740234375|cri_loss: -0.02099609375|unsuper_loss: 0.0 +average reward score: 5.14453125 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.58s (68.57%) |Training time=0.63s (27.18%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.87 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2070|ppo_ep: 1|act_loss: -0.01213836669921875|cri_loss: -0.00543975830078125|unsuper_loss: 0.0 +average reward score: 5.23828125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.49%) |Training time=0.47s (21.95%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2071|ppo_ep: 1|act_loss: 0.02960205078125|cri_loss: 0.0157470703125|unsuper_loss: 0.0 +average reward score: 4.85546875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.27%) |Training time=0.45s (21.16%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2072|ppo_ep: 1|act_loss: -0.00476837158203125|cri_loss: -0.0019512176513671875|unsuper_loss: 0.0 +average reward score: 5.2734375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.39%) |Training time=0.48s (21.88%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2073|ppo_ep: 1|act_loss: 0.07574462890625|cri_loss: 0.039306640625|unsuper_loss: 0.0 +average reward score: 5.42578125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.35%) |Training time=0.48s (22.15%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2074|ppo_ep: 1|act_loss: 0.0472412109375|cri_loss: 0.0240936279296875|unsuper_loss: 0.0 +average reward score: 5.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.34%) |Training time=0.48s (22.13%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2075|ppo_ep: 1|act_loss: 0.05096435546875|cri_loss: 0.026123046875|unsuper_loss: 0.0 +average reward score: 4.92578125 +------------------------------------------------------------------------------------- +|E2E latency=2.48s |Gather latency=0.00s (0.00%) |Generate time=1.59s (63.90%) |Training time=0.48s (19.21%) |Others=0.42 (16.89%)|CurSamplesPerSec=12.90 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2076|ppo_ep: 1|act_loss: 0.023681640625|cri_loss: 0.0122833251953125|unsuper_loss: 0.0 +average reward score: 4.8828125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.34%) |Training time=0.47s (21.99%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2077|ppo_ep: 1|act_loss: 0.01788330078125|cri_loss: 0.0093536376953125|unsuper_loss: 0.0 +average reward score: 5.49609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.48s (22.11%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2078|ppo_ep: 1|act_loss: 0.04364013671875|cri_loss: 0.02362060546875|unsuper_loss: 0.0 +average reward score: 4.7578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.48s (22.13%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.43 +[2023-04-14 10:04:19,005] [INFO] [logging.py:96:log_dist] [Rank 0] step=2080, skipped=22, lr=[8.342782237136277e-06, 8.342782237136277e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:04:19,024] [INFO] [timer.py:199:stop] epoch=0/micro_step=2080/global_step=2080, RunningAvgSamplesPerSec=107.05842882438776, CurrSamplesPerSec=101.66045801757994, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:04:19,116] [INFO] [logging.py:96:log_dist] [Rank 0] step=2080, skipped=28, lr=[4.326632832396733e-06, 4.326632832396733e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2079|ppo_ep: 1|act_loss: -0.033355712890625|cri_loss: -0.0159149169921875|unsuper_loss: 0.0 +average reward score: 5.23046875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.33%) |Training time=0.48s (22.14%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2080|ppo_ep: 1|act_loss: -2.86102294921875e-05|cri_loss: 0.0017223358154296875|unsuper_loss: 0.0 +average reward score: 5.50390625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.51%) |Training time=0.47s (21.94%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2081|ppo_ep: 1|act_loss: -0.0098876953125|cri_loss: -0.00450897216796875|unsuper_loss: 0.0 +average reward score: 5.62109375 +------------------------------------------------------------------------------------- +|E2E latency=2.85s |Gather latency=0.00s (0.00%) |Generate time=1.76s (61.87%) |Training time=0.49s (17.14%) |Others=0.60 (20.98%)|CurSamplesPerSec=11.23 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2082|ppo_ep: 1|act_loss: -0.01531982421875|cri_loss: -0.007282257080078125|unsuper_loss: 0.0 +average reward score: 5.2421875 +------------------------------------------------------------------------------------- +|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.68s (75.09%) |Training time=0.46s (20.52%) |Others=0.10 (4.38%)|CurSamplesPerSec=14.28 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2083|ppo_ep: 1|act_loss: -0.012115478515625|cri_loss: -0.005657196044921875|unsuper_loss: 0.0 +average reward score: 5.5234375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.53%) |Training time=0.45s (20.90%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2084|ppo_ep: 1|act_loss: 0.03741455078125|cri_loss: 0.020172119140625|unsuper_loss: 0.0 +average reward score: 5.7890625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.88%) |Training time=0.46s (21.52%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2085|ppo_ep: 1|act_loss: 0.0025177001953125|cri_loss: 0.0016345977783203125|unsuper_loss: 0.0 +average reward score: 5.4609375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.60%) |Training time=0.47s (21.83%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2086|ppo_ep: 1|act_loss: 0.094970703125|cri_loss: 0.0496826171875|unsuper_loss: 0.0 +average reward score: 5.27734375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.93%) |Training time=0.46s (21.46%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2087|ppo_ep: 1|act_loss: -0.020263671875|cri_loss: -0.00921630859375|unsuper_loss: 0.0 +average reward score: 5.11328125 +------------------------------------------------------------------------------------- +|E2E latency=2.47s |Gather latency=0.00s (0.00%) |Generate time=1.60s (64.66%) |Training time=0.46s (18.46%) |Others=0.42 (16.88%)|CurSamplesPerSec=12.95 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2088|ppo_ep: 1|act_loss: 0.0782470703125|cri_loss: 0.0416259765625|unsuper_loss: 0.0 +average reward score: 5.51171875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.67%) |Training time=0.47s (21.75%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43 +[2023-04-14 10:04:41,595] [INFO] [logging.py:96:log_dist] [Rank 0] step=2090, skipped=22, lr=[8.330041994628283e-06, 8.330041994628283e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:04:41,613] [INFO] [timer.py:199:stop] epoch=0/micro_step=2090/global_step=2090, RunningAvgSamplesPerSec=107.05361134522774, CurrSamplesPerSec=108.12874805543001, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:04:41,706] [INFO] [logging.py:96:log_dist] [Rank 0] step=2090, skipped=28, lr=[4.320047868713747e-06, 4.320047868713747e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2089|ppo_ep: 1|act_loss: -0.01372528076171875|cri_loss: -0.006328582763671875|unsuper_loss: 0.0 +average reward score: 5.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.99%) |Training time=0.46s (21.44%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2090|ppo_ep: 1|act_loss: 0.005096435546875|cri_loss: 0.00392913818359375|unsuper_loss: 0.0 +average reward score: 5.734375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.91%) |Training time=0.46s (21.50%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2091|ppo_ep: 1|act_loss: -0.005229949951171875|cri_loss: -0.0017862319946289062|unsuper_loss: 0.0 +average reward score: 5.2421875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.74%) |Training time=0.46s (21.67%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2092|ppo_ep: 1|act_loss: 0.0266876220703125|cri_loss: 0.0213470458984375|unsuper_loss: 0.0 +average reward score: 5.12890625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.77%) |Training time=0.46s (21.66%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2093|ppo_ep: 1|act_loss: -0.0106201171875|cri_loss: -0.0045623779296875|unsuper_loss: 0.0 +average reward score: 5.12890625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.85%) |Training time=0.46s (21.55%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2094|ppo_ep: 1|act_loss: 0.040252685546875|cri_loss: 0.025543212890625|unsuper_loss: 0.0 +average reward score: 4.7734375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.12%) |Training time=0.46s (21.29%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2095|ppo_ep: 1|act_loss: -0.0213623046875|cri_loss: -0.00997161865234375|unsuper_loss: 0.0 +average reward score: 4.7734375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.84%) |Training time=0.46s (21.56%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2096|ppo_ep: 1|act_loss: -0.0013408660888671875|cri_loss: -0.0003294944763183594|unsuper_loss: 0.0 +average reward score: 5.49609375 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.76s (76.15%) |Training time=0.45s (19.60%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.85 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2097|ppo_ep: 1|act_loss: 0.02960205078125|cri_loss: 0.0153045654296875|unsuper_loss: 0.0 +average reward score: 5.26171875 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.99%) |Training time=0.46s (21.41%) |Others=0.10 (4.60%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2098|ppo_ep: 1|act_loss: 0.08538818359375|cri_loss: 0.045074462890625|unsuper_loss: 0.0 +average reward score: 5.39453125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.85%) |Training time=0.46s (21.55%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.43 +[2023-04-14 10:05:03,168] [INFO] [logging.py:96:log_dist] [Rank 0] step=2100, skipped=22, lr=[8.317249798942473e-06, 8.317249798942473e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:05:03,186] [INFO] [timer.py:199:stop] epoch=0/micro_step=2100/global_step=2100, RunningAvgSamplesPerSec=107.05548599476938, CurrSamplesPerSec=106.59958715986158, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:05:03,279] [INFO] [logging.py:96:log_dist] [Rank 0] step=2100, skipped=28, lr=[4.313435927530719e-06, 4.313435927530719e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2099|ppo_ep: 1|act_loss: 0.0094146728515625|cri_loss: 0.005313873291015625|unsuper_loss: 0.0 +average reward score: 5.88671875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.81%) |Training time=0.46s (21.62%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2100|ppo_ep: 1|act_loss: -0.013214111328125|cri_loss: -0.004150390625|unsuper_loss: 0.0 +average reward score: 4.9921875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.82%) |Training time=0.46s (21.54%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2101|ppo_ep: 1|act_loss: -0.037139892578125|cri_loss: -0.0179595947265625|unsuper_loss: 0.0 +average reward score: 5.01171875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.03%) |Training time=0.46s (21.37%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2102|ppo_ep: 1|act_loss: -0.02099609375|cri_loss: -0.0099945068359375|unsuper_loss: 0.0 +average reward score: 5.3828125 +------------------------------------------------------------------------------------- +|E2E latency=2.46s |Gather latency=0.00s (0.00%) |Generate time=1.61s (65.65%) |Training time=0.44s (17.92%) |Others=0.40 (16.43%)|CurSamplesPerSec=13.02 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2103|ppo_ep: 1|act_loss: 0.01552581787109375|cri_loss: 0.0088958740234375|unsuper_loss: 0.0 +average reward score: 5.41796875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.45s (21.10%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2104|ppo_ep: 1|act_loss: -0.002620697021484375|cri_loss: -0.0007905960083007812|unsuper_loss: 0.0 +average reward score: 5.578125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.32%) |Training time=0.45s (21.08%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2105|ppo_ep: 1|act_loss: 0.0304412841796875|cri_loss: 0.0157318115234375|unsuper_loss: 0.0 +average reward score: 5.078125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.51%) |Training time=0.45s (20.89%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2106|ppo_ep: 1|act_loss: 0.0146331787109375|cri_loss: 0.00815582275390625|unsuper_loss: 0.0 +average reward score: 4.8046875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.53%) |Training time=0.45s (20.91%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2107|ppo_ep: 1|act_loss: -0.0046539306640625|cri_loss: -0.001953125|unsuper_loss: 0.0 +average reward score: 5.796875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.32%) |Training time=0.45s (21.08%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.43 +[2023-04-14 10:05:22,846] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 2108|ppo_ep: 1|act_loss: -0.006683349609375|cri_loss: -0.002918243408203125|unsuper_loss: 0.0 +average reward score: 5.3046875 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.79%) |Training time=0.45s (21.08%) |Others=0.09 (4.12%)|CurSamplesPerSec=15.04 |AvgSamplesPerSec=14.43 +[2023-04-14 10:05:24,880] [INFO] [logging.py:96:log_dist] [Rank 0] step=2110, skipped=22, lr=[8.30440583969005e-06, 8.30440583969005e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:05:24,898] [INFO] [timer.py:199:stop] epoch=0/micro_step=2110/global_step=2110, RunningAvgSamplesPerSec=107.07153675733814, CurrSamplesPerSec=106.8084394578795, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:05:24,982] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +[2023-04-14 10:05:24,982] [INFO] [logging.py:96:log_dist] [Rank 0] step=2110, skipped=30, lr=[4.308127016630176e-06, 4.308127016630176e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2109|ppo_ep: 1|act_loss: -0.0302276611328125|cri_loss: -0.01470184326171875|unsuper_loss: 0.0 +average reward score: 5.65234375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.46s (21.64%) |Others=0.09 (4.10%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2110|ppo_ep: 1|act_loss: 0.011016845703125|cri_loss: 0.00604248046875|unsuper_loss: 0.0 +average reward score: 5.359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.40%) |Training time=0.45s (21.02%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2111|ppo_ep: 1|act_loss: 0.0041656494140625|cri_loss: 0.0021915435791015625|unsuper_loss: 0.0 +average reward score: 4.875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.95%) |Training time=0.45s (20.54%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2112|ppo_ep: 1|act_loss: 0.0007038116455078125|cri_loss: 0.0009160041809082031|unsuper_loss: 0.0 +average reward score: 5.4921875 +------------------------------------------------------------------------------------- +|E2E latency=2.38s |Gather latency=0.00s (0.00%) |Generate time=1.82s (76.67%) |Training time=0.46s (19.18%) |Others=0.10 (4.14%)|CurSamplesPerSec=13.46 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2113|ppo_ep: 1|act_loss: 0.035888671875|cri_loss: 0.0190582275390625|unsuper_loss: 0.0 +average reward score: 5.078125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.17%) |Training time=0.45s (21.24%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2114|ppo_ep: 1|act_loss: -0.0216522216796875|cri_loss: -0.01023101806640625|unsuper_loss: 0.0 +average reward score: 5.3984375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.34%) |Training time=0.45s (21.08%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2115|ppo_ep: 1|act_loss: -0.014190673828125|cri_loss: -0.006740570068359375|unsuper_loss: 0.0 +average reward score: 4.8515625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.33%) |Training time=0.45s (21.07%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2116|ppo_ep: 1|act_loss: -0.03753662109375|cri_loss: -0.016510009765625|unsuper_loss: 0.0 +average reward score: 5.20703125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.00%) |Training time=0.46s (21.43%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2117|ppo_ep: 1|act_loss: -0.02288818359375|cri_loss: -0.01116943359375|unsuper_loss: 0.0 +average reward score: 5.53125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.56%) |Training time=0.45s (20.86%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2118|ppo_ep: 1|act_loss: 0.041748046875|cri_loss: 0.0218658447265625|unsuper_loss: 0.0 +average reward score: 5.29296875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.45s (21.16%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.43 +[2023-04-14 10:05:46,571] [INFO] [logging.py:96:log_dist] [Rank 0] step=2120, skipped=22, lr=[8.291510307249474e-06, 8.291510307249474e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:05:46,589] [INFO] [timer.py:199:stop] epoch=0/micro_step=2120/global_step=2120, RunningAvgSamplesPerSec=107.08617854175999, CurrSamplesPerSec=108.78975468800026, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:05:46,682] [INFO] [logging.py:96:log_dist] [Rank 0] step=2120, skipped=30, lr=[4.301466763187256e-06, 4.301466763187256e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2119|ppo_ep: 1|act_loss: -0.0065765380859375|cri_loss: -0.00258636474609375|unsuper_loss: 0.0 +average reward score: 5.22265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.09%) |Training time=0.46s (21.34%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2120|ppo_ep: 1|act_loss: 0.0123443603515625|cri_loss: 0.007183074951171875|unsuper_loss: 0.0 +average reward score: 4.796875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.23%) |Training time=0.45s (21.18%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2121|ppo_ep: 1|act_loss: -0.019927978515625|cri_loss: -0.0091552734375|unsuper_loss: 0.0 +average reward score: 5.26171875 +------------------------------------------------------------------------------------- +|E2E latency=2.95s |Gather latency=0.00s (0.00%) |Generate time=1.58s (53.69%) |Training time=0.45s (15.36%) |Others=0.91 (30.95%)|CurSamplesPerSec=10.85 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2122|ppo_ep: 1|act_loss: -0.0125274658203125|cri_loss: -0.005584716796875|unsuper_loss: 0.0 +average reward score: 5.23046875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.77%) |Training time=0.46s (21.65%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2123|ppo_ep: 1|act_loss: 0.03314208984375|cri_loss: 0.0187530517578125|unsuper_loss: 0.0 +average reward score: 4.93359375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.22%) |Training time=0.45s (21.19%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.43 +[2023-04-14 10:05:58,065] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 2124|ppo_ep: 1|act_loss: 0.0307159423828125|cri_loss: 0.0158843994140625|unsuper_loss: 0.0 +average reward score: 5.52734375 +------------------------------------------------------------------------------------- +|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.59s (75.23%) |Training time=0.42s (20.12%) |Others=0.10 (4.65%)|CurSamplesPerSec=15.18 |AvgSamplesPerSec=14.43 +[2023-04-14 10:06:00,172] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 2125|ppo_ep: 1|act_loss: 0.00861358642578125|cri_loss: 0.004924774169921875|unsuper_loss: 0.0 +average reward score: 5.04296875 +------------------------------------------------------------------------------------- +|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.58s (75.14%) |Training time=0.43s (20.21%) |Others=0.10 (4.65%)|CurSamplesPerSec=15.19 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2126|ppo_ep: 1|act_loss: -0.0016460418701171875|cri_loss: 2.6702880859375e-05|unsuper_loss: 0.0 +average reward score: 5.1796875 +------------------------------------------------------------------------------------- +|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.08%) |Training time=0.48s (21.46%) |Others=0.17 (7.45%)|CurSamplesPerSec=14.34 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2127|ppo_ep: 1|act_loss: -0.0195770263671875|cri_loss: -0.0091552734375|unsuper_loss: 0.0 +average reward score: 5.24609375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.21%) |Training time=0.45s (21.19%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2128|ppo_ep: 1|act_loss: 0.0018329620361328125|cri_loss: 0.0025920867919921875|unsuper_loss: 0.0 +average reward score: 5.29296875 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.18%) |Training time=0.45s (21.23%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.43 +[2023-04-14 10:06:08,802] [INFO] [logging.py:96:log_dist] [Rank 0] step=2130, skipped=24, lr=[8.281156877023959e-06, 8.281156877023959e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:06:08,820] [INFO] [timer.py:199:stop] epoch=0/micro_step=2130/global_step=2130, RunningAvgSamplesPerSec=107.10202712994473, CurrSamplesPerSec=108.1727569609888, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:06:08,912] [INFO] [logging.py:96:log_dist] [Rank 0] step=2130, skipped=30, lr=[4.294779807661105e-06, 4.294779807661105e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2129|ppo_ep: 1|act_loss: 0.0150299072265625|cri_loss: 0.0085296630859375|unsuper_loss: 0.0 +average reward score: 5.29296875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.46s (21.40%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2130|ppo_ep: 1|act_loss: -0.00832366943359375|cri_loss: -0.0035114288330078125|unsuper_loss: 0.0 +average reward score: 5.30859375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.14%) |Training time=0.46s (21.28%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2131|ppo_ep: 1|act_loss: 0.0592041015625|cri_loss: 0.0307769775390625|unsuper_loss: 0.0 +average reward score: 5.6328125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.17%) |Training time=0.45s (21.19%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2132|ppo_ep: 1|act_loss: 0.0191650390625|cri_loss: 0.01004791259765625|unsuper_loss: 0.0 +average reward score: 4.9296875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.61%) |Training time=0.47s (21.80%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2133|ppo_ep: 1|act_loss: -0.0196533203125|cri_loss: -0.00951385498046875|unsuper_loss: 0.0 +average reward score: 5.0 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.83%) |Training time=0.47s (21.65%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2134|ppo_ep: 1|act_loss: 0.012603759765625|cri_loss: 0.007221221923828125|unsuper_loss: 0.0 +average reward score: 4.90625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.81%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2135|ppo_ep: 1|act_loss: -0.01303863525390625|cri_loss: -0.0056915283203125|unsuper_loss: 0.0 +average reward score: 5.46484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.68%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2136|ppo_ep: 1|act_loss: 0.005916595458984375|cri_loss: 0.003437042236328125|unsuper_loss: 0.0 +average reward score: 5.1328125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.05%) |Training time=0.46s (21.39%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2137|ppo_ep: 1|act_loss: 0.0465087890625|cri_loss: 0.02520751953125|unsuper_loss: 0.0 +average reward score: 5.375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.71%) |Training time=0.47s (21.73%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2138|ppo_ep: 1|act_loss: 0.02044677734375|cri_loss: 0.011077880859375|unsuper_loss: 0.0 +average reward score: 5.52734375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.67%) |Training time=0.47s (21.78%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.43 +[2023-04-14 10:06:30,321] [INFO] [logging.py:96:log_dist] [Rank 0] step=2140, skipped=24, lr=[8.268168995036705e-06, 8.268168995036705e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:06:30,339] [INFO] [timer.py:199:stop] epoch=0/micro_step=2140/global_step=2140, RunningAvgSamplesPerSec=107.09897142545462, CurrSamplesPerSec=106.86669384427232, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:06:30,432] [INFO] [logging.py:96:log_dist] [Rank 0] step=2140, skipped=30, lr=[4.2880662491685345e-06, 4.2880662491685345e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2139|ppo_ep: 1|act_loss: 0.0289459228515625|cri_loss: 0.0149688720703125|unsuper_loss: 0.0 +average reward score: 5.4375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.97%) |Training time=0.46s (21.48%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2140|ppo_ep: 1|act_loss: -0.02886962890625|cri_loss: -0.01361083984375|unsuper_loss: 0.0 +average reward score: 5.46484375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.76%) |Training time=0.47s (21.74%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2141|ppo_ep: 1|act_loss: 0.0021343231201171875|cri_loss: 0.0018062591552734375|unsuper_loss: 0.0 +average reward score: 5.0546875 +------------------------------------------------------------------------------------- +|E2E latency=2.50s |Gather latency=0.00s (0.00%) |Generate time=1.86s (74.51%) |Training time=0.46s (18.56%) |Others=0.17 (6.94%)|CurSamplesPerSec=12.79 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2142|ppo_ep: 1|act_loss: 0.0106658935546875|cri_loss: 0.00615692138671875|unsuper_loss: 0.0 +average reward score: 5.7734375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.89%) |Training time=0.46s (21.53%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2143|ppo_ep: 1|act_loss: 0.044830322265625|cri_loss: 0.0255889892578125|unsuper_loss: 0.0 +average reward score: 4.66796875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.80%) |Training time=0.47s (21.64%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2144|ppo_ep: 1|act_loss: -0.0017185211181640625|cri_loss: -0.0005717277526855469|unsuper_loss: 0.0 +average reward score: 4.77734375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.67%) |Training time=0.47s (21.76%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2145|ppo_ep: 1|act_loss: -0.03558349609375|cri_loss: -0.0161895751953125|unsuper_loss: 0.0 +average reward score: 5.484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.00%) |Training time=0.46s (21.44%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2146|ppo_ep: 1|act_loss: -0.05517578125|cri_loss: -0.0265045166015625|unsuper_loss: 0.0 +average reward score: 5.8046875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.81%) |Training time=0.47s (21.63%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2147|ppo_ep: 1|act_loss: -0.046875|cri_loss: -0.022003173828125|unsuper_loss: 0.0 +average reward score: 5.23828125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.08%) |Training time=0.46s (21.33%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2148|ppo_ep: 1|act_loss: 0.0095367431640625|cri_loss: 0.005390167236328125|unsuper_loss: 0.0 +average reward score: 4.63671875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.96%) |Training time=0.49s (22.55%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.43 +[2023-04-14 10:06:52,265] [INFO] [logging.py:96:log_dist] [Rank 0] step=2150, skipped=24, lr=[8.255130076978754e-06, 8.255130076978754e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:06:52,283] [INFO] [timer.py:199:stop] epoch=0/micro_step=2150/global_step=2150, RunningAvgSamplesPerSec=107.08429552059788, CurrSamplesPerSec=98.68667589679414, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:06:52,375] [INFO] [logging.py:96:log_dist] [Rank 0] step=2150, skipped=30, lr=[4.281326187220675e-06, 4.281326187220675e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2149|ppo_ep: 1|act_loss: 0.029083251953125|cri_loss: 0.0157318115234375|unsuper_loss: 0.0 +average reward score: 4.90625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.02%) |Training time=0.49s (22.47%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2150|ppo_ep: 1|act_loss: 0.0665283203125|cri_loss: 0.03466796875|unsuper_loss: 0.0 +average reward score: 5.3203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.31%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2151|ppo_ep: 1|act_loss: 0.01038360595703125|cri_loss: 0.006134033203125|unsuper_loss: 0.0 +average reward score: 5.0390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.48s (22.12%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2152|ppo_ep: 1|act_loss: 0.0092620849609375|cri_loss: 0.004779815673828125|unsuper_loss: 0.0 +average reward score: 5.390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.01%) |Training time=0.49s (22.47%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2153|ppo_ep: 1|act_loss: 0.001251220703125|cri_loss: 0.0017547607421875|unsuper_loss: 0.0 +average reward score: 5.7265625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.69%) |Training time=0.50s (22.81%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2154|ppo_ep: 1|act_loss: -0.04931640625|cri_loss: -0.02398681640625|unsuper_loss: 0.0 +average reward score: 5.2578125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.88%) |Training time=0.49s (22.63%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2155|ppo_ep: 1|act_loss: 0.0128173828125|cri_loss: 0.00759124755859375|unsuper_loss: 0.0 +average reward score: 4.75 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.24%) |Training time=0.48s (22.23%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2156|ppo_ep: 1|act_loss: -0.005664825439453125|cri_loss: -0.002101898193359375|unsuper_loss: 0.0 +average reward score: 6.09375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.21%) |Training time=0.48s (22.25%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2157|ppo_ep: 1|act_loss: 0.0102081298828125|cri_loss: 0.005924224853515625|unsuper_loss: 0.0 +average reward score: 5.25 +------------------------------------------------------------------------------------- +|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.51%) |Training time=0.50s (21.30%) |Others=0.10 (4.19%)|CurSamplesPerSec=13.58 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2158|ppo_ep: 1|act_loss: -0.004772186279296875|cri_loss: -0.000301361083984375|unsuper_loss: 0.0 +average reward score: 5.3203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.47%) |Training time=0.47s (21.97%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.43 +[2023-04-14 10:07:14,131] [INFO] [logging.py:96:log_dist] [Rank 0] step=2160, skipped=24, lr=[8.242040316118323e-06, 8.242040316118323e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:07:14,150] [INFO] [timer.py:199:stop] epoch=0/micro_step=2160/global_step=2160, RunningAvgSamplesPerSec=107.04436231088094, CurrSamplesPerSec=103.56585864587393, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:07:14,242] [INFO] [logging.py:96:log_dist] [Rank 0] step=2160, skipped=30, lr=[4.2745597217215065e-06, 4.2745597217215065e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2159|ppo_ep: 1|act_loss: 0.0247344970703125|cri_loss: 0.01314544677734375|unsuper_loss: 0.0 +average reward score: 5.29296875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.87%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2160|ppo_ep: 1|act_loss: -0.05059814453125|cri_loss: -0.0238494873046875|unsuper_loss: 0.0 +average reward score: 5.76171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.08%) |Training time=0.47s (21.65%) |Others=0.11 (5.27%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2161|ppo_ep: 1|act_loss: -0.03314208984375|cri_loss: -0.0160369873046875|unsuper_loss: 0.0 +average reward score: 5.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.90%) |Training time=0.46s (21.53%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2162|ppo_ep: 1|act_loss: -0.028076171875|cri_loss: -0.013458251953125|unsuper_loss: 0.0 +average reward score: 5.34375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.47s (21.65%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2163|ppo_ep: 1|act_loss: 0.003276824951171875|cri_loss: 0.0018472671508789062|unsuper_loss: 0.0 +average reward score: 5.2109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.28%) |Training time=0.45s (21.13%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2164|ppo_ep: 1|act_loss: 0.0155181884765625|cri_loss: 0.008453369140625|unsuper_loss: 0.0 +average reward score: 5.53125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.14%) |Training time=0.46s (21.30%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2165|ppo_ep: 1|act_loss: 0.03338623046875|cri_loss: 0.017547607421875|unsuper_loss: 0.0 +average reward score: 5.4609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.72%) |Training time=0.47s (21.75%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2166|ppo_ep: 1|act_loss: 0.0576171875|cri_loss: 0.029571533203125|unsuper_loss: 0.0 +average reward score: 5.2109375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.47s (21.68%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2167|ppo_ep: 1|act_loss: 0.0482177734375|cri_loss: 0.02642822265625|unsuper_loss: 0.0 +average reward score: 4.84765625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.85%) |Training time=0.47s (21.58%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2168|ppo_ep: 1|act_loss: 0.0726318359375|cri_loss: 0.03887939453125|unsuper_loss: 0.0 +average reward score: 5.125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.50%) |Training time=0.47s (21.95%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44 +[2023-04-14 10:07:35,717] [INFO] [logging.py:96:log_dist] [Rank 0] step=2170, skipped=24, lr=[8.228899906477248e-06, 8.228899906477248e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:07:35,735] [INFO] [timer.py:199:stop] epoch=0/micro_step=2170/global_step=2170, RunningAvgSamplesPerSec=107.03586667134884, CurrSamplesPerSec=102.76559751986322, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:07:35,828] [INFO] [logging.py:96:log_dist] [Rank 0] step=2170, skipped=30, lr=[4.267766952966369e-06, 4.267766952966369e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2169|ppo_ep: 1|act_loss: -0.03533935546875|cri_loss: -0.0171356201171875|unsuper_loss: 0.0 +average reward score: 4.89453125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.59%) |Training time=0.47s (21.89%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2170|ppo_ep: 1|act_loss: -0.030029296875|cri_loss: -0.0144195556640625|unsuper_loss: 0.0 +average reward score: 5.25390625 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.63%) |Training time=0.48s (21.90%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.54 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2171|ppo_ep: 1|act_loss: -0.020263671875|cri_loss: -0.00963592529296875|unsuper_loss: 0.0 +average reward score: 5.21484375 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.43%) |Training time=0.48s (21.10%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.02 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2172|ppo_ep: 1|act_loss: 0.032257080078125|cri_loss: 0.0193328857421875|unsuper_loss: 0.0 +average reward score: 4.89453125 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.71%) |Training time=0.49s (21.10%) |Others=0.10 (4.19%)|CurSamplesPerSec=13.66 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2173|ppo_ep: 1|act_loss: -0.018524169921875|cri_loss: -0.00890350341796875|unsuper_loss: 0.0 +average reward score: 5.734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.05%) |Training time=0.49s (22.33%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2174|ppo_ep: 1|act_loss: -0.00188446044921875|cri_loss: -0.0005998611450195312|unsuper_loss: 0.0 +average reward score: 5.74609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.48s (22.02%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2175|ppo_ep: 1|act_loss: 0.01287841796875|cri_loss: 0.006801605224609375|unsuper_loss: 0.0 +average reward score: 5.53125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.30%) |Training time=0.48s (22.16%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2176|ppo_ep: 1|act_loss: 0.0091705322265625|cri_loss: 0.004795074462890625|unsuper_loss: 0.0 +average reward score: 5.3359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.87%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2177|ppo_ep: 1|act_loss: -0.04803466796875|cri_loss: -0.02337646484375|unsuper_loss: 0.0 +average reward score: 5.125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.82%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2178|ppo_ep: 1|act_loss: 0.0144805908203125|cri_loss: 0.00882720947265625|unsuper_loss: 0.0 +average reward score: 4.93359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.40%) |Training time=0.48s (22.05%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.44 +[2023-04-14 10:07:57,675] [INFO] [logging.py:96:log_dist] [Rank 0] step=2180, skipped=24, lr=[8.215709042828096e-06, 8.215709042828096e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:07:57,693] [INFO] [timer.py:199:stop] epoch=0/micro_step=2180/global_step=2180, RunningAvgSamplesPerSec=107.0068419629447, CurrSamplesPerSec=103.48066046173109, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:07:57,786] [INFO] [logging.py:96:log_dist] [Rank 0] step=2180, skipped=30, lr=[4.2609479816404836e-06, 4.2609479816404836e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2179|ppo_ep: 1|act_loss: 0.00109100341796875|cri_loss: 0.0007219314575195312|unsuper_loss: 0.0 +average reward score: 5.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.54%) |Training time=0.47s (21.90%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2180|ppo_ep: 1|act_loss: 0.00707244873046875|cri_loss: 0.0037384033203125|unsuper_loss: 0.0 +average reward score: 5.45703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.07%) |Training time=0.49s (22.40%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2181|ppo_ep: 1|act_loss: -0.0042572021484375|cri_loss: -0.0018062591552734375|unsuper_loss: 0.0 +average reward score: 5.18359375 +------------------------------------------------------------------------------------- +|E2E latency=2.81s |Gather latency=0.00s (0.00%) |Generate time=1.58s (56.29%) |Training time=0.48s (17.15%) |Others=0.75 (26.57%)|CurSamplesPerSec=11.38 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2182|ppo_ep: 1|act_loss: 0.054351806640625|cri_loss: 0.0278472900390625|unsuper_loss: 0.0 +average reward score: 5.1640625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.25%) |Training time=0.48s (22.21%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2183|ppo_ep: 1|act_loss: -0.01373291015625|cri_loss: -0.0059661865234375|unsuper_loss: 0.0 +average reward score: 5.2265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.92%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2184|ppo_ep: 1|act_loss: 0.0653076171875|cri_loss: 0.03363037109375|unsuper_loss: 0.0 +average reward score: 5.140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.45%) |Training time=0.47s (22.00%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2185|ppo_ep: 1|act_loss: -0.0047760009765625|cri_loss: -0.0015497207641601562|unsuper_loss: 0.0 +average reward score: 5.1953125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.86%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2186|ppo_ep: 1|act_loss: -0.00888824462890625|cri_loss: -0.00391387939453125|unsuper_loss: 0.0 +average reward score: 4.875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.88%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2187|ppo_ep: 1|act_loss: -0.031890869140625|cri_loss: -0.0145111083984375|unsuper_loss: 0.0 +average reward score: 5.31640625 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.28%) |Training time=0.48s (20.51%) |Others=0.10 (4.21%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2188|ppo_ep: 1|act_loss: -0.027069091796875|cri_loss: -0.01320648193359375|unsuper_loss: 0.0 +average reward score: 5.25 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.49%) |Training time=0.47s (21.96%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44 +[2023-04-14 10:08:20,079] [INFO] [logging.py:96:log_dist] [Rank 0] step=2190, skipped=24, lr=[8.20246792069129e-06, 8.20246792069129e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:08:20,097] [INFO] [timer.py:199:stop] epoch=0/micro_step=2190/global_step=2190, RunningAvgSamplesPerSec=106.98401354109774, CurrSamplesPerSec=106.3205036803265, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:08:20,190] [INFO] [logging.py:96:log_dist] [Rank 0] step=2190, skipped=30, lr=[4.254102908817454e-06, 4.254102908817454e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2189|ppo_ep: 1|act_loss: 0.0097808837890625|cri_loss: 0.005466461181640625|unsuper_loss: 0.0 +average reward score: 5.47265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.92%) |Training time=0.46s (21.54%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2190|ppo_ep: 1|act_loss: -0.016876220703125|cri_loss: -0.00792694091796875|unsuper_loss: 0.0 +average reward score: 5.078125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.47s (21.64%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2191|ppo_ep: 1|act_loss: 0.040283203125|cri_loss: 0.0205841064453125|unsuper_loss: 0.0 +average reward score: 5.4375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.76%) |Training time=0.47s (21.66%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2192|ppo_ep: 1|act_loss: 0.021575927734375|cri_loss: 0.0172271728515625|unsuper_loss: 0.0 +average reward score: 5.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.49%) |Training time=0.47s (21.96%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2193|ppo_ep: 1|act_loss: 0.045806884765625|cri_loss: 0.02337646484375|unsuper_loss: 0.0 +average reward score: 5.82421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.07%) |Training time=0.49s (22.38%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2194|ppo_ep: 1|act_loss: 0.0016117095947265625|cri_loss: 0.0012531280517578125|unsuper_loss: 0.0 +average reward score: 5.515625 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.10%) |Training time=0.51s (23.42%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2195|ppo_ep: 1|act_loss: -0.029296875|cri_loss: -0.013427734375|unsuper_loss: 0.0 +average reward score: 5.65625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.34%) |Training time=0.51s (23.19%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2196|ppo_ep: 1|act_loss: -0.0033473968505859375|cri_loss: -0.0009822845458984375|unsuper_loss: 0.0 +average reward score: 5.0234375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.63%) |Training time=0.50s (22.89%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2197|ppo_ep: 1|act_loss: -0.033203125|cri_loss: -0.016326904296875|unsuper_loss: 0.0 +average reward score: 5.515625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.05%) |Training time=0.46s (21.36%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2198|ppo_ep: 1|act_loss: -0.00848388671875|cri_loss: -0.003993988037109375|unsuper_loss: 0.0 +average reward score: 5.6015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.30%) |Training time=0.48s (22.17%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.44 +[2023-04-14 10:08:41,790] [INFO] [logging.py:96:log_dist] [Rank 0] step=2200, skipped=24, lr=[8.189176736332201e-06, 8.189176736332201e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:08:41,808] [INFO] [timer.py:199:stop] epoch=0/micro_step=2200/global_step=2200, RunningAvgSamplesPerSec=106.94942238546258, CurrSamplesPerSec=98.5336633016457, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:08:41,900] [INFO] [logging.py:96:log_dist] [Rank 0] step=2200, skipped=30, lr=[4.247231835957773e-06, 4.247231835957773e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2199|ppo_ep: 1|act_loss: 0.040740966796875|cri_loss: 0.0230560302734375|unsuper_loss: 0.0 +average reward score: 4.8046875 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.48%) |Training time=0.49s (22.10%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.51 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2200|ppo_ep: 1|act_loss: -0.04095458984375|cri_loss: -0.02008056640625|unsuper_loss: 0.0 +average reward score: 5.19140625 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.04%) |Training time=0.49s (21.63%) |Others=0.10 (4.33%)|CurSamplesPerSec=14.06 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2201|ppo_ep: 1|act_loss: 0.0758056640625|cri_loss: 0.03912353515625|unsuper_loss: 0.0 +average reward score: 5.3671875 +------------------------------------------------------------------------------------- +|E2E latency=2.61s |Gather latency=0.00s (0.00%) |Generate time=1.58s (60.48%) |Training time=0.49s (18.92%) |Others=0.54 (20.61%)|CurSamplesPerSec=12.26 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2202|ppo_ep: 1|act_loss: -0.07659912109375|cri_loss: -0.0325927734375|unsuper_loss: 0.0 +average reward score: 5.4921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.77%) |Training time=0.46s (21.66%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2203|ppo_ep: 1|act_loss: -0.0361328125|cri_loss: -0.0171661376953125|unsuper_loss: 0.0 +average reward score: 5.25 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.96%) |Training time=0.46s (21.45%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2204|ppo_ep: 1|act_loss: 0.01202392578125|cri_loss: 0.006740570068359375|unsuper_loss: 0.0 +average reward score: 5.14453125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.02%) |Training time=0.46s (21.38%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2205|ppo_ep: 1|act_loss: -0.01861572265625|cri_loss: -0.0091094970703125|unsuper_loss: 0.0 +average reward score: 5.51171875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.77%) |Training time=0.47s (21.67%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2206|ppo_ep: 1|act_loss: 0.0245513916015625|cri_loss: 0.013031005859375|unsuper_loss: 0.0 +average reward score: 5.04296875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.72%) |Training time=0.47s (21.69%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2207|ppo_ep: 1|act_loss: -0.00606536865234375|cri_loss: -0.0018520355224609375|unsuper_loss: 0.0 +average reward score: 5.03515625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.80%) |Training time=0.46s (21.59%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2208|ppo_ep: 1|act_loss: 0.030975341796875|cri_loss: 0.0165557861328125|unsuper_loss: 0.0 +average reward score: 4.87890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.25%) |Training time=0.48s (22.21%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.44 +[2023-04-14 10:09:03,847] [INFO] [logging.py:96:log_dist] [Rank 0] step=2210, skipped=24, lr=[8.175835686758245e-06, 8.175835686758245e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:09:03,865] [INFO] [timer.py:199:stop] epoch=0/micro_step=2210/global_step=2210, RunningAvgSamplesPerSec=106.93468634085123, CurrSamplesPerSec=105.99765446011911, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:09:03,958] [INFO] [logging.py:96:log_dist] [Rank 0] step=2210, skipped=30, lr=[4.240334864907317e-06, 4.240334864907317e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2209|ppo_ep: 1|act_loss: 0.018798828125|cri_loss: 0.0097198486328125|unsuper_loss: 0.0 +average reward score: 5.84375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.75%) |Training time=0.46s (21.67%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.44 +[2023-04-14 10:09:06,094] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 2210|ppo_ep: 1|act_loss: 0.09375|cri_loss: 0.048583984375|unsuper_loss: 0.0 +average reward score: 5.0859375 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.20%) |Training time=0.46s (21.69%) |Others=0.09 (4.10%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.44 +[2023-04-14 10:09:08,221] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 2211|ppo_ep: 1|act_loss: 0.0106658935546875|cri_loss: 0.006023406982421875|unsuper_loss: 0.0 +average reward score: 5.44921875 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.58%) |Training time=0.45s (21.30%) |Others=0.09 (4.12%)|CurSamplesPerSec=15.04 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2212|ppo_ep: 1|act_loss: -0.001712799072265625|cri_loss: -0.0007014274597167969|unsuper_loss: 0.0 +average reward score: 4.8125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.88%) |Training time=0.46s (21.54%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2213|ppo_ep: 1|act_loss: 0.02947998046875|cri_loss: 0.0155029296875|unsuper_loss: 0.0 +average reward score: 5.0703125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.13%) |Training time=0.45s (21.28%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2214|ppo_ep: 1|act_loss: -0.0101470947265625|cri_loss: -0.004581451416015625|unsuper_loss: 0.0 +average reward score: 4.8984375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.24%) |Training time=0.45s (21.16%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2215|ppo_ep: 1|act_loss: 0.004032135009765625|cri_loss: 0.0030670166015625|unsuper_loss: 0.0 +average reward score: 5.203125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.92%) |Training time=0.46s (21.50%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2216|ppo_ep: 1|act_loss: -0.00817108154296875|cri_loss: -0.0038509368896484375|unsuper_loss: 0.0 +average reward score: 5.69140625 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.84%) |Training time=0.46s (19.93%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2217|ppo_ep: 1|act_loss: -0.03326416015625|cri_loss: -0.0159149169921875|unsuper_loss: 0.0 +average reward score: 5.328125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.15%) |Training time=0.45s (21.26%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2218|ppo_ep: 1|act_loss: 0.026885986328125|cri_loss: 0.01406097412109375|unsuper_loss: 0.0 +average reward score: 4.87890625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.07%) |Training time=0.46s (21.34%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.44 +[2023-04-14 10:09:25,399] [INFO] [logging.py:96:log_dist] [Rank 0] step=2220, skipped=24, lr=[8.162444969715961e-06, 8.162444969715961e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:09:25,418] [INFO] [timer.py:199:stop] epoch=0/micro_step=2220/global_step=2220, RunningAvgSamplesPerSec=106.94111013177839, CurrSamplesPerSec=108.4043708167457, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:09:25,510] [INFO] [logging.py:96:log_dist] [Rank 0] step=2220, skipped=32, lr=[4.234798710055124e-06, 4.234798710055124e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2219|ppo_ep: 1|act_loss: 0.033538818359375|cri_loss: 0.0184783935546875|unsuper_loss: 0.0 +average reward score: 4.8359375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.06%) |Training time=0.46s (21.37%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2220|ppo_ep: 1|act_loss: 0.016204833984375|cri_loss: 0.00933837890625|unsuper_loss: 0.0 +average reward score: 4.73828125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.24%) |Training time=0.45s (21.16%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2221|ppo_ep: 1|act_loss: 0.01727294921875|cri_loss: 0.0093231201171875|unsuper_loss: 0.0 +average reward score: 4.63671875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.22%) |Training time=0.46s (21.21%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2222|ppo_ep: 1|act_loss: -0.0013675689697265625|cri_loss: -0.0002951622009277344|unsuper_loss: 0.0 +average reward score: 4.44140625 +------------------------------------------------------------------------------------- +|E2E latency=3.02s |Gather latency=0.00s (0.00%) |Generate time=1.58s (52.40%) |Training time=0.46s (15.08%) |Others=0.98 (32.52%)|CurSamplesPerSec=10.58 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2223|ppo_ep: 1|act_loss: 0.0084075927734375|cri_loss: 0.0046844482421875|unsuper_loss: 0.0 +average reward score: 5.078125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.78%) |Training time=0.49s (22.45%) |Others=0.10 (4.77%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2224|ppo_ep: 1|act_loss: -0.0291595458984375|cri_loss: -0.0138092041015625|unsuper_loss: 0.0 +average reward score: 5.19140625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.39%) |Training time=0.50s (23.11%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2225|ppo_ep: 1|act_loss: 0.0131683349609375|cri_loss: 0.00794219970703125|unsuper_loss: 0.0 +average reward score: 5.0859375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.16%) |Training time=0.49s (22.33%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.44 +[2023-04-14 10:09:41,411] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 2226|ppo_ep: 1|act_loss: -0.040618896484375|cri_loss: -0.018310546875|unsuper_loss: 0.0 +average reward score: 5.2265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.85%) |Training time=0.44s (20.58%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.44 +[2023-04-14 10:09:43,556] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 2227|ppo_ep: 1|act_loss: 0.018310546875|cri_loss: 0.009613037109375|unsuper_loss: 0.0 +average reward score: 5.2578125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.20%) |Training time=0.46s (21.22%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2228|ppo_ep: 1|act_loss: -0.00025463104248046875|cri_loss: 0.0007419586181640625|unsuper_loss: 0.0 +average reward score: 4.734375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.04%) |Training time=0.49s (22.49%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.44 +[2023-04-14 10:09:48,024] [INFO] [logging.py:96:log_dist] [Rank 0] step=2230, skipped=26, lr=[8.15169676886067e-06, 8.15169676886067e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:09:48,043] [INFO] [timer.py:199:stop] epoch=0/micro_step=2230/global_step=2230, RunningAvgSamplesPerSec=106.92186677661333, CurrSamplesPerSec=95.00962924947352, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:09:48,135] [INFO] [logging.py:96:log_dist] [Rank 0] step=2230, skipped=32, lr=[4.227855380137234e-06, 4.227855380137234e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2229|ppo_ep: 1|act_loss: 0.0445556640625|cri_loss: 0.024322509765625|unsuper_loss: 0.0 +average reward score: 5.08203125 +------------------------------------------------------------------------------------- +|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.69s (73.91%) |Training time=0.50s (21.81%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.98 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2230|ppo_ep: 1|act_loss: -0.001605987548828125|cri_loss: -0.00045108795166015625|unsuper_loss: 0.0 +average reward score: 5.04296875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.25%) |Training time=0.45s (21.15%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2231|ppo_ep: 1|act_loss: -0.0088653564453125|cri_loss: -0.00415802001953125|unsuper_loss: 0.0 +average reward score: 5.8203125 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.02%) |Training time=0.49s (20.77%) |Others=0.10 (4.21%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2232|ppo_ep: 1|act_loss: 0.00551605224609375|cri_loss: 0.00337982177734375|unsuper_loss: 0.0 +average reward score: 5.2578125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.88%) |Training time=0.46s (21.53%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2233|ppo_ep: 1|act_loss: 0.00797271728515625|cri_loss: 0.0043792724609375|unsuper_loss: 0.0 +average reward score: 5.36328125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.13%) |Training time=0.45s (21.28%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2234|ppo_ep: 1|act_loss: 0.0162353515625|cri_loss: 0.0086517333984375|unsuper_loss: 0.0 +average reward score: 5.1171875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.17%) |Training time=0.46s (21.16%) |Others=0.15 (6.67%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2235|ppo_ep: 1|act_loss: -0.0178070068359375|cri_loss: -0.00872039794921875|unsuper_loss: 0.0 +average reward score: 4.796875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.08%) |Training time=0.46s (21.31%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2236|ppo_ep: 1|act_loss: -0.037078857421875|cri_loss: -0.0180206298828125|unsuper_loss: 0.0 +average reward score: 5.1796875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.90%) |Training time=0.46s (21.53%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2237|ppo_ep: 1|act_loss: -0.02801513671875|cri_loss: -0.01322174072265625|unsuper_loss: 0.0 +average reward score: 4.84375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.73%) |Training time=0.46s (21.68%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2238|ppo_ep: 1|act_loss: 0.031951904296875|cri_loss: 0.0170135498046875|unsuper_loss: 0.0 +average reward score: 5.37890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.37%) |Training time=0.47s (21.87%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44 +[2023-04-14 10:10:09,702] [INFO] [logging.py:96:log_dist] [Rank 0] step=2240, skipped=26, lr=[8.138217151044717e-06, 8.138217151044717e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:10:09,721] [INFO] [timer.py:199:stop] epoch=0/micro_step=2240/global_step=2240, RunningAvgSamplesPerSec=106.91724169004912, CurrSamplesPerSec=102.24546793214905, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:10:09,813] [INFO] [logging.py:96:log_dist] [Rank 0] step=2240, skipped=32, lr=[4.220886439234385e-06, 4.220886439234385e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2239|ppo_ep: 1|act_loss: -0.039947509765625|cri_loss: -0.0196380615234375|unsuper_loss: 0.0 +average reward score: 5.0546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.48s (22.09%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2240|ppo_ep: 1|act_loss: -0.023529052734375|cri_loss: -0.01129913330078125|unsuper_loss: 0.0 +average reward score: 5.23828125 +------------------------------------------------------------------------------------- +|E2E latency=2.44s |Gather latency=0.00s (0.00%) |Generate time=1.58s (64.78%) |Training time=0.49s (20.00%) |Others=0.37 (15.22%)|CurSamplesPerSec=13.11 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2241|ppo_ep: 1|act_loss: 0.07281494140625|cri_loss: 0.0377197265625|unsuper_loss: 0.0 +average reward score: 5.18359375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.52%) |Training time=0.43s (19.84%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2242|ppo_ep: 1|act_loss: 0.0274200439453125|cri_loss: 0.0153656005859375|unsuper_loss: 0.0 +average reward score: 4.90234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.38%) |Training time=0.44s (20.04%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2243|ppo_ep: 1|act_loss: 0.03289794921875|cri_loss: 0.017913818359375|unsuper_loss: 0.0 +average reward score: 5.5 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.36%) |Training time=0.46s (21.14%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2244|ppo_ep: 1|act_loss: 0.00852203369140625|cri_loss: 0.004596710205078125|unsuper_loss: 0.0 +average reward score: 5.62890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.29%) |Training time=0.46s (21.18%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2245|ppo_ep: 1|act_loss: 0.0084381103515625|cri_loss: 0.005046844482421875|unsuper_loss: 0.0 +average reward score: 4.7109375 +------------------------------------------------------------------------------------- +|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.61s (67.92%) |Training time=0.46s (19.40%) |Others=0.30 (12.68%)|CurSamplesPerSec=13.51 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2246|ppo_ep: 1|act_loss: 0.0362548828125|cri_loss: 0.020599365234375|unsuper_loss: 0.0 +average reward score: 4.9765625 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.91%) |Training time=0.44s (19.68%) |Others=0.10 (4.41%)|CurSamplesPerSec=14.24 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2247|ppo_ep: 1|act_loss: -0.027587890625|cri_loss: -0.01079559326171875|unsuper_loss: 0.0 +average reward score: 5.12109375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.05%) |Training time=0.44s (20.38%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2248|ppo_ep: 1|act_loss: -0.033111572265625|cri_loss: -0.0156707763671875|unsuper_loss: 0.0 +average reward score: 5.359375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.29%) |Training time=0.46s (21.18%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.44 +[2023-04-14 10:10:31,997] [INFO] [logging.py:96:log_dist] [Rank 0] step=2250, skipped=26, lr=[8.124688423357883e-06, 8.124688423357883e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:10:32,015] [INFO] [timer.py:199:stop] epoch=0/micro_step=2250/global_step=2250, RunningAvgSamplesPerSec=106.928745519496, CurrSamplesPerSec=107.82194537475227, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:10:32,108] [INFO] [logging.py:96:log_dist] [Rank 0] step=2250, skipped=32, lr=[4.213891990643095e-06, 4.213891990643095e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2249|ppo_ep: 1|act_loss: -0.00763702392578125|cri_loss: -0.0036678314208984375|unsuper_loss: 0.0 +average reward score: 5.29296875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.27%) |Training time=0.46s (21.20%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2250|ppo_ep: 1|act_loss: -0.029144287109375|cri_loss: -0.013763427734375|unsuper_loss: 0.0 +average reward score: 5.921875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.33%) |Training time=0.46s (21.11%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2251|ppo_ep: 1|act_loss: -0.017852783203125|cri_loss: -0.00787353515625|unsuper_loss: 0.0 +average reward score: 5.22265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.28%) |Training time=0.46s (21.16%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2252|ppo_ep: 1|act_loss: 0.00494384765625|cri_loss: 0.0029144287109375|unsuper_loss: 0.0 +average reward score: 5.01171875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.80%) |Training time=0.47s (21.67%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2253|ppo_ep: 1|act_loss: 0.024017333984375|cri_loss: 0.0124053955078125|unsuper_loss: 0.0 +average reward score: 5.1015625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.15%) |Training time=0.46s (21.22%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2254|ppo_ep: 1|act_loss: 0.05157470703125|cri_loss: 0.0269622802734375|unsuper_loss: 0.0 +average reward score: 5.2109375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.19%) |Training time=0.49s (22.35%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2255|ppo_ep: 1|act_loss: 0.03814697265625|cri_loss: 0.0195159912109375|unsuper_loss: 0.0 +average reward score: 5.09375 +------------------------------------------------------------------------------------- +|E2E latency=3.26s |Gather latency=0.00s (0.00%) |Generate time=1.61s (49.47%) |Training time=0.46s (14.15%) |Others=1.19 (36.38%)|CurSamplesPerSec=9.81 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2256|ppo_ep: 1|act_loss: 0.008209228515625|cri_loss: 0.0043182373046875|unsuper_loss: 0.0 +average reward score: 4.98828125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.66%) |Training time=0.46s (20.83%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2257|ppo_ep: 1|act_loss: -0.010101318359375|cri_loss: -0.00496673583984375|unsuper_loss: 0.0 +average reward score: 5.4609375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.65s (74.92%) |Training time=0.45s (20.57%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2258|ppo_ep: 1|act_loss: -0.053955078125|cri_loss: -0.0260467529296875|unsuper_loss: 0.0 +average reward score: 5.26953125 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.09%) |Training time=0.47s (20.59%) |Others=0.10 (4.32%)|CurSamplesPerSec=14.04 |AvgSamplesPerSec=14.44 +[2023-04-14 10:10:54,960] [INFO] [logging.py:96:log_dist] [Rank 0] step=2260, skipped=26, lr=[8.11111078632855e-06, 8.11111078632855e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:10:54,978] [INFO] [timer.py:199:stop] epoch=0/micro_step=2260/global_step=2260, RunningAvgSamplesPerSec=106.92700638560709, CurrSamplesPerSec=110.86272184070586, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:10:55,071] [INFO] [logging.py:96:log_dist] [Rank 0] step=2260, skipped=32, lr=[4.206872138037964e-06, 4.206872138037964e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2259|ppo_ep: 1|act_loss: -0.022674560546875|cri_loss: -0.01006317138671875|unsuper_loss: 0.0 +average reward score: 5.421875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.50%) |Training time=0.45s (20.95%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2260|ppo_ep: 1|act_loss: -0.013671875|cri_loss: -0.0060577392578125|unsuper_loss: 0.0 +average reward score: 5.4921875 +------------------------------------------------------------------------------------- +|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.07%) |Training time=0.49s (20.76%) |Others=0.10 (4.17%)|CurSamplesPerSec=13.57 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2261|ppo_ep: 1|act_loss: 0.00531005859375|cri_loss: 0.00421905517578125|unsuper_loss: 0.0 +average reward score: 5.46875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.40%) |Training time=0.45s (21.05%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2262|ppo_ep: 1|act_loss: -0.034423828125|cri_loss: -0.0164947509765625|unsuper_loss: 0.0 +average reward score: 5.796875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.14%) |Training time=0.46s (21.32%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2263|ppo_ep: 1|act_loss: -0.00730133056640625|cri_loss: -0.003337860107421875|unsuper_loss: 0.0 +average reward score: 5.4609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.46s (21.18%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2264|ppo_ep: 1|act_loss: 0.0159912109375|cri_loss: 0.0100555419921875|unsuper_loss: 0.0 +average reward score: 5.50390625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.28%) |Training time=0.46s (21.16%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2265|ppo_ep: 1|act_loss: 0.012115478515625|cri_loss: 0.00759124755859375|unsuper_loss: 0.0 +average reward score: 5.1328125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.26%) |Training time=0.46s (21.18%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2266|ppo_ep: 1|act_loss: 0.00399017333984375|cri_loss: 0.0026092529296875|unsuper_loss: 0.0 +average reward score: 5.171875 +------------------------------------------------------------------------------------- +|E2E latency=2.74s |Gather latency=0.00s (0.00%) |Generate time=1.60s (58.38%) |Training time=0.45s (16.54%) |Others=0.69 (25.08%)|CurSamplesPerSec=11.69 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2267|ppo_ep: 1|act_loss: -0.047210693359375|cri_loss: -0.0228271484375|unsuper_loss: 0.0 +average reward score: 5.58984375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.02%) |Training time=0.49s (22.45%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2268|ppo_ep: 1|act_loss: -0.0060577392578125|cri_loss: -0.00244140625|unsuper_loss: 0.0 +average reward score: 5.51171875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.28%) |Training time=0.48s (22.19%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.44 +[2023-04-14 10:11:17,364] [INFO] [logging.py:96:log_dist] [Rank 0] step=2270, skipped=26, lr=[8.097484441210055e-06, 8.097484441210055e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:11:17,382] [INFO] [timer.py:199:stop] epoch=0/micro_step=2270/global_step=2270, RunningAvgSamplesPerSec=106.91831941913372, CurrSamplesPerSec=103.83418432078822, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:11:17,475] [INFO] [logging.py:96:log_dist] [Rank 0] step=2270, skipped=32, lr=[4.199826985470142e-06, 4.199826985470142e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2269|ppo_ep: 1|act_loss: -0.04705810546875|cri_loss: -0.0223846435546875|unsuper_loss: 0.0 +average reward score: 5.1796875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.83%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2270|ppo_ep: 1|act_loss: -0.030059814453125|cri_loss: -0.01427459716796875|unsuper_loss: 0.0 +average reward score: 5.6015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.90%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2271|ppo_ep: 1|act_loss: -0.016143798828125|cri_loss: -0.0068817138671875|unsuper_loss: 0.0 +average reward score: 5.296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.51%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2272|ppo_ep: 1|act_loss: -0.05194091796875|cri_loss: -0.020294189453125|unsuper_loss: 0.0 +average reward score: 5.171875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.88%) |Training time=0.46s (21.55%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2273|ppo_ep: 1|act_loss: -0.018035888671875|cri_loss: -0.00811767578125|unsuper_loss: 0.0 +average reward score: 4.9921875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.68%) |Training time=0.47s (21.77%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2274|ppo_ep: 1|act_loss: 0.01580810546875|cri_loss: 0.0082855224609375|unsuper_loss: 0.0 +average reward score: 5.44921875 +------------------------------------------------------------------------------------- +|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.89%) |Training time=0.49s (20.92%) |Others=0.10 (4.19%)|CurSamplesPerSec=13.64 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2275|ppo_ep: 1|act_loss: 0.017425537109375|cri_loss: 0.00977325439453125|unsuper_loss: 0.0 +average reward score: 4.67578125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.81%) |Training time=0.47s (21.70%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2276|ppo_ep: 1|act_loss: -0.0180816650390625|cri_loss: -0.0082855224609375|unsuper_loss: 0.0 +average reward score: 5.01171875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.95%) |Training time=0.46s (21.50%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2277|ppo_ep: 1|act_loss: -0.01300048828125|cri_loss: -0.0060882568359375|unsuper_loss: 0.0 +average reward score: 5.51953125 +------------------------------------------------------------------------------------- +|E2E latency=3.46s |Gather latency=0.00s (0.00%) |Generate time=1.59s (45.97%) |Training time=0.47s (13.45%) |Others=1.41 (40.58%)|CurSamplesPerSec=9.24 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2278|ppo_ep: 1|act_loss: -0.017974853515625|cri_loss: -0.008453369140625|unsuper_loss: 0.0 +average reward score: 5.65234375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.90%) |Training time=0.46s (21.53%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.43 +[2023-04-14 10:11:40,451] [INFO] [logging.py:96:log_dist] [Rank 0] step=2280, skipped=26, lr=[8.083809589977701e-06, 8.083809589977701e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:11:40,469] [INFO] [timer.py:199:stop] epoch=0/micro_step=2280/global_step=2280, RunningAvgSamplesPerSec=106.90553455426142, CurrSamplesPerSec=104.34825010417897, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:11:40,562] [INFO] [logging.py:96:log_dist] [Rank 0] step=2280, skipped=32, lr=[4.192756637365787e-06, 4.192756637365787e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2279|ppo_ep: 1|act_loss: 0.0111846923828125|cri_loss: 0.00595855712890625|unsuper_loss: 0.0 +average reward score: 5.81640625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.77%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2280|ppo_ep: 1|act_loss: 0.0008172988891601562|cri_loss: 0.0007848739624023438|unsuper_loss: 0.0 +average reward score: 5.0703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.75%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2281|ppo_ep: 1|act_loss: 0.016265869140625|cri_loss: 0.00873565673828125|unsuper_loss: 0.0 +average reward score: 5.11328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.14%) |Training time=0.49s (22.33%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2282|ppo_ep: 1|act_loss: 0.050537109375|cri_loss: 0.027069091796875|unsuper_loss: 0.0 +average reward score: 5.98046875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.70%) |Training time=0.50s (22.79%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2283|ppo_ep: 1|act_loss: 0.00830078125|cri_loss: 0.004779815673828125|unsuper_loss: 0.0 +average reward score: 5.0234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.11%) |Training time=0.49s (22.37%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2284|ppo_ep: 1|act_loss: -0.0672607421875|cri_loss: -0.032012939453125|unsuper_loss: 0.0 +average reward score: 5.5390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.00%) |Training time=0.46s (21.46%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2285|ppo_ep: 1|act_loss: -0.0333251953125|cri_loss: -0.0162506103515625|unsuper_loss: 0.0 +average reward score: 5.1328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.34%) |Training time=0.43s (19.93%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2286|ppo_ep: 1|act_loss: 0.019287109375|cri_loss: 0.0107269287109375|unsuper_loss: 0.0 +average reward score: 5.265625 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.68s (75.92%) |Training time=0.44s (19.62%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.42 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2287|ppo_ep: 1|act_loss: 0.0294036865234375|cri_loss: 0.01515960693359375|unsuper_loss: 0.0 +average reward score: 5.5 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.98%) |Training time=0.44s (20.44%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2288|ppo_ep: 1|act_loss: 0.016387939453125|cri_loss: 0.00876617431640625|unsuper_loss: 0.0 +average reward score: 5.328125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.06%) |Training time=0.44s (20.36%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43 +[2023-04-14 10:12:02,342] [INFO] [logging.py:96:log_dist] [Rank 0] step=2290, skipped=26, lr=[8.070086435325772e-06, 8.070086435325772e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:12:02,361] [INFO] [timer.py:199:stop] epoch=0/micro_step=2290/global_step=2290, RunningAvgSamplesPerSec=106.89813915883823, CurrSamplesPerSec=87.2612355131024, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:12:02,454] [INFO] [logging.py:96:log_dist] [Rank 0] step=2290, skipped=32, lr=[4.185661198524513e-06, 4.185661198524513e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2289|ppo_ep: 1|act_loss: 0.00677490234375|cri_loss: 0.0037097930908203125|unsuper_loss: 0.0 +average reward score: 5.0546875 +------------------------------------------------------------------------------------- +|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.71s (72.81%) |Training time=0.54s (23.01%) |Others=0.10 (4.18%)|CurSamplesPerSec=13.63 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2290|ppo_ep: 1|act_loss: 0.04388427734375|cri_loss: 0.0227508544921875|unsuper_loss: 0.0 +average reward score: 5.14453125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.49%) |Training time=0.43s (19.93%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2291|ppo_ep: 1|act_loss: 0.035247802734375|cri_loss: 0.0183563232421875|unsuper_loss: 0.0 +average reward score: 5.1484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.32%) |Training time=0.43s (20.11%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2292|ppo_ep: 1|act_loss: -0.0136871337890625|cri_loss: -0.0064544677734375|unsuper_loss: 0.0 +average reward score: 5.41796875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.48%) |Training time=0.45s (20.92%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2293|ppo_ep: 1|act_loss: 0.006671905517578125|cri_loss: 0.0039825439453125|unsuper_loss: 0.0 +average reward score: 5.296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.66%) |Training time=0.45s (20.76%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2294|ppo_ep: 1|act_loss: -0.09033203125|cri_loss: -0.040313720703125|unsuper_loss: 0.0 +average reward score: 5.13671875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.79%) |Training time=0.44s (20.63%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2295|ppo_ep: 1|act_loss: -0.0204925537109375|cri_loss: -0.00887298583984375|unsuper_loss: 0.0 +average reward score: 5.5625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.41%) |Training time=0.45s (20.84%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2296|ppo_ep: 1|act_loss: -0.05853271484375|cri_loss: -0.028350830078125|unsuper_loss: 0.0 +average reward score: 4.9140625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.35%) |Training time=0.45s (21.07%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2297|ppo_ep: 1|act_loss: -0.0142822265625|cri_loss: -0.00630950927734375|unsuper_loss: 0.0 +average reward score: 5.24609375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.05%) |Training time=0.49s (22.44%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2298|ppo_ep: 1|act_loss: 0.0034999847412109375|cri_loss: 0.002185821533203125|unsuper_loss: 0.0 +average reward score: 6.1015625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.88%) |Training time=0.47s (21.58%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.44 +[2023-04-14 10:12:23,932] [INFO] [logging.py:96:log_dist] [Rank 0] step=2300, skipped=26, lr=[8.056315180664529e-06, 8.056315180664529e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:12:23,950] [INFO] [timer.py:199:stop] epoch=0/micro_step=2300/global_step=2300, RunningAvgSamplesPerSec=106.91339089598668, CurrSamplesPerSec=109.58776763603377, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:12:24,042] [INFO] [logging.py:96:log_dist] [Rank 0] step=2300, skipped=32, lr=[4.17854077411784e-06, 4.17854077411784e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2299|ppo_ep: 1|act_loss: -0.0406494140625|cri_loss: -0.0195770263671875|unsuper_loss: 0.0 +average reward score: 4.9453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.38%) |Training time=0.45s (21.08%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2300|ppo_ep: 1|act_loss: 0.0259552001953125|cri_loss: 0.0132904052734375|unsuper_loss: 0.0 +average reward score: 5.515625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.25%) |Training time=0.46s (21.18%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2301|ppo_ep: 1|act_loss: 0.07537841796875|cri_loss: 0.038604736328125|unsuper_loss: 0.0 +average reward score: 5.1171875 +------------------------------------------------------------------------------------- +|E2E latency=2.46s |Gather latency=0.00s (0.00%) |Generate time=1.59s (64.52%) |Training time=0.46s (18.84%) |Others=0.41 (16.63%)|CurSamplesPerSec=12.98 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2302|ppo_ep: 1|act_loss: 0.051544189453125|cri_loss: 0.0265960693359375|unsuper_loss: 0.0 +average reward score: 5.265625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.77%) |Training time=0.47s (21.71%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2303|ppo_ep: 1|act_loss: 0.0904541015625|cri_loss: 0.047149658203125|unsuper_loss: 0.0 +average reward score: 5.05078125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.77%) |Training time=0.47s (21.67%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2304|ppo_ep: 1|act_loss: -0.0343017578125|cri_loss: -0.0167236328125|unsuper_loss: 0.0 +average reward score: 5.24609375 +------------------------------------------------------------------------------------- +|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.92%) |Training time=0.49s (20.88%) |Others=0.10 (4.20%)|CurSamplesPerSec=13.64 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2305|ppo_ep: 1|act_loss: -0.024200439453125|cri_loss: -0.0115966796875|unsuper_loss: 0.0 +average reward score: 5.12109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.47s (21.96%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2306|ppo_ep: 1|act_loss: 0.001941680908203125|cri_loss: 0.0023021697998046875|unsuper_loss: 0.0 +average reward score: 5.3671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.84%) |Training time=0.47s (21.61%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2307|ppo_ep: 1|act_loss: -0.0360107421875|cri_loss: -0.0176544189453125|unsuper_loss: 0.0 +average reward score: 5.34375 +------------------------------------------------------------------------------------- +|E2E latency=2.61s |Gather latency=0.00s (0.00%) |Generate time=1.59s (60.94%) |Training time=0.46s (17.63%) |Others=0.56 (21.43%)|CurSamplesPerSec=12.25 |AvgSamplesPerSec=14.43 +epoch: 0|step: 2308|ppo_ep: 1|act_loss: -0.017486572265625|cri_loss: -0.00789642333984375|unsuper_loss: 0.0 +average reward score: 5.046875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.27%) |Training time=0.48s (22.18%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.43 +[2023-04-14 10:12:46,476] [INFO] [logging.py:96:log_dist] [Rank 0] step=2310, skipped=26, lr=[8.042496030117185e-06, 8.042496030117185e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:12:46,494] [INFO] [timer.py:199:stop] epoch=0/micro_step=2310/global_step=2310, RunningAvgSamplesPerSec=106.90101331828065, CurrSamplesPerSec=105.29688544396153, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:12:46,587] [INFO] [logging.py:96:log_dist] [Rank 0] step=2310, skipped=32, lr=[4.171395469687636e-06, 4.171395469687636e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2309|ppo_ep: 1|act_loss: 0.034759521484375|cri_loss: 0.017852783203125|unsuper_loss: 0.0 +average reward score: 5.5703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.76%) |Training time=0.47s (21.70%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2310|ppo_ep: 1|act_loss: 0.018341064453125|cri_loss: 0.0097503662109375|unsuper_loss: 0.0 +average reward score: 5.41015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.88%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2311|ppo_ep: 1|act_loss: 0.01898193359375|cri_loss: 0.0101470947265625|unsuper_loss: 0.0 +average reward score: 5.68359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.27%) |Training time=0.48s (22.20%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.44 +[2023-04-14 10:12:53,077] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 2312|ppo_ep: 1|act_loss: 0.0770263671875|cri_loss: 0.04034423828125|unsuper_loss: 0.0 +average reward score: 5.78515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.39%) |Training time=0.49s (22.57%) |Others=0.09 (4.04%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.44 +[2023-04-14 10:12:55,219] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 2313|ppo_ep: 1|act_loss: 0.0029315948486328125|cri_loss: 0.0021209716796875|unsuper_loss: 0.0 +average reward score: 5.6171875 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.59s (69.81%) |Training time=0.47s (20.57%) |Others=0.22 (9.62%)|CurSamplesPerSec=14.08 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2314|ppo_ep: 1|act_loss: -0.02044677734375|cri_loss: -0.01010894775390625|unsuper_loss: 0.0 +average reward score: 4.8828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.90%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2315|ppo_ep: 1|act_loss: 0.0189056396484375|cri_loss: 0.01038360595703125|unsuper_loss: 0.0 +average reward score: 5.1796875 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.62s (71.70%) |Training time=0.50s (21.90%) |Others=0.14 (6.40%)|CurSamplesPerSec=14.14 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2316|ppo_ep: 1|act_loss: 0.01366424560546875|cri_loss: 0.00711822509765625|unsuper_loss: 0.0 +average reward score: 5.0546875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.82%) |Training time=0.46s (21.32%) |Others=0.11 (4.86%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2317|ppo_ep: 1|act_loss: 0.034149169921875|cri_loss: 0.0182647705078125|unsuper_loss: 0.0 +average reward score: 5.39453125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.43%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2318|ppo_ep: 1|act_loss: -0.0007562637329101562|cri_loss: -0.00014853477478027344|unsuper_loss: 0.0 +average reward score: 5.55859375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.59%) |Training time=0.47s (21.83%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.44 +[2023-04-14 10:13:08,300] [INFO] [logging.py:96:log_dist] [Rank 0] step=2320, skipped=26, lr=[8.028629188516887e-06, 8.028629188516887e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:13:08,539] [INFO] [timer.py:199:stop] epoch=0/micro_step=2320/global_step=2320, RunningAvgSamplesPerSec=106.84855308456247, CurrSamplesPerSec=61.47600233413061, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:13:08,632] [INFO] [logging.py:96:log_dist] [Rank 0] step=2320, skipped=34, lr=[4.165661383686135e-06, 4.165661383686135e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2319|ppo_ep: 1|act_loss: -0.05047607421875|cri_loss: -0.02435302734375|unsuper_loss: 0.0 +average reward score: 5.30859375 +------------------------------------------------------------------------------------- +|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.59s (67.09%) |Training time=0.68s (28.79%) |Others=0.10 (4.11%)|CurSamplesPerSec=13.49 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2320|ppo_ep: 1|act_loss: 0.0015115737915039062|cri_loss: 0.0011758804321289062|unsuper_loss: 0.0 +average reward score: 5.05859375 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.88%) |Training time=0.49s (20.92%) |Others=0.10 (4.19%)|CurSamplesPerSec=13.65 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2321|ppo_ep: 1|act_loss: -0.0006151199340820312|cri_loss: -3.0040740966796875e-05|unsuper_loss: 0.0 +average reward score: 5.44140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.73%) |Training time=0.47s (21.70%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2322|ppo_ep: 1|act_loss: -0.0045928955078125|cri_loss: -0.00206756591796875|unsuper_loss: 0.0 +average reward score: 5.6953125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.46s (21.38%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2323|ppo_ep: 1|act_loss: 0.0068511962890625|cri_loss: 0.00389862060546875|unsuper_loss: 0.0 +average reward score: 5.37890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.47s (21.63%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2324|ppo_ep: 1|act_loss: 0.016845703125|cri_loss: 0.0091400146484375|unsuper_loss: 0.0 +average reward score: 5.46484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.91%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2325|ppo_ep: 1|act_loss: -0.028961181640625|cri_loss: -0.01421356201171875|unsuper_loss: 0.0 +average reward score: 5.5625 +------------------------------------------------------------------------------------- +|E2E latency=2.08s |Gather latency=0.00s (0.00%) |Generate time=1.59s (76.63%) |Training time=0.39s (18.61%) |Others=0.10 (4.76%)|CurSamplesPerSec=15.42 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2326|ppo_ep: 1|act_loss: 0.0291290283203125|cri_loss: 0.01497650146484375|unsuper_loss: 0.0 +average reward score: 5.06640625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.40%) |Training time=0.48s (22.07%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2327|ppo_ep: 1|act_loss: 0.020904541015625|cri_loss: 0.01064300537109375|unsuper_loss: 0.0 +average reward score: 5.41015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.14%) |Training time=0.46s (21.28%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44 +[2023-04-14 10:13:28,936] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 2328|ppo_ep: 1|act_loss: 0.05792236328125|cri_loss: 0.03179931640625|unsuper_loss: 0.0 +average reward score: 5.08203125 +------------------------------------------------------------------------------------- +|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.59s (75.04%) |Training time=0.43s (20.32%) |Others=0.10 (4.64%)|CurSamplesPerSec=15.13 |AvgSamplesPerSec=14.44 +[2023-04-14 10:13:31,062] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +[2023-04-14 10:13:31,063] [INFO] [logging.py:96:log_dist] [Rank 0] step=2330, skipped=28, lr=[8.017501515777759e-06, 8.017501515777759e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:13:31,063] [INFO] [timer.py:199:stop] epoch=0/micro_step=2330/global_step=2330, RunningAvgSamplesPerSec=106.86175914958172, CurrSamplesPerSec=123.63699739954438, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:13:31,155] [INFO] [logging.py:96:log_dist] [Rank 0] step=2330, skipped=34, lr=[4.158471562355229e-06, 4.158471562355229e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2329|ppo_ep: 1|act_loss: -0.018341064453125|cri_loss: -0.00897216796875|unsuper_loss: 0.0 +average reward score: 5.40234375 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.62%) |Training time=0.42s (19.78%) |Others=0.10 (4.59%)|CurSamplesPerSec=15.05 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2330|ppo_ep: 1|act_loss: 0.0045166015625|cri_loss: 0.002838134765625|unsuper_loss: 0.0 +average reward score: 4.87109375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.14%) |Training time=0.45s (21.26%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2331|ppo_ep: 1|act_loss: 0.00347137451171875|cri_loss: 0.0021820068359375|unsuper_loss: 0.0 +average reward score: 5.37109375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.17%) |Training time=0.45s (21.23%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2332|ppo_ep: 1|act_loss: -0.056640625|cri_loss: -0.02764892578125|unsuper_loss: 0.0 +average reward score: 5.0625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.34%) |Training time=0.45s (21.06%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2333|ppo_ep: 1|act_loss: -0.02557373046875|cri_loss: -0.01247406005859375|unsuper_loss: 0.0 +average reward score: 5.76953125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.17%) |Training time=0.45s (21.24%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2334|ppo_ep: 1|act_loss: 0.012054443359375|cri_loss: 0.006420135498046875|unsuper_loss: 0.0 +average reward score: 4.94921875 +------------------------------------------------------------------------------------- +|E2E latency=2.43s |Gather latency=0.00s (0.00%) |Generate time=1.75s (71.93%) |Training time=0.45s (18.59%) |Others=0.23 (9.47%)|CurSamplesPerSec=13.14 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2335|ppo_ep: 1|act_loss: 0.03173828125|cri_loss: 0.01629638671875|unsuper_loss: 0.0 +average reward score: 5.796875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.22%) |Training time=0.45s (21.18%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2336|ppo_ep: 1|act_loss: 0.0335693359375|cri_loss: 0.0183258056640625|unsuper_loss: 0.0 +average reward score: 4.7265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.28%) |Training time=0.45s (21.12%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2337|ppo_ep: 1|act_loss: -0.042266845703125|cri_loss: -0.0205078125|unsuper_loss: 0.0 +average reward score: 5.2734375 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.27%) |Training time=0.45s (21.11%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2338|ppo_ep: 1|act_loss: 0.015228271484375|cri_loss: 0.0080108642578125|unsuper_loss: 0.0 +average reward score: 5.4921875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.79%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.44 +[2023-04-14 10:13:52,736] [INFO] [logging.py:96:log_dist] [Rank 0] step=2340, skipped=28, lr=[8.003549348716149e-06, 8.003549348716149e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:13:52,754] [INFO] [timer.py:199:stop] epoch=0/micro_step=2340/global_step=2340, RunningAvgSamplesPerSec=106.87153709245142, CurrSamplesPerSec=106.25905734856126, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:13:52,846] [INFO] [logging.py:96:log_dist] [Rank 0] step=2340, skipped=34, lr=[4.151257158474923e-06, 4.151257158474923e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2339|ppo_ep: 1|act_loss: 0.041748046875|cri_loss: 0.02178955078125|unsuper_loss: 0.0 +average reward score: 5.484375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.76%) |Training time=0.46s (21.67%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2340|ppo_ep: 1|act_loss: -0.0221710205078125|cri_loss: -0.01041412353515625|unsuper_loss: 0.0 +average reward score: 5.3359375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.82%) |Training time=0.46s (21.58%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2341|ppo_ep: 1|act_loss: 0.018798828125|cri_loss: 0.0099029541015625|unsuper_loss: 0.0 +average reward score: 4.84765625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.10%) |Training time=0.46s (21.32%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2342|ppo_ep: 1|act_loss: 0.00762939453125|cri_loss: 0.007808685302734375|unsuper_loss: 0.0 +average reward score: 5.7578125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.23%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2343|ppo_ep: 1|act_loss: -0.029632568359375|cri_loss: -0.01442718505859375|unsuper_loss: 0.0 +average reward score: 5.5 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.20%) |Training time=0.46s (21.22%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2344|ppo_ep: 1|act_loss: 0.008575439453125|cri_loss: 0.004589080810546875|unsuper_loss: 0.0 +average reward score: 5.109375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.68%) |Training time=0.45s (20.80%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2345|ppo_ep: 1|act_loss: 0.0219573974609375|cri_loss: 0.01154327392578125|unsuper_loss: 0.0 +average reward score: 5.65234375 +------------------------------------------------------------------------------------- +|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.69s (75.47%) |Training time=0.45s (20.13%) |Others=0.10 (4.39%)|CurSamplesPerSec=14.29 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2346|ppo_ep: 1|act_loss: 0.00861358642578125|cri_loss: 0.00467681884765625|unsuper_loss: 0.0 +average reward score: 5.6328125 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.32%) |Training time=0.45s (21.07%) |Others=0.10 (4.62%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2347|ppo_ep: 1|act_loss: 0.0147552490234375|cri_loss: 0.00815582275390625|unsuper_loss: 0.0 +average reward score: 5.31640625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.33%) |Training time=0.45s (21.07%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2348|ppo_ep: 1|act_loss: -0.0386962890625|cri_loss: -0.01849365234375|unsuper_loss: 0.0 +average reward score: 5.12109375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.37%) |Training time=0.45s (21.04%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.44 +[2023-04-14 10:14:14,328] [INFO] [logging.py:96:log_dist] [Rank 0] step=2350, skipped=28, lr=[7.989550067885356e-06, 7.989550067885356e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:14:14,860] [INFO] [timer.py:199:stop] epoch=0/micro_step=2350/global_step=2350, RunningAvgSamplesPerSec=106.79837881327074, CurrSamplesPerSec=38.86040216651423, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:14:14,953] [INFO] [logging.py:96:log_dist] [Rank 0] step=2350, skipped=34, lr=[4.1440182789800875e-06, 4.1440182789800875e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2349|ppo_ep: 1|act_loss: -0.01824951171875|cri_loss: -0.00884246826171875|unsuper_loss: 0.0 +average reward score: 5.453125 +------------------------------------------------------------------------------------- +|E2E latency=2.67s |Gather latency=0.00s (0.00%) |Generate time=1.59s (59.43%) |Training time=0.99s (36.91%) |Others=0.10 (3.65%)|CurSamplesPerSec=11.98 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2350|ppo_ep: 1|act_loss: 0.01251220703125|cri_loss: 0.00763702392578125|unsuper_loss: 0.0 +average reward score: 5.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.20%) |Training time=0.45s (21.20%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2351|ppo_ep: 1|act_loss: 0.006336212158203125|cri_loss: 0.0037937164306640625|unsuper_loss: 0.0 +average reward score: 5.35546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.34%) |Training time=0.45s (21.07%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2352|ppo_ep: 1|act_loss: -0.009918212890625|cri_loss: -0.004642486572265625|unsuper_loss: 0.0 +average reward score: 5.77734375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.47s (21.65%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2353|ppo_ep: 1|act_loss: 0.069091796875|cri_loss: 0.03546142578125|unsuper_loss: 0.0 +average reward score: 4.89453125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.75%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2354|ppo_ep: 1|act_loss: 0.0645751953125|cri_loss: 0.033111572265625|unsuper_loss: 0.0 +average reward score: 5.7109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.94%) |Training time=0.46s (21.48%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2355|ppo_ep: 1|act_loss: -0.000469207763671875|cri_loss: 0.00017404556274414062|unsuper_loss: 0.0 +average reward score: 5.0859375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.80%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2356|ppo_ep: 1|act_loss: 0.0271759033203125|cri_loss: 0.0141143798828125|unsuper_loss: 0.0 +average reward score: 5.109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.87%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2357|ppo_ep: 1|act_loss: -0.05279541015625|cri_loss: -0.025543212890625|unsuper_loss: 0.0 +average reward score: 5.7265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.88%) |Training time=0.46s (21.55%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2358|ppo_ep: 1|act_loss: -0.038299560546875|cri_loss: -0.017913818359375|unsuper_loss: 0.0 +average reward score: 5.3671875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.85%) |Training time=0.46s (21.58%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44 +[2023-04-14 10:14:36,326] [INFO] [logging.py:96:log_dist] [Rank 0] step=2360, skipped=28, lr=[7.975503880788498e-06, 7.975503880788498e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:14:36,344] [INFO] [timer.py:199:stop] epoch=0/micro_step=2360/global_step=2360, RunningAvgSamplesPerSec=106.79686323206259, CurrSamplesPerSec=106.73972149543116, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:14:36,437] [INFO] [logging.py:96:log_dist] [Rank 0] step=2360, skipped=34, lr=[4.13675503116838e-06, 4.13675503116838e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2359|ppo_ep: 1|act_loss: -0.00870513916015625|cri_loss: -0.003223419189453125|unsuper_loss: 0.0 +average reward score: 5.69140625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.81%) |Training time=0.46s (21.61%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2360|ppo_ep: 1|act_loss: -0.0218505859375|cri_loss: -0.010284423828125|unsuper_loss: 0.0 +average reward score: 5.37890625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.83%) |Training time=0.46s (21.59%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2361|ppo_ep: 1|act_loss: 0.003803253173828125|cri_loss: 0.002079010009765625|unsuper_loss: 0.0 +average reward score: 5.25390625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.04%) |Training time=0.46s (21.35%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2362|ppo_ep: 1|act_loss: 0.09405517578125|cri_loss: 0.04913330078125|unsuper_loss: 0.0 +average reward score: 6.08203125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.81%) |Training time=0.46s (21.61%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2363|ppo_ep: 1|act_loss: 0.03997802734375|cri_loss: 0.0205230712890625|unsuper_loss: 0.0 +average reward score: 5.0234375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.92%) |Training time=0.46s (21.50%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2364|ppo_ep: 1|act_loss: -0.01141357421875|cri_loss: -0.00485992431640625|unsuper_loss: 0.0 +average reward score: 5.0546875 +------------------------------------------------------------------------------------- +|E2E latency=2.68s |Gather latency=0.00s (0.00%) |Generate time=1.58s (58.98%) |Training time=0.46s (17.17%) |Others=0.64 (23.86%)|CurSamplesPerSec=11.94 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2365|ppo_ep: 1|act_loss: 0.00457000732421875|cri_loss: 0.00278472900390625|unsuper_loss: 0.0 +average reward score: 4.96875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.90%) |Training time=0.46s (21.50%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2366|ppo_ep: 1|act_loss: 0.01678466796875|cri_loss: 0.00885772705078125|unsuper_loss: 0.0 +average reward score: 5.65234375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.14%) |Training time=0.45s (21.26%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2367|ppo_ep: 1|act_loss: -0.0163726806640625|cri_loss: -0.0079345703125|unsuper_loss: 0.0 +average reward score: 5.48828125 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.99%) |Training time=0.46s (21.40%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2368|ppo_ep: 1|act_loss: 0.0004329681396484375|cri_loss: 0.0009245872497558594|unsuper_loss: 0.0 +average reward score: 5.1171875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.72%) |Training time=0.46s (21.69%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.44 +[2023-04-14 10:14:58,277] [INFO] [logging.py:96:log_dist] [Rank 0] step=2370, skipped=28, lr=[7.961410995623948e-06, 7.961410995623948e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:14:58,296] [INFO] [timer.py:199:stop] epoch=0/micro_step=2370/global_step=2370, RunningAvgSamplesPerSec=106.79987659925104, CurrSamplesPerSec=108.05257971649202, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:14:58,388] [INFO] [logging.py:96:log_dist] [Rank 0] step=2370, skipped=34, lr=[4.129467522698653e-06, 4.129467522698653e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2369|ppo_ep: 1|act_loss: -0.04071044921875|cri_loss: -0.01959228515625|unsuper_loss: 0.0 +average reward score: 5.02734375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.46s (21.41%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2370|ppo_ep: 1|act_loss: -0.04754638671875|cri_loss: -0.023162841796875|unsuper_loss: 0.0 +average reward score: 5.46484375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.87%) |Training time=0.46s (21.55%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2371|ppo_ep: 1|act_loss: -0.02392578125|cri_loss: -0.0097503662109375|unsuper_loss: 0.0 +average reward score: 5.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.05%) |Training time=0.46s (21.36%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2372|ppo_ep: 1|act_loss: 0.03375244140625|cri_loss: 0.0176544189453125|unsuper_loss: 0.0 +average reward score: 5.765625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.70%) |Training time=0.44s (20.72%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2373|ppo_ep: 1|act_loss: 0.03924560546875|cri_loss: 0.02032470703125|unsuper_loss: 0.0 +average reward score: 5.7109375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.45%) |Training time=0.45s (20.96%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2374|ppo_ep: 1|act_loss: 0.0574951171875|cri_loss: 0.03045654296875|unsuper_loss: 0.0 +average reward score: 5.01171875 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.61%) |Training time=0.45s (20.03%) |Others=0.10 (4.36%)|CurSamplesPerSec=14.20 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2375|ppo_ep: 1|act_loss: 0.0867919921875|cri_loss: 0.047027587890625|unsuper_loss: 0.0 +average reward score: 5.3203125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.13%) |Training time=0.46s (21.29%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2376|ppo_ep: 1|act_loss: 0.0019407272338867188|cri_loss: 0.00193023681640625|unsuper_loss: 0.0 +average reward score: 4.84765625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.05%) |Training time=0.46s (21.36%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2377|ppo_ep: 1|act_loss: -0.01904296875|cri_loss: -0.00902557373046875|unsuper_loss: 0.0 +average reward score: 4.78515625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.71%) |Training time=0.47s (21.71%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2378|ppo_ep: 1|act_loss: -0.0322265625|cri_loss: -0.01522064208984375|unsuper_loss: 0.0 +average reward score: 5.1640625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.90%) |Training time=0.46s (21.53%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44 +[2023-04-14 10:15:19,996] [INFO] [logging.py:96:log_dist] [Rank 0] step=2380, skipped=28, lr=[7.947271621282263e-06, 7.947271621282263e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:15:20,014] [INFO] [timer.py:199:stop] epoch=0/micro_step=2380/global_step=2380, RunningAvgSamplesPerSec=106.80961062155866, CurrSamplesPerSec=108.14730586013292, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:15:20,107] [INFO] [logging.py:96:log_dist] [Rank 0] step=2380, skipped=34, lr=[4.122155861589364e-06, 4.122155861589364e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2379|ppo_ep: 1|act_loss: -0.03759765625|cri_loss: -0.0180816650390625|unsuper_loss: 0.0 +average reward score: 5.453125 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.98%) |Training time=0.46s (19.79%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.80 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2380|ppo_ep: 1|act_loss: -0.0775146484375|cri_loss: -0.037384033203125|unsuper_loss: 0.0 +average reward score: 5.37890625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.46s (21.38%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2381|ppo_ep: 1|act_loss: 0.005031585693359375|cri_loss: 0.0032672882080078125|unsuper_loss: 0.0 +average reward score: 5.078125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.46s (21.38%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2382|ppo_ep: 1|act_loss: 0.00275421142578125|cri_loss: 0.0020999908447265625|unsuper_loss: 0.0 +average reward score: 5.24609375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.44%) |Training time=0.47s (21.99%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2383|ppo_ep: 1|act_loss: 0.025909423828125|cri_loss: 0.0133514404296875|unsuper_loss: 0.0 +average reward score: 5.1328125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.63%) |Training time=0.47s (21.79%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2384|ppo_ep: 1|act_loss: -0.0069427490234375|cri_loss: -0.003017425537109375|unsuper_loss: 0.0 +average reward score: 5.140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.10%) |Training time=0.46s (21.36%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2385|ppo_ep: 1|act_loss: 0.0206756591796875|cri_loss: 0.01073455810546875|unsuper_loss: 0.0 +average reward score: 5.2109375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.97%) |Training time=0.46s (21.40%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2386|ppo_ep: 1|act_loss: 0.0285186767578125|cri_loss: 0.015472412109375|unsuper_loss: 0.0 +average reward score: 4.9453125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.77%) |Training time=0.46s (21.63%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2387|ppo_ep: 1|act_loss: 0.01178741455078125|cri_loss: 0.006023406982421875|unsuper_loss: 0.0 +average reward score: 5.60546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.81%) |Training time=0.47s (21.62%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2388|ppo_ep: 1|act_loss: 0.00728607177734375|cri_loss: 0.004486083984375|unsuper_loss: 0.0 +average reward score: 5.4453125 +------------------------------------------------------------------------------------- +|E2E latency=3.55s |Gather latency=0.00s (0.00%) |Generate time=1.59s (44.74%) |Training time=0.47s (13.16%) |Others=1.49 (42.10%)|CurSamplesPerSec=9.02 |AvgSamplesPerSec=14.44 +[2023-04-14 10:15:42,901] [INFO] [logging.py:96:log_dist] [Rank 0] step=2390, skipped=28, lr=[7.933085967343084e-06, 7.933085967343084e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:15:42,920] [INFO] [timer.py:199:stop] epoch=0/micro_step=2390/global_step=2390, RunningAvgSamplesPerSec=106.80495752503842, CurrSamplesPerSec=102.83638302795524, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:15:43,012] [INFO] [logging.py:96:log_dist] [Rank 0] step=2390, skipped=34, lr=[4.114820156216969e-06, 4.114820156216969e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2389|ppo_ep: 1|act_loss: 0.00827789306640625|cri_loss: 0.004276275634765625|unsuper_loss: 0.0 +average reward score: 5.98828125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.41%) |Training time=0.47s (22.04%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2390|ppo_ep: 1|act_loss: 0.07122802734375|cri_loss: 0.03704833984375|unsuper_loss: 0.0 +average reward score: 5.60546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.09%) |Training time=0.48s (22.36%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2391|ppo_ep: 1|act_loss: -0.0092926025390625|cri_loss: -0.0042572021484375|unsuper_loss: 0.0 +average reward score: 5.3984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.47s (21.96%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2392|ppo_ep: 1|act_loss: 0.034332275390625|cri_loss: 0.018798828125|unsuper_loss: 0.0 +average reward score: 5.453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.53%) |Training time=0.47s (21.91%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2393|ppo_ep: 1|act_loss: 0.005512237548828125|cri_loss: 0.0033626556396484375|unsuper_loss: 0.0 +average reward score: 5.21484375 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.59s (70.45%) |Training time=0.48s (21.54%) |Others=0.18 (8.01%)|CurSamplesPerSec=14.22 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2394|ppo_ep: 1|act_loss: -0.0863037109375|cri_loss: -0.039642333984375|unsuper_loss: 0.0 +average reward score: 5.08984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.50%) |Training time=0.47s (21.93%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2395|ppo_ep: 1|act_loss: 0.0174560546875|cri_loss: 0.0109100341796875|unsuper_loss: 0.0 +average reward score: 5.234375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.88%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2396|ppo_ep: 1|act_loss: 0.0147552490234375|cri_loss: 0.0081024169921875|unsuper_loss: 0.0 +average reward score: 5.15625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.87%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2397|ppo_ep: 1|act_loss: 0.04022216796875|cri_loss: 0.02227783203125|unsuper_loss: 0.0 +average reward score: 5.05078125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.90%) |Training time=0.46s (21.53%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2398|ppo_ep: 1|act_loss: 0.00225067138671875|cri_loss: 0.002269744873046875|unsuper_loss: 0.0 +average reward score: 5.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.80%) |Training time=0.47s (21.64%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.44 +[2023-04-14 10:16:04,555] [INFO] [logging.py:96:log_dist] [Rank 0] step=2400, skipped=28, lr=[7.918854244072016e-06, 7.918854244072016e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:16:04,930] [INFO] [timer.py:199:stop] epoch=0/micro_step=2400/global_step=2400, RunningAvgSamplesPerSec=106.73590195556389, CurrSamplesPerSec=48.33765913846442, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:16:05,022] [INFO] [logging.py:96:log_dist] [Rank 0] step=2400, skipped=34, lr=[4.107460515314316e-06, 4.107460515314316e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2399|ppo_ep: 1|act_loss: 0.03839111328125|cri_loss: 0.019744873046875|unsuper_loss: 0.0 +average reward score: 5.015625 +------------------------------------------------------------------------------------- +|E2E latency=2.51s |Gather latency=0.00s (0.00%) |Generate time=1.59s (63.25%) |Training time=0.83s (32.86%) |Others=0.10 (3.89%)|CurSamplesPerSec=12.74 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2400|ppo_ep: 1|act_loss: 0.0072479248046875|cri_loss: 0.0039520263671875|unsuper_loss: 0.0 +average reward score: 5.57421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.46%) |Training time=0.48s (22.00%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2401|ppo_ep: 1|act_loss: 0.014312744140625|cri_loss: 0.00835418701171875|unsuper_loss: 0.0 +average reward score: 5.26953125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.94%) |Training time=0.46s (21.49%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2402|ppo_ep: 1|act_loss: -0.0147247314453125|cri_loss: -0.0068511962890625|unsuper_loss: 0.0 +average reward score: 4.9453125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.29%) |Training time=0.49s (22.20%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2403|ppo_ep: 1|act_loss: -0.0408935546875|cri_loss: -0.0194549560546875|unsuper_loss: 0.0 +average reward score: 5.0859375 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.99%) |Training time=0.47s (20.65%) |Others=0.10 (4.36%)|CurSamplesPerSec=14.17 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2404|ppo_ep: 1|act_loss: -0.0189361572265625|cri_loss: -0.0086669921875|unsuper_loss: 0.0 +average reward score: 5.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.49%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2405|ppo_ep: 1|act_loss: -0.0101776123046875|cri_loss: -0.0037841796875|unsuper_loss: 0.0 +average reward score: 5.71875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.85%) |Training time=0.46s (21.57%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2406|ppo_ep: 1|act_loss: -0.02496337890625|cri_loss: -0.0120086669921875|unsuper_loss: 0.0 +average reward score: 5.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.92%) |Training time=0.46s (21.52%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2407|ppo_ep: 1|act_loss: -0.023956298828125|cri_loss: -0.01080322265625|unsuper_loss: 0.0 +average reward score: 5.08203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.48s (22.09%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2408|ppo_ep: 1|act_loss: -0.03350830078125|cri_loss: -0.0165557861328125|unsuper_loss: 0.0 +average reward score: 5.5390625 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.79%) |Training time=0.49s (21.01%) |Others=0.10 (4.20%)|CurSamplesPerSec=13.68 |AvgSamplesPerSec=14.44 +[2023-04-14 10:16:26,789] [INFO] [logging.py:96:log_dist] [Rank 0] step=2410, skipped=28, lr=[7.904576662417536e-06, 7.904576662417536e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:16:26,807] [INFO] [timer.py:199:stop] epoch=0/micro_step=2410/global_step=2410, RunningAvgSamplesPerSec=106.72267940839932, CurrSamplesPerSec=106.29027667458327, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:16:26,900] [INFO] [logging.py:96:log_dist] [Rank 0] step=2410, skipped=34, lr=[4.10007704796904e-06, 4.10007704796904e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2409|ppo_ep: 1|act_loss: 0.013824462890625|cri_loss: 0.00738525390625|unsuper_loss: 0.0 +average reward score: 5.3671875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.52%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2410|ppo_ep: 1|act_loss: 0.06011962890625|cri_loss: 0.030853271484375|unsuper_loss: 0.0 +average reward score: 5.49609375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.51%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2411|ppo_ep: 1|act_loss: 0.0352783203125|cri_loss: 0.0180816650390625|unsuper_loss: 0.0 +average reward score: 5.81640625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.00%) |Training time=0.46s (21.42%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2412|ppo_ep: 1|act_loss: -0.01898193359375|cri_loss: -0.0083770751953125|unsuper_loss: 0.0 +average reward score: 4.51953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.83%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2413|ppo_ep: 1|act_loss: 0.0094451904296875|cri_loss: 0.005046844482421875|unsuper_loss: 0.0 +average reward score: 5.3515625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.81%) |Training time=0.46s (21.62%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44 +[2023-04-14 10:16:37,982] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 2414|ppo_ep: 1|act_loss: 0.016632080078125|cri_loss: 0.00992584228515625|unsuper_loss: 0.0 +average reward score: 5.01171875 +------------------------------------------------------------------------------------- +|E2E latency=2.06s |Gather latency=0.00s (0.00%) |Generate time=1.58s (76.94%) |Training time=0.39s (18.77%) |Others=0.09 (4.29%)|CurSamplesPerSec=15.53 |AvgSamplesPerSec=14.44 +[2023-04-14 10:16:40,127] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 2415|ppo_ep: 1|act_loss: 0.0082550048828125|cri_loss: 0.00455474853515625|unsuper_loss: 0.0 +average reward score: 5.265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.01%) |Training time=0.47s (21.90%) |Others=0.09 (4.09%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2416|ppo_ep: 1|act_loss: -0.0082855224609375|cri_loss: -0.002712249755859375|unsuper_loss: 0.0 +average reward score: 4.8359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.53%) |Training time=0.47s (21.90%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2417|ppo_ep: 1|act_loss: -0.03045654296875|cri_loss: -0.0147552490234375|unsuper_loss: 0.0 +average reward score: 5.015625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.34%) |Training time=0.48s (22.12%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2418|ppo_ep: 1|act_loss: -0.05352783203125|cri_loss: -0.0257720947265625|unsuper_loss: 0.0 +average reward score: 5.91796875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.07%) |Training time=0.46s (21.34%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.44 +[2023-04-14 10:16:48,627] [INFO] [logging.py:96:log_dist] [Rank 0] step=2420, skipped=28, lr=[7.890253434007843e-06, 7.890253434007843e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:16:48,646] [INFO] [timer.py:199:stop] epoch=0/micro_step=2420/global_step=2420, RunningAvgSamplesPerSec=106.727923793645, CurrSamplesPerSec=107.89796226803395, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:16:48,739] [INFO] [logging.py:96:log_dist] [Rank 0] step=2420, skipped=36, lr=[4.09415319258652e-06, 4.09415319258652e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2419|ppo_ep: 1|act_loss: -0.021881103515625|cri_loss: -0.0089111328125|unsuper_loss: 0.0 +average reward score: 5.58203125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.98%) |Training time=0.46s (21.45%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2420|ppo_ep: 1|act_loss: -0.023956298828125|cri_loss: -0.0104522705078125|unsuper_loss: 0.0 +average reward score: 5.02734375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.59%) |Training time=0.47s (21.82%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2421|ppo_ep: 1|act_loss: -0.0184326171875|cri_loss: -0.00875091552734375|unsuper_loss: 0.0 +average reward score: 4.83203125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.17%) |Training time=0.45s (21.23%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2422|ppo_ep: 1|act_loss: 0.0226287841796875|cri_loss: 0.0121002197265625|unsuper_loss: 0.0 +average reward score: 5.0 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.92%) |Training time=0.46s (21.49%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2423|ppo_ep: 1|act_loss: 0.03912353515625|cri_loss: 0.0204925537109375|unsuper_loss: 0.0 +average reward score: 4.7265625 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.76%) |Training time=0.46s (19.95%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.88 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2424|ppo_ep: 1|act_loss: -0.020233154296875|cri_loss: -0.009307861328125|unsuper_loss: 0.0 +average reward score: 5.34375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.88%) |Training time=0.46s (21.53%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2425|ppo_ep: 1|act_loss: -0.003818511962890625|cri_loss: -0.0005397796630859375|unsuper_loss: 0.0 +average reward score: 5.3671875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.96%) |Training time=0.46s (21.45%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2426|ppo_ep: 1|act_loss: 0.0031452178955078125|cri_loss: 0.00262451171875|unsuper_loss: 0.0 +average reward score: 5.27734375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.92%) |Training time=0.46s (21.49%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2427|ppo_ep: 1|act_loss: 0.01540374755859375|cri_loss: 0.0081024169921875|unsuper_loss: 0.0 +average reward score: 5.15234375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.18%) |Training time=0.46s (21.24%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2428|ppo_ep: 1|act_loss: -0.0140380859375|cri_loss: -0.00598907470703125|unsuper_loss: 0.0 +average reward score: 5.26953125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.22%) |Training time=0.45s (21.20%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.44 +[2023-04-14 10:17:10,235] [INFO] [logging.py:96:log_dist] [Rank 0] step=2430, skipped=28, lr=[7.875884771147738e-06, 7.875884771147738e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:17:10,700] [INFO] [timer.py:199:stop] epoch=0/micro_step=2430/global_step=2430, RunningAvgSamplesPerSec=106.66533428350265, CurrSamplesPerSec=42.60237837151541, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:17:10,793] [INFO] [logging.py:96:log_dist] [Rank 0] step=2430, skipped=36, lr=[4.0867271136716215e-06, 4.0867271136716215e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2429|ppo_ep: 1|act_loss: 0.012298583984375|cri_loss: 0.00710296630859375|unsuper_loss: 0.0 +average reward score: 5.05859375 +------------------------------------------------------------------------------------- +|E2E latency=2.60s |Gather latency=0.00s (0.00%) |Generate time=1.59s (61.03%) |Training time=0.91s (35.20%) |Others=0.10 (3.77%)|CurSamplesPerSec=12.32 |AvgSamplesPerSec=14.44 +[2023-04-14 10:17:12,818] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 2430|ppo_ep: 1|act_loss: 0.060546875|cri_loss: 0.031036376953125|unsuper_loss: 0.0 +average reward score: 4.734375 +------------------------------------------------------------------------------------- +|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.78%) |Training time=0.44s (20.58%) |Others=0.10 (4.64%)|CurSamplesPerSec=15.11 |AvgSamplesPerSec=14.44 +[2023-04-14 10:17:14,938] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 2431|ppo_ep: 1|act_loss: 0.031097412109375|cri_loss: 0.0160064697265625|unsuper_loss: 0.0 +average reward score: 5.7265625 +------------------------------------------------------------------------------------- +|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.71%) |Training time=0.42s (19.65%) |Others=0.10 (4.64%)|CurSamplesPerSec=15.09 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2432|ppo_ep: 1|act_loss: 0.023956298828125|cri_loss: 0.01248931884765625|unsuper_loss: 0.0 +average reward score: 5.125 +------------------------------------------------------------------------------------- +|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.06%) |Training time=0.50s (22.35%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.36 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2433|ppo_ep: 1|act_loss: -0.008575439453125|cri_loss: -0.0037174224853515625|unsuper_loss: 0.0 +average reward score: 5.54296875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.32%) |Training time=0.45s (21.08%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2434|ppo_ep: 1|act_loss: -0.03350830078125|cri_loss: -0.01513671875|unsuper_loss: 0.0 +average reward score: 5.0859375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.00%) |Training time=0.46s (21.43%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2435|ppo_ep: 1|act_loss: -0.048828125|cri_loss: -0.0222625732421875|unsuper_loss: 0.0 +average reward score: 4.953125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.05%) |Training time=0.46s (21.37%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2436|ppo_ep: 1|act_loss: -0.03192138671875|cri_loss: -0.01523590087890625|unsuper_loss: 0.0 +average reward score: 5.0859375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.00%) |Training time=0.46s (21.43%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2437|ppo_ep: 1|act_loss: -0.03497314453125|cri_loss: -0.0169219970703125|unsuper_loss: 0.0 +average reward score: 5.203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.65%) |Training time=0.47s (21.81%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2438|ppo_ep: 1|act_loss: -0.00943756103515625|cri_loss: -0.004093170166015625|unsuper_loss: 0.0 +average reward score: 4.97265625 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.73%) |Training time=0.47s (20.02%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.78 |AvgSamplesPerSec=14.44 +[2023-04-14 10:17:32,354] [INFO] [logging.py:96:log_dist] [Rank 0] step=2440, skipped=30, lr=[7.86435727115419e-06, 7.86435727115419e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:17:32,372] [INFO] [timer.py:199:stop] epoch=0/micro_step=2440/global_step=2440, RunningAvgSamplesPerSec=106.67192282118397, CurrSamplesPerSec=106.79475676768126, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:17:32,464] [INFO] [logging.py:96:log_dist] [Rank 0] step=2440, skipped=36, lr=[4.079277515633127e-06, 4.079277515633127e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2439|ppo_ep: 1|act_loss: -0.02801513671875|cri_loss: -0.01326751708984375|unsuper_loss: 0.0 +average reward score: 4.84375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.92%) |Training time=0.46s (21.53%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2440|ppo_ep: 1|act_loss: 0.0024929046630859375|cri_loss: 0.002971649169921875|unsuper_loss: 0.0 +average reward score: 5.52734375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.90%) |Training time=0.46s (21.54%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2441|ppo_ep: 1|act_loss: 0.0130157470703125|cri_loss: 0.00727081298828125|unsuper_loss: 0.0 +average reward score: 5.3046875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.94%) |Training time=0.46s (21.48%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2442|ppo_ep: 1|act_loss: -0.056610107421875|cri_loss: -0.0220947265625|unsuper_loss: 0.0 +average reward score: 5.25 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.65%) |Training time=0.47s (21.79%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2443|ppo_ep: 1|act_loss: 0.002925872802734375|cri_loss: 0.0020599365234375|unsuper_loss: 0.0 +average reward score: 5.28515625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.50%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2444|ppo_ep: 1|act_loss: -0.0277099609375|cri_loss: -0.0124053955078125|unsuper_loss: 0.0 +average reward score: 5.3828125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.63%) |Training time=0.47s (21.80%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2445|ppo_ep: 1|act_loss: -0.01244354248046875|cri_loss: -0.00579071044921875|unsuper_loss: 0.0 +average reward score: 5.28125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.83%) |Training time=0.46s (21.58%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2446|ppo_ep: 1|act_loss: 0.003345489501953125|cri_loss: 0.0025730133056640625|unsuper_loss: 0.0 +average reward score: 5.1796875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.85%) |Training time=0.46s (21.43%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2447|ppo_ep: 1|act_loss: 0.0196533203125|cri_loss: 0.010162353515625|unsuper_loss: 0.0 +average reward score: 5.50390625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.90%) |Training time=0.49s (22.59%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2448|ppo_ep: 1|act_loss: 0.00513458251953125|cri_loss: 0.003154754638671875|unsuper_loss: 0.0 +average reward score: 5.7578125 +------------------------------------------------------------------------------------- +|E2E latency=3.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (50.62%) |Training time=0.49s (15.71%) |Others=1.06 (33.67%)|CurSamplesPerSec=10.16 |AvgSamplesPerSec=14.44 +[2023-04-14 10:17:54,826] [INFO] [logging.py:96:log_dist] [Rank 0] step=2450, skipped=30, lr=[7.849907363439066e-06, 7.849907363439066e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:17:54,844] [INFO] [timer.py:199:stop] epoch=0/micro_step=2450/global_step=2450, RunningAvgSamplesPerSec=106.67218273543259, CurrSamplesPerSec=143.05433748050848, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:17:54,937] [INFO] [logging.py:96:log_dist] [Rank 0] step=2450, skipped=36, lr=[4.071804508892053e-06, 4.071804508892053e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2449|ppo_ep: 1|act_loss: 0.0030059814453125|cri_loss: 0.0019683837890625|unsuper_loss: 0.0 +average reward score: 5.13671875 +------------------------------------------------------------------------------------- +|E2E latency=2.09s |Gather latency=0.00s (0.00%) |Generate time=1.61s (76.84%) |Training time=0.39s (18.46%) |Others=0.10 (4.70%)|CurSamplesPerSec=15.28 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2450|ppo_ep: 1|act_loss: 0.0238037109375|cri_loss: 0.0124053955078125|unsuper_loss: 0.0 +average reward score: 5.75 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.47%) |Training time=0.48s (22.01%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2451|ppo_ep: 1|act_loss: 0.023345947265625|cri_loss: 0.01213836669921875|unsuper_loss: 0.0 +average reward score: 4.91015625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.95%) |Training time=0.47s (21.52%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2452|ppo_ep: 1|act_loss: -0.028411865234375|cri_loss: -0.0133209228515625|unsuper_loss: 0.0 +average reward score: 5.86328125 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.61s (69.67%) |Training time=0.60s (26.07%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.87 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2453|ppo_ep: 1|act_loss: -0.03826904296875|cri_loss: -0.01776123046875|unsuper_loss: 0.0 +average reward score: 5.1484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.20%) |Training time=0.46s (21.27%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2454|ppo_ep: 1|act_loss: -0.0279541015625|cri_loss: -0.01248931884765625|unsuper_loss: 0.0 +average reward score: 5.98046875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.85%) |Training time=0.47s (21.62%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2455|ppo_ep: 1|act_loss: -0.070068359375|cri_loss: -0.033843994140625|unsuper_loss: 0.0 +average reward score: 5.5 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.92%) |Training time=0.47s (21.53%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2456|ppo_ep: 1|act_loss: -0.0152740478515625|cri_loss: -0.00713348388671875|unsuper_loss: 0.0 +average reward score: 5.61328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.00%) |Training time=0.47s (21.47%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2457|ppo_ep: 1|act_loss: 0.03961181640625|cri_loss: 0.0210113525390625|unsuper_loss: 0.0 +average reward score: 5.94140625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.77%) |Training time=0.47s (21.69%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2458|ppo_ep: 1|act_loss: 0.0264739990234375|cri_loss: 0.013702392578125|unsuper_loss: 0.0 +average reward score: 5.20703125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.15%) |Training time=0.46s (21.30%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.44 +[2023-04-14 10:18:16,652] [INFO] [logging.py:96:log_dist] [Rank 0] step=2460, skipped=30, lr=[7.83541261929962e-06, 7.83541261929962e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:18:16,671] [INFO] [timer.py:199:stop] epoch=0/micro_step=2460/global_step=2460, RunningAvgSamplesPerSec=106.65022498334142, CurrSamplesPerSec=106.88601260323848, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:18:16,763] [INFO] [logging.py:96:log_dist] [Rank 0] step=2460, skipped=36, lr=[4.064308204216384e-06, 4.064308204216384e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2459|ppo_ep: 1|act_loss: 0.04412841796875|cri_loss: 0.0247955322265625|unsuper_loss: 0.0 +average reward score: 4.89453125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.19%) |Training time=0.46s (21.30%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2460|ppo_ep: 1|act_loss: 0.00756072998046875|cri_loss: 0.004199981689453125|unsuper_loss: 0.0 +average reward score: 5.3046875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.90%) |Training time=0.47s (21.58%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2461|ppo_ep: 1|act_loss: -0.016754150390625|cri_loss: -0.00782012939453125|unsuper_loss: 0.0 +average reward score: 5.046875 +------------------------------------------------------------------------------------- +|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.64s (73.36%) |Training time=0.50s (22.24%) |Others=0.10 (4.40%)|CurSamplesPerSec=14.30 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2462|ppo_ep: 1|act_loss: -0.0281982421875|cri_loss: -0.01314544677734375|unsuper_loss: 0.0 +average reward score: 5.26953125 +------------------------------------------------------------------------------------- +|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.72s (73.11%) |Training time=0.53s (22.69%) |Others=0.10 (4.20%)|CurSamplesPerSec=13.64 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2463|ppo_ep: 1|act_loss: 0.015106201171875|cri_loss: 0.00778961181640625|unsuper_loss: 0.0 +average reward score: 6.09765625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.79%) |Training time=0.45s (20.65%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2464|ppo_ep: 1|act_loss: 0.005290985107421875|cri_loss: 0.003032684326171875|unsuper_loss: 0.0 +average reward score: 5.55859375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.44%) |Training time=0.45s (21.01%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2465|ppo_ep: 1|act_loss: 0.03558349609375|cri_loss: 0.018585205078125|unsuper_loss: 0.0 +average reward score: 5.5625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.61%) |Training time=0.45s (20.82%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2466|ppo_ep: 1|act_loss: 0.05950927734375|cri_loss: 0.03204345703125|unsuper_loss: 0.0 +average reward score: 5.8828125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.17%) |Training time=0.47s (21.34%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2467|ppo_ep: 1|act_loss: -0.024322509765625|cri_loss: -0.011688232421875|unsuper_loss: 0.0 +average reward score: 5.859375 +------------------------------------------------------------------------------------- +|E2E latency=2.83s |Gather latency=0.00s (0.00%) |Generate time=1.77s (62.55%) |Training time=0.47s (16.55%) |Others=0.59 (20.91%)|CurSamplesPerSec=11.32 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2468|ppo_ep: 1|act_loss: -0.0190582275390625|cri_loss: -0.0084686279296875|unsuper_loss: 0.0 +average reward score: 5.515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.32%) |Training time=0.46s (21.10%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.44 +[2023-04-14 10:18:39,237] [INFO] [logging.py:96:log_dist] [Rank 0] step=2470, skipped=30, lr=[7.820873253582933e-06, 7.820873253582933e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:18:39,255] [INFO] [timer.py:199:stop] epoch=0/micro_step=2470/global_step=2470, RunningAvgSamplesPerSec=106.64010750425537, CurrSamplesPerSec=108.77555969060602, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:18:39,348] [INFO] [logging.py:96:log_dist] [Rank 0] step=2470, skipped=36, lr=[4.0567887127194405e-06, 4.0567887127194405e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2469|ppo_ep: 1|act_loss: -0.0386962890625|cri_loss: -0.01812744140625|unsuper_loss: 0.0 +average reward score: 5.453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.33%) |Training time=0.46s (21.14%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2470|ppo_ep: 1|act_loss: -0.01959228515625|cri_loss: -0.00817108154296875|unsuper_loss: 0.0 +average reward score: 5.7890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.08%) |Training time=0.46s (21.37%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2471|ppo_ep: 1|act_loss: -0.060882568359375|cri_loss: -0.029144287109375|unsuper_loss: 0.0 +average reward score: 5.24609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.31%) |Training time=0.46s (21.13%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2472|ppo_ep: 1|act_loss: -0.0433349609375|cri_loss: -0.02105712890625|unsuper_loss: 0.0 +average reward score: 5.73046875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.15%) |Training time=0.46s (21.31%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2473|ppo_ep: 1|act_loss: 0.016021728515625|cri_loss: 0.00916290283203125|unsuper_loss: 0.0 +average reward score: 5.015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.57%) |Training time=0.45s (20.86%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2474|ppo_ep: 1|act_loss: 0.01537322998046875|cri_loss: 0.008544921875|unsuper_loss: 0.0 +average reward score: 5.8203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.18%) |Training time=0.46s (21.28%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2475|ppo_ep: 1|act_loss: 0.0110931396484375|cri_loss: 0.00629425048828125|unsuper_loss: 0.0 +average reward score: 5.01171875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.29%) |Training time=0.46s (21.16%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2476|ppo_ep: 1|act_loss: 0.033050537109375|cri_loss: 0.01837158203125|unsuper_loss: 0.0 +average reward score: 5.48046875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.10%) |Training time=0.46s (21.35%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2477|ppo_ep: 1|act_loss: 0.0188446044921875|cri_loss: 0.00989532470703125|unsuper_loss: 0.0 +average reward score: 5.63671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.35%) |Training time=0.46s (21.12%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2478|ppo_ep: 1|act_loss: 0.025146484375|cri_loss: 0.0129547119140625|unsuper_loss: 0.0 +average reward score: 5.46875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.45%) |Training time=0.45s (20.99%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44 +[2023-04-14 10:19:00,844] [INFO] [logging.py:96:log_dist] [Rank 0] step=2480, skipped=30, lr=[7.806289481797477e-06, 7.806289481797477e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:19:00,863] [INFO] [timer.py:199:stop] epoch=0/micro_step=2480/global_step=2480, RunningAvgSamplesPerSec=106.6468968374762, CurrSamplesPerSec=107.46410836588056, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:19:00,955] [INFO] [logging.py:96:log_dist] [Rank 0] step=2480, skipped=36, lr=[4.049246145858227e-06, 4.049246145858227e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2479|ppo_ep: 1|act_loss: 0.01556396484375|cri_loss: 0.008331298828125|unsuper_loss: 0.0 +average reward score: 5.2734375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.16%) |Training time=0.46s (21.31%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2480|ppo_ep: 1|act_loss: -0.0232086181640625|cri_loss: -0.01064300537109375|unsuper_loss: 0.0 +average reward score: 5.2734375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.23%) |Training time=0.46s (21.21%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2481|ppo_ep: 1|act_loss: -0.02044677734375|cri_loss: -0.01001739501953125|unsuper_loss: 0.0 +average reward score: 5.2578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.16%) |Training time=0.46s (21.29%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2482|ppo_ep: 1|act_loss: -0.02606201171875|cri_loss: -0.01244354248046875|unsuper_loss: 0.0 +average reward score: 5.43359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.26%) |Training time=0.46s (21.18%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2483|ppo_ep: 1|act_loss: -0.022430419921875|cri_loss: -0.01071929931640625|unsuper_loss: 0.0 +average reward score: 5.38671875 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.22%) |Training time=0.45s (19.56%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2484|ppo_ep: 1|act_loss: -0.041107177734375|cri_loss: -0.0194549560546875|unsuper_loss: 0.0 +average reward score: 6.00390625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.46s (21.17%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2485|ppo_ep: 1|act_loss: -0.0049285888671875|cri_loss: -0.0019235610961914062|unsuper_loss: 0.0 +average reward score: 5.7421875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.45%) |Training time=0.45s (20.98%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2486|ppo_ep: 1|act_loss: 0.01280975341796875|cri_loss: 0.006969451904296875|unsuper_loss: 0.0 +average reward score: 5.6484375 +------------------------------------------------------------------------------------- +|E2E latency=3.28s |Gather latency=0.00s (0.00%) |Generate time=1.61s (49.07%) |Training time=0.45s (13.68%) |Others=1.22 (37.25%)|CurSamplesPerSec=9.76 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2487|ppo_ep: 1|act_loss: -0.00832366943359375|cri_loss: -0.0038585662841796875|unsuper_loss: 0.0 +average reward score: 5.8203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.24%) |Training time=0.46s (21.21%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2488|ppo_ep: 1|act_loss: 0.04339599609375|cri_loss: 0.0228118896484375|unsuper_loss: 0.0 +average reward score: 5.640625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.40%) |Training time=0.45s (21.04%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.44 +[2023-04-14 10:19:23,722] [INFO] [logging.py:96:log_dist] [Rank 0] step=2490, skipped=30, lr=[7.791661520109931e-06, 7.791661520109931e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:19:23,741] [INFO] [timer.py:199:stop] epoch=0/micro_step=2490/global_step=2490, RunningAvgSamplesPerSec=106.6586371426018, CurrSamplesPerSec=114.51340619587567, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:19:23,833] [INFO] [logging.py:96:log_dist] [Rank 0] step=2490, skipped=36, lr=[4.041680615431779e-06, 4.041680615431779e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2489|ppo_ep: 1|act_loss: -0.00444793701171875|cri_loss: -0.0018949508666992188|unsuper_loss: 0.0 +average reward score: 5.421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.01%) |Training time=0.44s (20.45%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2490|ppo_ep: 1|act_loss: -0.036956787109375|cri_loss: -0.0178070068359375|unsuper_loss: 0.0 +average reward score: 5.8828125 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.48%) |Training time=0.46s (20.95%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2491|ppo_ep: 1|act_loss: -0.022216796875|cri_loss: -0.0098419189453125|unsuper_loss: 0.0 +average reward score: 5.5 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.59%) |Training time=0.45s (20.84%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2492|ppo_ep: 1|act_loss: -0.011993408203125|cri_loss: -0.00548553466796875|unsuper_loss: 0.0 +average reward score: 5.8125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.99%) |Training time=0.44s (20.43%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2493|ppo_ep: 1|act_loss: 0.0078582763671875|cri_loss: 0.0042724609375|unsuper_loss: 0.0 +average reward score: 5.640625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.56%) |Training time=0.46s (20.92%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2494|ppo_ep: 1|act_loss: 0.00855255126953125|cri_loss: 0.00464630126953125|unsuper_loss: 0.0 +average reward score: 5.828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.93%) |Training time=0.47s (21.55%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2495|ppo_ep: 1|act_loss: -0.022430419921875|cri_loss: -0.0108795166015625|unsuper_loss: 0.0 +average reward score: 5.0703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.52%) |Training time=0.45s (20.92%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2496|ppo_ep: 1|act_loss: 0.018402099609375|cri_loss: 0.0096435546875|unsuper_loss: 0.0 +average reward score: 5.55859375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.56%) |Training time=0.45s (20.88%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2497|ppo_ep: 1|act_loss: -0.00981903076171875|cri_loss: -0.004726409912109375|unsuper_loss: 0.0 +average reward score: 5.44921875 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.66s (73.36%) |Training time=0.50s (22.27%) |Others=0.10 (4.37%)|CurSamplesPerSec=14.16 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2498|ppo_ep: 1|act_loss: 0.0111083984375|cri_loss: 0.00756072998046875|unsuper_loss: 0.0 +average reward score: 5.3984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.53%) |Training time=0.45s (20.90%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44 +[2023-04-14 10:19:45,473] [INFO] [logging.py:96:log_dist] [Rank 0] step=2500, skipped=30, lr=[7.776989585341974e-06, 7.776989585341974e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:19:45,491] [INFO] [timer.py:199:stop] epoch=0/micro_step=2500/global_step=2500, RunningAvgSamplesPerSec=106.674029550773, CurrSamplesPerSec=108.61095709058188, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:19:45,584] [INFO] [logging.py:96:log_dist] [Rank 0] step=2500, skipped=36, lr=[4.034092233579507e-06, 4.034092233579507e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2499|ppo_ep: 1|act_loss: 0.00739288330078125|cri_loss: 0.003993988037109375|unsuper_loss: 0.0 +average reward score: 5.921875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.28%) |Training time=0.46s (21.19%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2500|ppo_ep: 1|act_loss: 0.02899169921875|cri_loss: 0.015228271484375|unsuper_loss: 0.0 +average reward score: 5.62109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.14%) |Training time=0.46s (21.29%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2501|ppo_ep: 1|act_loss: 0.0305938720703125|cri_loss: 0.0160675048828125|unsuper_loss: 0.0 +average reward score: 5.5625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.08%) |Training time=0.46s (21.37%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2502|ppo_ep: 1|act_loss: -0.043670654296875|cri_loss: -0.02056884765625|unsuper_loss: 0.0 +average reward score: 5.40234375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.29%) |Training time=0.46s (21.15%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2503|ppo_ep: 1|act_loss: -0.059326171875|cri_loss: -0.02783203125|unsuper_loss: 0.0 +average reward score: 6.2578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.34%) |Training time=0.45s (21.09%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2504|ppo_ep: 1|act_loss: -0.0218048095703125|cri_loss: -0.01064300537109375|unsuper_loss: 0.0 +average reward score: 5.8125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.01%) |Training time=0.46s (21.44%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2505|ppo_ep: 1|act_loss: 0.0115966796875|cri_loss: 0.006610870361328125|unsuper_loss: 0.0 +average reward score: 5.53515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.34%) |Training time=0.46s (21.11%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2506|ppo_ep: 1|act_loss: 0.0372314453125|cri_loss: 0.019439697265625|unsuper_loss: 0.0 +average reward score: 4.828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.94%) |Training time=0.47s (21.52%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2507|ppo_ep: 1|act_loss: 0.019927978515625|cri_loss: 0.01120758056640625|unsuper_loss: 0.0 +average reward score: 5.12109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.87%) |Training time=0.44s (20.55%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2508|ppo_ep: 1|act_loss: -0.009979248046875|cri_loss: -0.00489044189453125|unsuper_loss: 0.0 +average reward score: 5.421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.48s (22.14%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45 +[2023-04-14 10:20:07,063] [INFO] [logging.py:96:log_dist] [Rank 0] step=2510, skipped=30, lr=[7.762273894967078e-06, 7.762273894967078e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:20:07,081] [INFO] [timer.py:199:stop] epoch=0/micro_step=2510/global_step=2510, RunningAvgSamplesPerSec=106.67546349044484, CurrSamplesPerSec=101.66207505639152, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:20:07,174] [INFO] [logging.py:96:log_dist] [Rank 0] step=2510, skipped=36, lr=[4.0264811127795375e-06, 4.0264811127795375e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2509|ppo_ep: 1|act_loss: 0.059600830078125|cri_loss: 0.034027099609375|unsuper_loss: 0.0 +average reward score: 4.93359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.34%) |Training time=0.48s (22.12%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2510|ppo_ep: 1|act_loss: -0.0127716064453125|cri_loss: -0.00566864013671875|unsuper_loss: 0.0 +average reward score: 5.8125 +------------------------------------------------------------------------------------- +|E2E latency=2.90s |Gather latency=0.00s (0.00%) |Generate time=1.58s (54.53%) |Training time=0.47s (16.30%) |Others=0.85 (29.17%)|CurSamplesPerSec=11.04 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2511|ppo_ep: 1|act_loss: 0.017974853515625|cri_loss: 0.0095367431640625|unsuper_loss: 0.0 +average reward score: 4.94140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.95%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2512|ppo_ep: 1|act_loss: -0.02691650390625|cri_loss: -0.01318359375|unsuper_loss: 0.0 +average reward score: 5.171875 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.82%) |Training time=0.48s (21.59%) |Others=0.15 (6.59%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2513|ppo_ep: 1|act_loss: 0.04351806640625|cri_loss: 0.025054931640625|unsuper_loss: 0.0 +average reward score: 5.44921875 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.66%) |Training time=0.47s (20.99%) |Others=0.10 (4.36%)|CurSamplesPerSec=14.17 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2514|ppo_ep: 1|act_loss: 0.0301361083984375|cri_loss: 0.0162506103515625|unsuper_loss: 0.0 +average reward score: 5.3984375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.81%) |Training time=0.49s (22.68%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2515|ppo_ep: 1|act_loss: 0.0187225341796875|cri_loss: 0.0095977783203125|unsuper_loss: 0.0 +average reward score: 5.4375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.79%) |Training time=0.47s (21.65%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.44 +[2023-04-14 10:20:23,173] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 2516|ppo_ep: 1|act_loss: -0.0048065185546875|cri_loss: -0.0020961761474609375|unsuper_loss: 0.0 +average reward score: 5.5625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.86%) |Training time=0.47s (22.07%) |Others=0.09 (4.08%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44 +[2023-04-14 10:20:25,315] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 2517|ppo_ep: 1|act_loss: -0.00971221923828125|cri_loss: -0.0034580230712890625|unsuper_loss: 0.0 +average reward score: 5.328125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.93%) |Training time=0.47s (21.99%) |Others=0.09 (4.08%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2518|ppo_ep: 1|act_loss: 0.0168914794921875|cri_loss: 0.01013946533203125|unsuper_loss: 0.0 +average reward score: 5.2734375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.92%) |Training time=0.47s (21.53%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.44 +[2023-04-14 10:20:29,556] [INFO] [logging.py:96:log_dist] [Rank 0] step=2520, skipped=30, lr=[7.747514667107269e-06, 7.747514667107269e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:20:29,574] [INFO] [timer.py:199:stop] epoch=0/micro_step=2520/global_step=2520, RunningAvgSamplesPerSec=106.65882156515856, CurrSamplesPerSec=102.7891293792571, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:20:29,667] [INFO] [logging.py:96:log_dist] [Rank 0] step=2520, skipped=38, lr=[4.020375919897733e-06, 4.020375919897733e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2519|ppo_ep: 1|act_loss: 0.005268096923828125|cri_loss: 0.003143310546875|unsuper_loss: 0.0 +average reward score: 5.7578125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.89%) |Training time=0.47s (21.65%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2520|ppo_ep: 1|act_loss: 0.034912109375|cri_loss: 0.0180206298828125|unsuper_loss: 0.0 +average reward score: 5.859375 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.09%) |Training time=0.47s (20.58%) |Others=0.10 (4.33%)|CurSamplesPerSec=14.10 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2521|ppo_ep: 1|act_loss: 0.02166748046875|cri_loss: 0.0110626220703125|unsuper_loss: 0.0 +average reward score: 5.53515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.97%) |Training time=0.47s (21.50%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2522|ppo_ep: 1|act_loss: 0.0024871826171875|cri_loss: 0.0013885498046875|unsuper_loss: 0.0 +average reward score: 5.4609375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.63%) |Training time=0.47s (21.84%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2523|ppo_ep: 1|act_loss: -0.024139404296875|cri_loss: -0.01153564453125|unsuper_loss: 0.0 +average reward score: 5.46875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.85%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2524|ppo_ep: 1|act_loss: -0.0002079010009765625|cri_loss: 0.0004534721374511719|unsuper_loss: 0.0 +average reward score: 5.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.49%) |Training time=0.47s (21.96%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2525|ppo_ep: 1|act_loss: -0.00968170166015625|cri_loss: -0.0042877197265625|unsuper_loss: 0.0 +average reward score: 5.015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.36%) |Training time=0.48s (22.08%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2526|ppo_ep: 1|act_loss: -0.06610107421875|cri_loss: -0.03070068359375|unsuper_loss: 0.0 +average reward score: 5.44140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.31%) |Training time=0.48s (22.12%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2527|ppo_ep: 1|act_loss: -0.04644775390625|cri_loss: -0.0222320556640625|unsuper_loss: 0.0 +average reward score: 5.7734375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.56%) |Training time=0.47s (21.46%) |Others=0.13 (5.99%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2528|ppo_ep: 1|act_loss: -0.002399444580078125|cri_loss: -0.00034809112548828125|unsuper_loss: 0.0 +average reward score: 5.359375 +------------------------------------------------------------------------------------- +|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.71s (74.85%) |Training time=0.47s (20.52%) |Others=0.11 (4.63%)|CurSamplesPerSec=14.00 |AvgSamplesPerSec=14.45 +[2023-04-14 10:20:51,418] [INFO] [logging.py:96:log_dist] [Rank 0] step=2530, skipped=30, lr=[7.732712120529913e-06, 7.732712120529913e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:20:51,436] [INFO] [timer.py:199:stop] epoch=0/micro_step=2530/global_step=2530, RunningAvgSamplesPerSec=106.64682603841231, CurrSamplesPerSec=102.19486489141136, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:20:51,529] [INFO] [logging.py:96:log_dist] [Rank 0] step=2530, skipped=38, lr=[4.012724153511568e-06, 4.012724153511568e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2529|ppo_ep: 1|act_loss: -0.02001953125|cri_loss: -0.00980377197265625|unsuper_loss: 0.0 +average reward score: 6.09765625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.48s (22.08%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2530|ppo_ep: 1|act_loss: 0.0421142578125|cri_loss: 0.022308349609375|unsuper_loss: 0.0 +average reward score: 5.67578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.47s (21.66%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2531|ppo_ep: 1|act_loss: 0.024688720703125|cri_loss: 0.0128326416015625|unsuper_loss: 0.0 +average reward score: 5.5390625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.61%) |Training time=0.47s (21.82%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45 +[2023-04-14 10:20:57,867] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 2532|ppo_ep: 1|act_loss: 0.07666015625|cri_loss: 0.041778564453125|unsuper_loss: 0.0 +average reward score: 5.01953125 +------------------------------------------------------------------------------------- +|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.67%) |Training time=0.44s (20.73%) |Others=0.10 (4.60%)|CurSamplesPerSec=15.09 |AvgSamplesPerSec=14.45 +[2023-04-14 10:20:59,996] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 2533|ppo_ep: 1|act_loss: 0.046142578125|cri_loss: 0.0245513916015625|unsuper_loss: 0.0 +average reward score: 5.87890625 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.53%) |Training time=0.44s (20.87%) |Others=0.10 (4.60%)|CurSamplesPerSec=15.02 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2534|ppo_ep: 1|act_loss: -7.677078247070312e-05|cri_loss: 0.0001392364501953125|unsuper_loss: 0.0 +average reward score: 5.8984375 +------------------------------------------------------------------------------------- +|E2E latency=2.60s |Gather latency=0.00s (0.00%) |Generate time=1.58s (60.80%) |Training time=0.48s (18.31%) |Others=0.54 (20.90%)|CurSamplesPerSec=12.29 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2535|ppo_ep: 1|act_loss: 0.00254058837890625|cri_loss: 0.0013990402221679688|unsuper_loss: 0.0 +average reward score: 5.76171875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.86%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2536|ppo_ep: 1|act_loss: -0.01519012451171875|cri_loss: -0.00730133056640625|unsuper_loss: 0.0 +average reward score: 5.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.61%) |Training time=0.45s (20.84%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2537|ppo_ep: 1|act_loss: -0.02679443359375|cri_loss: -0.01297760009765625|unsuper_loss: 0.0 +average reward score: 5.33984375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.51%) |Training time=0.45s (20.89%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2538|ppo_ep: 1|act_loss: -0.04571533203125|cri_loss: -0.022308349609375|unsuper_loss: 0.0 +average reward score: 5.78125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.64%) |Training time=0.47s (21.80%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45 +[2023-04-14 10:21:13,351] [INFO] [logging.py:96:log_dist] [Rank 0] step=2540, skipped=32, lr=[7.720839041213051e-06, 7.720839041213051e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:21:13,369] [INFO] [timer.py:199:stop] epoch=0/micro_step=2540/global_step=2540, RunningAvgSamplesPerSec=106.64877381183305, CurrSamplesPerSec=102.9472844528714, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:21:13,462] [INFO] [logging.py:96:log_dist] [Rank 0] step=2540, skipped=38, lr=[4.0050499649041985e-06, 4.0050499649041985e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2539|ppo_ep: 1|act_loss: -0.027313232421875|cri_loss: -0.01280975341796875|unsuper_loss: 0.0 +average reward score: 5.37109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.53%) |Training time=0.47s (21.94%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2540|ppo_ep: 1|act_loss: 0.001941680908203125|cri_loss: 0.0014667510986328125|unsuper_loss: 0.0 +average reward score: 5.3671875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.54%) |Training time=0.47s (21.90%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2541|ppo_ep: 1|act_loss: 0.00600433349609375|cri_loss: 0.003421783447265625|unsuper_loss: 0.0 +average reward score: 5.39453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.44%) |Training time=0.47s (22.01%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2542|ppo_ep: 1|act_loss: 0.00748443603515625|cri_loss: 0.004150390625|unsuper_loss: 0.0 +average reward score: 5.57421875 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.58%) |Training time=0.47s (20.19%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.80 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2543|ppo_ep: 1|act_loss: 0.025390625|cri_loss: 0.014404296875|unsuper_loss: 0.0 +average reward score: 5.6015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.86%) |Training time=0.46s (21.58%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2544|ppo_ep: 1|act_loss: 0.011566162109375|cri_loss: 0.00630950927734375|unsuper_loss: 0.0 +average reward score: 5.546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.73%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2545|ppo_ep: 1|act_loss: 0.0166015625|cri_loss: 0.0087432861328125|unsuper_loss: 0.0 +average reward score: 4.2734375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.92%) |Training time=0.46s (21.50%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2546|ppo_ep: 1|act_loss: 0.0025577545166015625|cri_loss: 0.001453399658203125|unsuper_loss: 0.0 +average reward score: 5.66015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.74%) |Training time=0.47s (21.70%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2547|ppo_ep: 1|act_loss: -0.005771636962890625|cri_loss: -0.002132415771484375|unsuper_loss: 0.0 +average reward score: 5.41015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.77%) |Training time=0.47s (21.67%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2548|ppo_ep: 1|act_loss: -0.0005550384521484375|cri_loss: 0.0006742477416992188|unsuper_loss: 0.0 +average reward score: 5.48046875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.15%) |Training time=0.46s (21.34%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.45 +[2023-04-14 10:21:35,117] [INFO] [logging.py:96:log_dist] [Rank 0] step=2550, skipped=32, lr=[7.705959074285325e-06, 7.705959074285325e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:21:35,135] [INFO] [timer.py:199:stop] epoch=0/micro_step=2550/global_step=2550, RunningAvgSamplesPerSec=106.64088588508935, CurrSamplesPerSec=103.17353387250142, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:21:35,228] [INFO] [logging.py:96:log_dist] [Rank 0] step=2550, skipped=38, lr=[3.997353467825614e-06, 3.997353467825614e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2549|ppo_ep: 1|act_loss: -0.033782958984375|cri_loss: -0.016387939453125|unsuper_loss: 0.0 +average reward score: 5.765625 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.02%) |Training time=0.47s (21.52%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2550|ppo_ep: 1|act_loss: -0.016998291015625|cri_loss: -0.0077667236328125|unsuper_loss: 0.0 +average reward score: 5.6484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.46s (21.61%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2551|ppo_ep: 1|act_loss: -0.00411224365234375|cri_loss: -0.0017375946044921875|unsuper_loss: 0.0 +average reward score: 5.66015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.13%) |Training time=0.46s (21.29%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2552|ppo_ep: 1|act_loss: 0.027069091796875|cri_loss: 0.01416778564453125|unsuper_loss: 0.0 +average reward score: 5.40234375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.59%) |Training time=0.45s (20.85%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2553|ppo_ep: 1|act_loss: 0.00783538818359375|cri_loss: 0.004398345947265625|unsuper_loss: 0.0 +average reward score: 5.63671875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.28%) |Training time=0.46s (21.15%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2554|ppo_ep: 1|act_loss: -0.0142974853515625|cri_loss: -0.00685882568359375|unsuper_loss: 0.0 +average reward score: 5.29296875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.08%) |Training time=0.46s (21.34%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2555|ppo_ep: 1|act_loss: -0.004123687744140625|cri_loss: -0.001667022705078125|unsuper_loss: 0.0 +average reward score: 5.5625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.69%) |Training time=0.45s (20.73%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2556|ppo_ep: 1|act_loss: -0.0052032470703125|cri_loss: -0.0022716522216796875|unsuper_loss: 0.0 +average reward score: 5.78125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.17%) |Training time=0.46s (21.26%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2557|ppo_ep: 1|act_loss: -0.01218414306640625|cri_loss: -0.0057220458984375|unsuper_loss: 0.0 +average reward score: 5.421875 +------------------------------------------------------------------------------------- +|E2E latency=2.84s |Gather latency=0.00s (0.00%) |Generate time=1.77s (62.32%) |Training time=0.45s (16.02%) |Others=0.61 (21.65%)|CurSamplesPerSec=11.27 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2558|ppo_ep: 1|act_loss: 0.0281524658203125|cri_loss: 0.0150299072265625|unsuper_loss: 0.0 +average reward score: 5.12890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.94%) |Training time=0.47s (21.51%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.45 +[2023-04-14 10:21:57,356] [INFO] [logging.py:96:log_dist] [Rank 0] step=2560, skipped=32, lr=[7.691036404594183e-06, 7.691036404594183e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:21:57,374] [INFO] [timer.py:199:stop] epoch=0/micro_step=2560/global_step=2560, RunningAvgSamplesPerSec=106.64851899366893, CurrSamplesPerSec=108.4322206360786, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:21:57,467] [INFO] [logging.py:96:log_dist] [Rank 0] step=2560, skipped=38, lr=[3.989634776356468e-06, 3.989634776356468e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2559|ppo_ep: 1|act_loss: -0.00821685791015625|cri_loss: -0.0036067962646484375|unsuper_loss: 0.0 +average reward score: 4.890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.26%) |Training time=0.46s (21.21%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2560|ppo_ep: 1|act_loss: -0.01593017578125|cri_loss: -0.0075836181640625|unsuper_loss: 0.0 +average reward score: 5.421875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.24%) |Training time=0.46s (21.20%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2561|ppo_ep: 1|act_loss: 0.012786865234375|cri_loss: 0.006656646728515625|unsuper_loss: 0.0 +average reward score: 5.96875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.15%) |Training time=0.46s (21.29%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2562|ppo_ep: 1|act_loss: 0.0024127960205078125|cri_loss: 0.0014629364013671875|unsuper_loss: 0.0 +average reward score: 5.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.30%) |Training time=0.45s (20.98%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2563|ppo_ep: 1|act_loss: -0.028656005859375|cri_loss: -0.0138397216796875|unsuper_loss: 0.0 +average reward score: 4.98046875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.35%) |Training time=0.45s (21.08%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2564|ppo_ep: 1|act_loss: 0.01380157470703125|cri_loss: 0.007137298583984375|unsuper_loss: 0.0 +average reward score: 5.71875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.12%) |Training time=0.46s (21.32%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2565|ppo_ep: 1|act_loss: 0.05413818359375|cri_loss: 0.0276336669921875|unsuper_loss: 0.0 +average reward score: 5.00390625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.49%) |Training time=0.45s (20.94%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2566|ppo_ep: 1|act_loss: 0.01453399658203125|cri_loss: 0.009857177734375|unsuper_loss: 0.0 +average reward score: 6.71875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.51%) |Training time=0.45s (20.92%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2567|ppo_ep: 1|act_loss: -0.01490020751953125|cri_loss: -0.0070953369140625|unsuper_loss: 0.0 +average reward score: 5.78125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.22%) |Training time=0.43s (20.18%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2568|ppo_ep: 1|act_loss: -0.03192138671875|cri_loss: -0.015533447265625|unsuper_loss: 0.0 +average reward score: 5.5 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.41%) |Training time=0.45s (21.03%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45 +[2023-04-14 10:22:18,902] [INFO] [logging.py:96:log_dist] [Rank 0] step=2570, skipped=32, lr=[7.676071253329589e-06, 7.676071253329589e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:22:18,921] [INFO] [timer.py:199:stop] epoch=0/micro_step=2570/global_step=2570, RunningAvgSamplesPerSec=106.66218526568217, CurrSamplesPerSec=109.55198828552562, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:22:19,013] [INFO] [logging.py:96:log_dist] [Rank 0] step=2570, skipped=38, lr=[3.981894004906388e-06, 3.981894004906388e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2569|ppo_ep: 1|act_loss: -0.0310821533203125|cri_loss: -0.0146331787109375|unsuper_loss: 0.0 +average reward score: 5.37890625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.33%) |Training time=0.45s (21.12%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2570|ppo_ep: 1|act_loss: 0.0008916854858398438|cri_loss: 0.0008840560913085938|unsuper_loss: 0.0 +average reward score: 5.7109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.32%) |Training time=0.45s (21.10%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2571|ppo_ep: 1|act_loss: 0.0136871337890625|cri_loss: 0.007045745849609375|unsuper_loss: 0.0 +average reward score: 5.78515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.46s (21.47%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2572|ppo_ep: 1|act_loss: 0.0029239654541015625|cri_loss: 0.0018987655639648438|unsuper_loss: 0.0 +average reward score: 5.28125 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.68%) |Training time=0.47s (20.12%) |Others=0.10 (4.20%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2573|ppo_ep: 1|act_loss: 0.03326416015625|cri_loss: 0.0170135498046875|unsuper_loss: 0.0 +average reward score: 5.41796875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.32%) |Training time=0.45s (21.11%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2574|ppo_ep: 1|act_loss: 0.0264892578125|cri_loss: 0.0137939453125|unsuper_loss: 0.0 +average reward score: 5.44921875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.24%) |Training time=0.45s (21.09%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2575|ppo_ep: 1|act_loss: -0.0259246826171875|cri_loss: -0.01274871826171875|unsuper_loss: 0.0 +average reward score: 5.4375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.65%) |Training time=0.45s (20.83%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2576|ppo_ep: 1|act_loss: -0.0147552490234375|cri_loss: -0.006183624267578125|unsuper_loss: 0.0 +average reward score: 5.8671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.25%) |Training time=0.46s (21.19%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2577|ppo_ep: 1|act_loss: -0.03240966796875|cri_loss: -0.0154571533203125|unsuper_loss: 0.0 +average reward score: 5.23046875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.39%) |Training time=0.46s (21.07%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2578|ppo_ep: 1|act_loss: -0.04736328125|cri_loss: -0.0228118896484375|unsuper_loss: 0.0 +average reward score: 5.10546875 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.38%) |Training time=0.47s (21.17%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.45 +[2023-04-14 10:22:40,839] [INFO] [logging.py:96:log_dist] [Rank 0] step=2580, skipped=32, lr=[7.661063842311183e-06, 7.661063842311183e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:22:40,858] [INFO] [timer.py:199:stop] epoch=0/micro_step=2580/global_step=2580, RunningAvgSamplesPerSec=106.6658442359836, CurrSamplesPerSec=105.24808684721674, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:22:40,951] [INFO] [logging.py:96:log_dist] [Rank 0] step=2580, skipped=38, lr=[3.97413126821228e-06, 3.97413126821228e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2579|ppo_ep: 1|act_loss: -0.0352783203125|cri_loss: -0.017059326171875|unsuper_loss: 0.0 +average reward score: 5.04296875 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.16%) |Training time=0.47s (20.53%) |Others=0.10 (4.32%)|CurSamplesPerSec=14.07 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2580|ppo_ep: 1|act_loss: -0.022186279296875|cri_loss: -0.0107574462890625|unsuper_loss: 0.0 +average reward score: 5.1796875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.20%) |Training time=0.46s (21.23%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2581|ppo_ep: 1|act_loss: 0.00836181640625|cri_loss: 0.00673675537109375|unsuper_loss: 0.0 +average reward score: 5.33984375 +------------------------------------------------------------------------------------- +|E2E latency=2.71s |Gather latency=0.00s (0.00%) |Generate time=1.62s (59.69%) |Training time=0.44s (16.41%) |Others=0.65 (23.90%)|CurSamplesPerSec=11.80 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2582|ppo_ep: 1|act_loss: -0.00373077392578125|cri_loss: -0.0012674331665039062|unsuper_loss: 0.0 +average reward score: 5.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.29%) |Training time=0.46s (21.17%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2583|ppo_ep: 1|act_loss: 0.018829345703125|cri_loss: 0.0101776123046875|unsuper_loss: 0.0 +average reward score: 5.19921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.86%) |Training time=0.47s (21.59%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2584|ppo_ep: 1|act_loss: 0.004974365234375|cri_loss: 0.0033092498779296875|unsuper_loss: 0.0 +average reward score: 5.65234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.31%) |Training time=0.45s (21.10%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2585|ppo_ep: 1|act_loss: -0.0110931396484375|cri_loss: -0.00537109375|unsuper_loss: 0.0 +average reward score: 5.1015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.49%) |Training time=0.45s (20.93%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2586|ppo_ep: 1|act_loss: -0.0142364501953125|cri_loss: -0.0065460205078125|unsuper_loss: 0.0 +average reward score: 4.75390625 +------------------------------------------------------------------------------------- +|E2E latency=2.89s |Gather latency=0.00s (0.00%) |Generate time=1.76s (60.97%) |Training time=0.48s (16.67%) |Others=0.65 (22.36%)|CurSamplesPerSec=11.08 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2587|ppo_ep: 1|act_loss: 0.0120391845703125|cri_loss: 0.00667572021484375|unsuper_loss: 0.0 +average reward score: 5.50390625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.20%) |Training time=0.46s (21.22%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2588|ppo_ep: 1|act_loss: -0.03863525390625|cri_loss: -0.017364501953125|unsuper_loss: 0.0 +average reward score: 5.12109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.38%) |Training time=0.45s (21.04%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45 +[2023-04-14 10:23:03,693] [INFO] [logging.py:96:log_dist] [Rank 0] step=2590, skipped=32, lr=[7.646014393985005e-06, 7.646014393985005e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:23:03,712] [INFO] [timer.py:199:stop] epoch=0/micro_step=2590/global_step=2590, RunningAvgSamplesPerSec=106.671823033167, CurrSamplesPerSec=107.67645173531136, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:23:03,805] [INFO] [logging.py:96:log_dist] [Rank 0] step=2590, skipped=38, lr=[3.96634668133663e-06, 3.96634668133663e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2589|ppo_ep: 1|act_loss: -0.048431396484375|cri_loss: -0.0231170654296875|unsuper_loss: 0.0 +average reward score: 5.61328125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.13%) |Training time=0.46s (21.32%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2590|ppo_ep: 1|act_loss: -0.047698974609375|cri_loss: -0.02325439453125|unsuper_loss: 0.0 +average reward score: 5.1953125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.46s (21.32%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2591|ppo_ep: 1|act_loss: -0.00933837890625|cri_loss: -0.00406646728515625|unsuper_loss: 0.0 +average reward score: 5.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.10%) |Training time=0.46s (21.33%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2592|ppo_ep: 1|act_loss: 0.00988006591796875|cri_loss: 0.00601959228515625|unsuper_loss: 0.0 +average reward score: 4.85546875 +------------------------------------------------------------------------------------- +|E2E latency=2.60s |Gather latency=0.00s (0.00%) |Generate time=1.59s (61.40%) |Training time=0.46s (17.59%) |Others=0.55 (21.01%)|CurSamplesPerSec=12.33 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2593|ppo_ep: 1|act_loss: 0.0369873046875|cri_loss: 0.019439697265625|unsuper_loss: 0.0 +average reward score: 5.5703125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.16%) |Training time=0.46s (21.25%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2594|ppo_ep: 1|act_loss: 0.053192138671875|cri_loss: 0.0272216796875|unsuper_loss: 0.0 +average reward score: 5.52734375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.46s (21.38%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2595|ppo_ep: 1|act_loss: 0.025787353515625|cri_loss: 0.0153961181640625|unsuper_loss: 0.0 +average reward score: 5.41796875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.20%) |Training time=0.46s (21.23%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2596|ppo_ep: 1|act_loss: 0.021759033203125|cri_loss: 0.011383056640625|unsuper_loss: 0.0 +average reward score: 5.75 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.86%) |Training time=0.47s (21.63%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2597|ppo_ep: 1|act_loss: 0.0087890625|cri_loss: 0.0050811767578125|unsuper_loss: 0.0 +average reward score: 5.16796875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.04%) |Training time=0.46s (21.41%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2598|ppo_ep: 1|act_loss: 0.021331787109375|cri_loss: 0.01171112060546875|unsuper_loss: 0.0 +average reward score: 5.20703125 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.85%) |Training time=0.45s (20.49%) |Others=0.17 (7.66%)|CurSamplesPerSec=14.42 |AvgSamplesPerSec=14.45 +[2023-04-14 10:23:25,756] [INFO] [logging.py:96:log_dist] [Rank 0] step=2600, skipped=32, lr=[7.630923131420182e-06, 7.630923131420182e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:23:25,775] [INFO] [timer.py:199:stop] epoch=0/micro_step=2600/global_step=2600, RunningAvgSamplesPerSec=106.67467146215988, CurrSamplesPerSec=104.68854435844617, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:23:25,867] [INFO] [logging.py:96:log_dist] [Rank 0] step=2600, skipped=38, lr=[3.958540359665793e-06, 3.958540359665793e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2599|ppo_ep: 1|act_loss: -0.0157318115234375|cri_loss: -0.007659912109375|unsuper_loss: 0.0 +average reward score: 5.703125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.83%) |Training time=0.47s (21.63%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2600|ppo_ep: 1|act_loss: -0.0126190185546875|cri_loss: -0.005321502685546875|unsuper_loss: 0.0 +average reward score: 5.625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.98%) |Training time=0.46s (21.46%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2601|ppo_ep: 1|act_loss: -0.0704345703125|cri_loss: -0.033203125|unsuper_loss: 0.0 +average reward score: 5.10546875 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.28%) |Training time=0.48s (20.52%) |Others=0.10 (4.20%)|CurSamplesPerSec=13.66 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2602|ppo_ep: 1|act_loss: -0.0760498046875|cri_loss: -0.03662109375|unsuper_loss: 0.0 +average reward score: 5.58984375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.17%) |Training time=0.46s (21.32%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2603|ppo_ep: 1|act_loss: 0.004253387451171875|cri_loss: 0.002468109130859375|unsuper_loss: 0.0 +average reward score: 5.8828125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.21%) |Training time=0.46s (21.21%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2604|ppo_ep: 1|act_loss: -0.00458526611328125|cri_loss: -0.002124786376953125|unsuper_loss: 0.0 +average reward score: 5.83203125 +------------------------------------------------------------------------------------- +|E2E latency=2.61s |Gather latency=0.00s (0.00%) |Generate time=1.60s (61.13%) |Training time=0.45s (17.40%) |Others=0.56 (21.47%)|CurSamplesPerSec=12.26 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2605|ppo_ep: 1|act_loss: 0.004978179931640625|cri_loss: 0.0028743743896484375|unsuper_loss: 0.0 +average reward score: 4.859375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.51%) |Training time=0.45s (20.93%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2606|ppo_ep: 1|act_loss: 0.0413818359375|cri_loss: 0.0211334228515625|unsuper_loss: 0.0 +average reward score: 5.69921875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.77%) |Training time=0.47s (21.51%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2607|ppo_ep: 1|act_loss: 0.01214599609375|cri_loss: 0.00688934326171875|unsuper_loss: 0.0 +average reward score: 5.5703125 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.62s (72.09%) |Training time=0.53s (23.53%) |Others=0.10 (4.38%)|CurSamplesPerSec=14.24 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2608|ppo_ep: 1|act_loss: 0.032012939453125|cri_loss: 0.0164794921875|unsuper_loss: 0.0 +average reward score: 5.046875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.49%) |Training time=0.45s (20.94%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45 +[2023-04-14 10:23:48,076] [INFO] [logging.py:96:log_dist] [Rank 0] step=2610, skipped=32, lr=[7.615790278305629e-06, 7.615790278305629e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:23:48,095] [INFO] [timer.py:199:stop] epoch=0/micro_step=2610/global_step=2610, RunningAvgSamplesPerSec=106.66852722864914, CurrSamplesPerSec=108.16473682853238, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:23:48,187] [INFO] [logging.py:96:log_dist] [Rank 0] step=2610, skipped=38, lr=[3.95071241890829e-06, 3.95071241890829e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2609|ppo_ep: 1|act_loss: 0.00861358642578125|cri_loss: 0.0045623779296875|unsuper_loss: 0.0 +average reward score: 4.97265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.19%) |Training time=0.46s (21.26%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2610|ppo_ep: 1|act_loss: -0.0095062255859375|cri_loss: -0.00423431396484375|unsuper_loss: 0.0 +average reward score: 5.671875 +------------------------------------------------------------------------------------- +|E2E latency=2.51s |Gather latency=0.00s (0.00%) |Generate time=1.59s (63.11%) |Training time=0.47s (18.51%) |Others=0.46 (18.38%)|CurSamplesPerSec=12.73 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2611|ppo_ep: 1|act_loss: -0.018829345703125|cri_loss: -0.00910186767578125|unsuper_loss: 0.0 +average reward score: 5.53125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.47%) |Training time=0.48s (21.95%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2612|ppo_ep: 1|act_loss: -0.0234527587890625|cri_loss: -0.01035308837890625|unsuper_loss: 0.0 +average reward score: 5.23046875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.12%) |Training time=0.48s (22.33%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2613|ppo_ep: 1|act_loss: 0.005229949951171875|cri_loss: 0.00333404541015625|unsuper_loss: 0.0 +average reward score: 5.24609375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.43%) |Training time=0.47s (21.99%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2614|ppo_ep: 1|act_loss: -0.0267486572265625|cri_loss: -0.01274871826171875|unsuper_loss: 0.0 +average reward score: 5.359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.52%) |Training time=0.47s (21.91%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2615|ppo_ep: 1|act_loss: 0.0052490234375|cri_loss: 0.003108978271484375|unsuper_loss: 0.0 +average reward score: 5.3671875 +------------------------------------------------------------------------------------- +|E2E latency=2.54s |Gather latency=0.00s (0.00%) |Generate time=1.62s (63.71%) |Training time=0.63s (24.65%) |Others=0.30 (11.64%)|CurSamplesPerSec=12.60 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2616|ppo_ep: 1|act_loss: 0.04046630859375|cri_loss: 0.021759033203125|unsuper_loss: 0.0 +average reward score: 5.09375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.68%) |Training time=0.47s (21.73%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2617|ppo_ep: 1|act_loss: -0.0025787353515625|cri_loss: 0.0013027191162109375|unsuper_loss: 0.0 +average reward score: 5.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.74%) |Training time=0.47s (21.69%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45 +[2023-04-14 10:24:08,322] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 2618|ppo_ep: 1|act_loss: 0.017486572265625|cri_loss: 0.00926971435546875|unsuper_loss: 0.0 +average reward score: 5.20703125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.09%) |Training time=0.47s (21.81%) |Others=0.09 (4.10%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.45 +[2023-04-14 10:24:10,360] [INFO] [logging.py:96:log_dist] [Rank 0] step=2620, skipped=32, lr=[7.600616058946736e-06, 7.600616058946736e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:24:10,378] [INFO] [timer.py:199:stop] epoch=0/micro_step=2620/global_step=2620, RunningAvgSamplesPerSec=106.64397854881977, CurrSamplesPerSec=103.47651192983646, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:24:10,462] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +[2023-04-14 10:24:10,463] [INFO] [logging.py:96:log_dist] [Rank 0] step=2620, skipped=40, lr=[3.9444345785206285e-06, 3.9444345785206285e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2619|ppo_ep: 1|act_loss: 0.020050048828125|cri_loss: 0.010467529296875|unsuper_loss: 0.0 +average reward score: 5.234375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.88%) |Training time=0.47s (22.05%) |Others=0.09 (4.08%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2620|ppo_ep: 1|act_loss: 0.01495361328125|cri_loss: 0.0078277587890625|unsuper_loss: 0.0 +average reward score: 5.56640625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.88%) |Training time=0.46s (21.54%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2621|ppo_ep: 1|act_loss: 0.04327392578125|cri_loss: 0.0222015380859375|unsuper_loss: 0.0 +average reward score: 5.6640625 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.58s (69.79%) |Training time=0.47s (20.78%) |Others=0.21 (9.43%)|CurSamplesPerSec=14.14 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2622|ppo_ep: 1|act_loss: 0.0241546630859375|cri_loss: 0.01229095458984375|unsuper_loss: 0.0 +average reward score: 4.94140625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.43%) |Training time=0.47s (22.00%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2623|ppo_ep: 1|act_loss: -0.0059356689453125|cri_loss: -0.0025787353515625|unsuper_loss: 0.0 +average reward score: 5.203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.63%) |Training time=0.47s (21.81%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2624|ppo_ep: 1|act_loss: -0.040802001953125|cri_loss: -0.0193634033203125|unsuper_loss: 0.0 +average reward score: 5.33984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.05%) |Training time=0.49s (22.41%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2625|ppo_ep: 1|act_loss: -0.048248291015625|cri_loss: -0.022308349609375|unsuper_loss: 0.0 +average reward score: 5.32421875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.80%) |Training time=0.46s (21.62%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2626|ppo_ep: 1|act_loss: 0.04718017578125|cri_loss: 0.024810791015625|unsuper_loss: 0.0 +average reward score: 5.44140625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.76%) |Training time=0.44s (20.64%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2627|ppo_ep: 1|act_loss: 0.0007905960083007812|cri_loss: 0.0007834434509277344|unsuper_loss: 0.0 +average reward score: 5.34765625 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.13%) |Training time=0.47s (21.40%) |Others=0.14 (6.47%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2628|ppo_ep: 1|act_loss: -0.0053253173828125|cri_loss: -0.0022602081298828125|unsuper_loss: 0.0 +average reward score: 5.625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.46%) |Training time=0.47s (21.94%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45 +[2023-04-14 10:24:32,030] [INFO] [logging.py:96:log_dist] [Rank 0] step=2630, skipped=32, lr=[7.5854006982620415e-06, 7.5854006982620415e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:24:32,048] [INFO] [timer.py:199:stop] epoch=0/micro_step=2630/global_step=2630, RunningAvgSamplesPerSec=106.63587194700445, CurrSamplesPerSec=101.95049749373148, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:24:32,141] [INFO] [logging.py:96:log_dist] [Rank 0] step=2630, skipped=40, lr=[3.9365680160143595e-06, 3.9365680160143595e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2629|ppo_ep: 1|act_loss: 0.0084381103515625|cri_loss: 0.004375457763671875|unsuper_loss: 0.0 +average reward score: 5.5390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.57%) |Training time=0.48s (21.92%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2630|ppo_ep: 1|act_loss: 0.0084686279296875|cri_loss: 0.004428863525390625|unsuper_loss: 0.0 +average reward score: 5.171875 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.90%) |Training time=0.49s (20.89%) |Others=0.10 (4.22%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2631|ppo_ep: 1|act_loss: 0.016143798828125|cri_loss: 0.00824737548828125|unsuper_loss: 0.0 +average reward score: 5.921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.72%) |Training time=0.46s (21.59%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2632|ppo_ep: 1|act_loss: -0.01343536376953125|cri_loss: -0.006603240966796875|unsuper_loss: 0.0 +average reward score: 5.2578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.60%) |Training time=0.47s (21.83%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2633|ppo_ep: 1|act_loss: -0.0085601806640625|cri_loss: -0.003986358642578125|unsuper_loss: 0.0 +average reward score: 5.5 +------------------------------------------------------------------------------------- +|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.58s (67.05%) |Training time=0.47s (19.71%) |Others=0.31 (13.24%)|CurSamplesPerSec=13.55 |AvgSamplesPerSec=14.45 +[2023-04-14 10:24:43,163] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 2634|ppo_ep: 1|act_loss: 0.01061248779296875|cri_loss: 0.00551605224609375|unsuper_loss: 0.0 +average reward score: 5.1875 +------------------------------------------------------------------------------------- +|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.81%) |Training time=0.44s (20.57%) |Others=0.10 (4.62%)|CurSamplesPerSec=15.09 |AvgSamplesPerSec=14.45 +[2023-04-14 10:24:45,295] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 2635|ppo_ep: 1|act_loss: 0.00872039794921875|cri_loss: 0.005645751953125|unsuper_loss: 0.0 +average reward score: 4.89453125 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.91%) |Training time=0.44s (20.51%) |Others=0.10 (4.58%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2636|ppo_ep: 1|act_loss: 0.0165863037109375|cri_loss: 0.0097808837890625|unsuper_loss: 0.0 +average reward score: 5.81640625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.19%) |Training time=0.47s (21.32%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2637|ppo_ep: 1|act_loss: 0.00852203369140625|cri_loss: 0.004772186279296875|unsuper_loss: 0.0 +average reward score: 5.375 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.22%) |Training time=0.49s (21.46%) |Others=0.10 (4.32%)|CurSamplesPerSec=14.08 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2638|ppo_ep: 1|act_loss: -0.0084228515625|cri_loss: -0.003665924072265625|unsuper_loss: 0.0 +average reward score: 5.5234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.63%) |Training time=0.47s (21.80%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45 +[2023-04-14 10:24:54,042] [INFO] [logging.py:96:log_dist] [Rank 0] step=2640, skipped=34, lr=[7.573198939494354e-06, 7.573198939494354e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:24:54,736] [INFO] [timer.py:199:stop] epoch=0/micro_step=2640/global_step=2640, RunningAvgSamplesPerSec=106.54140046893538, CurrSamplesPerSec=32.69383225358878, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:24:54,829] [INFO] [logging.py:96:log_dist] [Rank 0] step=2640, skipped=40, lr=[3.928680160104563e-06, 3.928680160104563e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2639|ppo_ep: 1|act_loss: 0.0277099609375|cri_loss: 0.01422119140625|unsuper_loss: 0.0 +average reward score: 5.0859375 +------------------------------------------------------------------------------------- +|E2E latency=2.83s |Gather latency=0.00s (0.00%) |Generate time=1.59s (56.17%) |Training time=1.14s (40.35%) |Others=0.10 (3.48%)|CurSamplesPerSec=11.31 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2640|ppo_ep: 1|act_loss: 0.0245819091796875|cri_loss: 0.01282501220703125|unsuper_loss: 0.0 +average reward score: 5.2265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.19%) |Training time=0.46s (21.23%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2641|ppo_ep: 1|act_loss: -0.00899505615234375|cri_loss: -0.00397491455078125|unsuper_loss: 0.0 +average reward score: 5.80078125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.14%) |Training time=0.44s (20.29%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2642|ppo_ep: 1|act_loss: -0.00818634033203125|cri_loss: -0.00392913818359375|unsuper_loss: 0.0 +average reward score: 5.5390625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.94%) |Training time=0.46s (21.48%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2643|ppo_ep: 1|act_loss: -0.0084381103515625|cri_loss: -0.003757476806640625|unsuper_loss: 0.0 +average reward score: 5.59375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.68%) |Training time=0.47s (21.74%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2644|ppo_ep: 1|act_loss: 0.021514892578125|cri_loss: 0.0110626220703125|unsuper_loss: 0.0 +average reward score: 4.75390625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.35%) |Training time=0.47s (21.96%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2645|ppo_ep: 1|act_loss: 0.02435302734375|cri_loss: 0.01253509521484375|unsuper_loss: 0.0 +average reward score: 5.77734375 +------------------------------------------------------------------------------------- +|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.58s (70.90%) |Training time=0.49s (21.96%) |Others=0.16 (7.14%)|CurSamplesPerSec=14.34 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2646|ppo_ep: 1|act_loss: 0.00466156005859375|cri_loss: 0.002773284912109375|unsuper_loss: 0.0 +average reward score: 5.30859375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.36%) |Training time=0.48s (21.81%) |Others=0.11 (4.83%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2647|ppo_ep: 1|act_loss: -0.0088958740234375|cri_loss: -0.0033321380615234375|unsuper_loss: 0.0 +average reward score: 5.34765625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.37%) |Training time=0.47s (22.05%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2648|ppo_ep: 1|act_loss: 0.00727081298828125|cri_loss: 0.004428863525390625|unsuper_loss: 0.0 +average reward score: 5.75 +------------------------------------------------------------------------------------- +|E2E latency=3.23s |Gather latency=0.00s (0.00%) |Generate time=1.58s (49.01%) |Training time=0.47s (14.46%) |Others=1.18 (36.53%)|CurSamplesPerSec=9.92 |AvgSamplesPerSec=14.45 +[2023-04-14 10:25:17,411] [INFO] [logging.py:96:log_dist] [Rank 0] step=2650, skipped=34, lr=[7.557910093162319e-06, 7.557910093162319e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:25:17,429] [INFO] [timer.py:199:stop] epoch=0/micro_step=2650/global_step=2650, RunningAvgSamplesPerSec=106.53553553330283, CurrSamplesPerSec=104.7835965849119, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:25:17,522] [INFO] [logging.py:96:log_dist] [Rank 0] step=2650, skipped=40, lr=[3.9207711277082896e-06, 3.9207711277082896e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2649|ppo_ep: 1|act_loss: -0.00821685791015625|cri_loss: -0.003902435302734375|unsuper_loss: 0.0 +average reward score: 5.953125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.62%) |Training time=0.47s (21.81%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2650|ppo_ep: 1|act_loss: -0.0178070068359375|cri_loss: -0.007549285888671875|unsuper_loss: 0.0 +average reward score: 5.2265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.45%) |Training time=0.48s (22.02%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2651|ppo_ep: 1|act_loss: 0.04248046875|cri_loss: 0.02374267578125|unsuper_loss: 0.0 +average reward score: 5.234375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.65%) |Training time=0.47s (21.76%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2652|ppo_ep: 1|act_loss: 0.02752685546875|cri_loss: 0.01483917236328125|unsuper_loss: 0.0 +average reward score: 5.07421875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.47s (22.06%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2653|ppo_ep: 1|act_loss: 0.027435302734375|cri_loss: 0.01416015625|unsuper_loss: 0.0 +average reward score: 5.66015625 +------------------------------------------------------------------------------------- +|E2E latency=3.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (49.52%) |Training time=0.48s (14.95%) |Others=1.13 (35.54%)|CurSamplesPerSec=10.06 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2654|ppo_ep: 1|act_loss: 0.0149993896484375|cri_loss: 0.0079498291015625|unsuper_loss: 0.0 +average reward score: 5.51953125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.48%) |Training time=0.47s (21.93%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2655|ppo_ep: 1|act_loss: 0.026458740234375|cri_loss: 0.014007568359375|unsuper_loss: 0.0 +average reward score: 4.9140625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.92%) |Training time=0.49s (22.55%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2656|ppo_ep: 1|act_loss: 0.0091094970703125|cri_loss: 0.0047760009765625|unsuper_loss: 0.0 +average reward score: 5.56640625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.24%) |Training time=0.49s (22.27%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2657|ppo_ep: 1|act_loss: -0.015716552734375|cri_loss: -0.007526397705078125|unsuper_loss: 0.0 +average reward score: 5.484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.54%) |Training time=0.47s (21.89%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2658|ppo_ep: 1|act_loss: 0.0038909912109375|cri_loss: 0.0020961761474609375|unsuper_loss: 0.0 +average reward score: 5.37890625 +------------------------------------------------------------------------------------- +|E2E latency=2.73s |Gather latency=0.00s (0.00%) |Generate time=1.58s (57.72%) |Training time=0.48s (17.49%) |Others=0.68 (24.79%)|CurSamplesPerSec=11.71 |AvgSamplesPerSec=14.44 +[2023-04-14 10:25:40,790] [INFO] [logging.py:96:log_dist] [Rank 0] step=2660, skipped=34, lr=[7.542580738509916e-06, 7.542580738509916e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:25:40,809] [INFO] [timer.py:199:stop] epoch=0/micro_step=2660/global_step=2660, RunningAvgSamplesPerSec=106.51279863471905, CurrSamplesPerSec=93.23595653916608, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:25:40,901] [INFO] [logging.py:96:log_dist] [Rank 0] step=2660, skipped=40, lr=[3.91284103605648e-06, 3.91284103605648e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2659|ppo_ep: 1|act_loss: -0.034912109375|cri_loss: -0.01702880859375|unsuper_loss: 0.0 +average reward score: 5.25390625 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.74s (74.23%) |Training time=0.51s (21.59%) |Others=0.10 (4.18%)|CurSamplesPerSec=13.65 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2660|ppo_ep: 1|act_loss: -0.05914306640625|cri_loss: -0.029052734375|unsuper_loss: 0.0 +average reward score: 5.34765625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.74%) |Training time=0.47s (21.69%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2661|ppo_ep: 1|act_loss: 0.003200531005859375|cri_loss: 0.0020351409912109375|unsuper_loss: 0.0 +average reward score: 5.640625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.74%) |Training time=0.47s (21.70%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2662|ppo_ep: 1|act_loss: 0.0149688720703125|cri_loss: 0.0079803466796875|unsuper_loss: 0.0 +average reward score: 5.62109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.52%) |Training time=0.47s (21.90%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2663|ppo_ep: 1|act_loss: 0.048736572265625|cri_loss: 0.02532958984375|unsuper_loss: 0.0 +average reward score: 6.01171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.48s (22.09%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2664|ppo_ep: 1|act_loss: 0.064208984375|cri_loss: 0.03277587890625|unsuper_loss: 0.0 +average reward score: 5.31640625 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.64%) |Training time=0.48s (21.92%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2665|ppo_ep: 1|act_loss: 0.0640869140625|cri_loss: 0.034332275390625|unsuper_loss: 0.0 +average reward score: 5.02734375 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.68s (73.92%) |Training time=0.50s (21.75%) |Others=0.10 (4.32%)|CurSamplesPerSec=14.05 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2666|ppo_ep: 1|act_loss: -0.00470733642578125|cri_loss: -0.001983642578125|unsuper_loss: 0.0 +average reward score: 4.765625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.08%) |Training time=0.46s (21.31%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2667|ppo_ep: 1|act_loss: -0.029388427734375|cri_loss: -0.0142974853515625|unsuper_loss: 0.0 +average reward score: 5.41015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.66%) |Training time=0.44s (20.74%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2668|ppo_ep: 1|act_loss: -0.01666259765625|cri_loss: -0.007843017578125|unsuper_loss: 0.0 +average reward score: 5.3515625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.00%) |Training time=0.46s (21.24%) |Others=0.10 (4.77%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.44 +[2023-04-14 10:26:02,506] [INFO] [logging.py:96:log_dist] [Rank 0] step=2670, skipped=34, lr=[7.5272111027551624e-06, 7.5272111027551624e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:26:02,524] [INFO] [timer.py:199:stop] epoch=0/micro_step=2670/global_step=2670, RunningAvgSamplesPerSec=106.50561698981126, CurrSamplesPerSec=109.33183449574705, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:26:02,618] [INFO] [logging.py:96:log_dist] [Rank 0] step=2670, skipped=40, lr=[3.904890002692223e-06, 3.904890002692223e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2669|ppo_ep: 1|act_loss: -0.06109619140625|cri_loss: -0.0295562744140625|unsuper_loss: 0.0 +average reward score: 5.3046875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.23%) |Training time=0.46s (21.19%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2670|ppo_ep: 1|act_loss: -0.0343017578125|cri_loss: -0.0163726806640625|unsuper_loss: 0.0 +average reward score: 5.1484375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.63%) |Training time=0.48s (21.84%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2671|ppo_ep: 1|act_loss: 0.048797607421875|cri_loss: 0.027435302734375|unsuper_loss: 0.0 +average reward score: 5.2578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.31%) |Training time=0.48s (22.12%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2672|ppo_ep: 1|act_loss: -0.0225067138671875|cri_loss: -0.01085662841796875|unsuper_loss: 0.0 +average reward score: 5.3203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.51%) |Training time=0.47s (21.91%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2673|ppo_ep: 1|act_loss: 0.0294189453125|cri_loss: 0.01519012451171875|unsuper_loss: 0.0 +average reward score: 5.8359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.48s (22.07%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2674|ppo_ep: 1|act_loss: 0.05877685546875|cri_loss: 0.03143310546875|unsuper_loss: 0.0 +average reward score: 5.453125 +------------------------------------------------------------------------------------- +|E2E latency=2.68s |Gather latency=0.00s (0.00%) |Generate time=1.58s (58.89%) |Training time=0.49s (18.37%) |Others=0.61 (22.75%)|CurSamplesPerSec=11.95 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2675|ppo_ep: 1|act_loss: -0.0154571533203125|cri_loss: -0.00719451904296875|unsuper_loss: 0.0 +average reward score: 5.52734375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.48s (22.23%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2676|ppo_ep: 1|act_loss: -0.0118865966796875|cri_loss: -0.004611968994140625|unsuper_loss: 0.0 +average reward score: 5.19921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.30%) |Training time=0.48s (22.12%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2677|ppo_ep: 1|act_loss: 0.06536865234375|cri_loss: 0.0384521484375|unsuper_loss: 0.0 +average reward score: 5.265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.20%) |Training time=0.48s (22.24%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2678|ppo_ep: 1|act_loss: -0.0438232421875|cri_loss: -0.021148681640625|unsuper_loss: 0.0 +average reward score: 5.61328125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.23%) |Training time=0.48s (22.20%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44 +[2023-04-14 10:26:24,579] [INFO] [logging.py:96:log_dist] [Rank 0] step=2680, skipped=34, lr=[7.511801413713135e-06, 7.511801413713135e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:26:24,597] [INFO] [timer.py:199:stop] epoch=0/micro_step=2680/global_step=2680, RunningAvgSamplesPerSec=106.48575335734733, CurrSamplesPerSec=102.37766112435794, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:26:24,690] [INFO] [logging.py:96:log_dist] [Rank 0] step=2680, skipped=40, lr=[3.896918145469013e-06, 3.896918145469013e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2679|ppo_ep: 1|act_loss: -0.02886962890625|cri_loss: -0.014129638671875|unsuper_loss: 0.0 +average reward score: 5.70703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.31%) |Training time=0.47s (22.13%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2680|ppo_ep: 1|act_loss: 0.028411865234375|cri_loss: 0.0149383544921875|unsuper_loss: 0.0 +average reward score: 5.359375 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.57s (69.21%) |Training time=0.48s (21.17%) |Others=0.22 (9.62%)|CurSamplesPerSec=14.07 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2681|ppo_ep: 1|act_loss: -0.0035610198974609375|cri_loss: -0.0012836456298828125|unsuper_loss: 0.0 +average reward score: 5.421875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.07%) |Training time=0.48s (22.37%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2682|ppo_ep: 1|act_loss: 0.0278167724609375|cri_loss: 0.01442718505859375|unsuper_loss: 0.0 +average reward score: 5.39453125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.13%) |Training time=0.48s (22.30%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2683|ppo_ep: 1|act_loss: 0.00504302978515625|cri_loss: 0.003444671630859375|unsuper_loss: 0.0 +average reward score: 5.39453125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.28%) |Training time=0.48s (22.21%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2684|ppo_ep: 1|act_loss: 0.0285491943359375|cri_loss: 0.0146942138671875|unsuper_loss: 0.0 +average reward score: 5.25 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.23%) |Training time=0.48s (22.19%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2685|ppo_ep: 1|act_loss: -0.033966064453125|cri_loss: -0.016143798828125|unsuper_loss: 0.0 +average reward score: 5.8046875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.01%) |Training time=0.49s (22.46%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2686|ppo_ep: 1|act_loss: 0.04290771484375|cri_loss: 0.02264404296875|unsuper_loss: 0.0 +average reward score: 5.5546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.41%) |Training time=0.47s (22.02%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2687|ppo_ep: 1|act_loss: -0.041107177734375|cri_loss: -0.0183868408203125|unsuper_loss: 0.0 +average reward score: 5.41796875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.36%) |Training time=0.47s (22.08%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2688|ppo_ep: 1|act_loss: -0.061004638671875|cri_loss: -0.029205322265625|unsuper_loss: 0.0 +average reward score: 5.61328125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.35%) |Training time=0.48s (22.11%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44 +[2023-04-14 10:26:46,274] [INFO] [logging.py:96:log_dist] [Rank 0] step=2690, skipped=34, lr=[7.496351899792602e-06, 7.496351899792602e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:26:46,292] [INFO] [timer.py:199:stop] epoch=0/micro_step=2690/global_step=2690, RunningAvgSamplesPerSec=106.46413319981575, CurrSamplesPerSec=102.74899790854556, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:26:46,385] [INFO] [logging.py:96:log_dist] [Rank 0] step=2690, skipped=40, lr=[3.888925582549006e-06, 3.888925582549006e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2689|ppo_ep: 1|act_loss: 0.00598907470703125|cri_loss: 0.00348663330078125|unsuper_loss: 0.0 +average reward score: 5.0078125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.41%) |Training time=0.47s (22.04%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2690|ppo_ep: 1|act_loss: -0.04083251953125|cri_loss: -0.019989013671875|unsuper_loss: 0.0 +average reward score: 5.6796875 +------------------------------------------------------------------------------------- +|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.05%) |Training time=0.50s (20.98%) |Others=0.12 (4.97%)|CurSamplesPerSec=13.55 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2691|ppo_ep: 1|act_loss: -0.048583984375|cri_loss: -0.022552490234375|unsuper_loss: 0.0 +average reward score: 5.2109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.76%) |Training time=0.47s (21.66%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2692|ppo_ep: 1|act_loss: -0.016754150390625|cri_loss: -0.00821685791015625|unsuper_loss: 0.0 +average reward score: 5.5546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.31%) |Training time=0.47s (21.88%) |Others=0.10 (4.81%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2693|ppo_ep: 1|act_loss: -0.0209808349609375|cri_loss: -0.01012420654296875|unsuper_loss: 0.0 +average reward score: 5.796875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.22%) |Training time=0.49s (22.27%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2694|ppo_ep: 1|act_loss: -0.0040435791015625|cri_loss: -0.0018329620361328125|unsuper_loss: 0.0 +average reward score: 5.69140625 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.04%) |Training time=0.49s (21.66%) |Others=0.10 (4.31%)|CurSamplesPerSec=14.05 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2695|ppo_ep: 1|act_loss: -0.008453369140625|cri_loss: -0.003986358642578125|unsuper_loss: 0.0 +average reward score: 5.58984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.46%) |Training time=0.47s (21.97%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2696|ppo_ep: 1|act_loss: 0.064208984375|cri_loss: 0.03363037109375|unsuper_loss: 0.0 +average reward score: 5.484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.51%) |Training time=0.47s (21.92%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2697|ppo_ep: 1|act_loss: 0.016510009765625|cri_loss: 0.00865936279296875|unsuper_loss: 0.0 +average reward score: 5.46875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.55%) |Training time=0.47s (21.90%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2698|ppo_ep: 1|act_loss: -0.002437591552734375|cri_loss: -0.0010318756103515625|unsuper_loss: 0.0 +average reward score: 5.7421875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.46%) |Training time=0.47s (21.98%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45 +[2023-04-14 10:27:08,167] [INFO] [logging.py:96:log_dist] [Rank 0] step=2700, skipped=34, lr=[7.480862789992629e-06, 7.480862789992629e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:27:08,185] [INFO] [timer.py:199:stop] epoch=0/micro_step=2700/global_step=2700, RunningAvgSamplesPerSec=106.4442632540804, CurrSamplesPerSec=101.70059117973773, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:27:08,278] [INFO] [logging.py:96:log_dist] [Rank 0] step=2700, skipped=40, lr=[3.880912432401265e-06, 3.880912432401265e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2699|ppo_ep: 1|act_loss: -0.00817108154296875|cri_loss: -0.003662109375|unsuper_loss: 0.0 +average reward score: 5.45703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.27%) |Training time=0.48s (22.18%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2700|ppo_ep: 1|act_loss: -0.0207061767578125|cri_loss: -0.00963592529296875|unsuper_loss: 0.0 +average reward score: 5.48046875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.44%) |Training time=0.48s (22.05%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2701|ppo_ep: 1|act_loss: 0.0119171142578125|cri_loss: 0.0084075927734375|unsuper_loss: 0.0 +average reward score: 5.1640625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.61%) |Training time=0.47s (21.82%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2702|ppo_ep: 1|act_loss: 0.043212890625|cri_loss: 0.0235595703125|unsuper_loss: 0.0 +average reward score: 5.390625 +------------------------------------------------------------------------------------- +|E2E latency=2.53s |Gather latency=0.00s (0.00%) |Generate time=1.58s (62.43%) |Training time=0.48s (18.84%) |Others=0.47 (18.73%)|CurSamplesPerSec=12.64 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2703|ppo_ep: 1|act_loss: 0.019866943359375|cri_loss: 0.011932373046875|unsuper_loss: 0.0 +average reward score: 5.12890625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.62%) |Training time=0.47s (21.80%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2704|ppo_ep: 1|act_loss: -0.0029621124267578125|cri_loss: -0.0009555816650390625|unsuper_loss: 0.0 +average reward score: 5.4375 +------------------------------------------------------------------------------------- +|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.68s (71.74%) |Training time=0.56s (24.05%) |Others=0.10 (4.21%)|CurSamplesPerSec=13.64 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2705|ppo_ep: 1|act_loss: -0.01299285888671875|cri_loss: -0.006378173828125|unsuper_loss: 0.0 +average reward score: 5.4296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.43%) |Training time=0.47s (22.00%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2706|ppo_ep: 1|act_loss: -0.016571044921875|cri_loss: -0.0079193115234375|unsuper_loss: 0.0 +average reward score: 5.5234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.59%) |Training time=0.47s (21.84%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2707|ppo_ep: 1|act_loss: -0.016357421875|cri_loss: -0.00804901123046875|unsuper_loss: 0.0 +average reward score: 5.52734375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.53%) |Training time=0.47s (21.90%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2708|ppo_ep: 1|act_loss: 0.13623046875|cri_loss: 0.07940673828125|unsuper_loss: 0.0 +average reward score: 4.8671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.39%) |Training time=0.48s (22.07%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45 +[2023-04-14 10:27:30,285] [INFO] [logging.py:96:log_dist] [Rank 0] step=2710, skipped=34, lr=[7.465334313899188e-06, 7.465334313899188e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:27:30,304] [INFO] [timer.py:199:stop] epoch=0/micro_step=2710/global_step=2710, RunningAvgSamplesPerSec=106.42161862003059, CurrSamplesPerSec=104.10446923041124, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:27:30,396] [INFO] [logging.py:96:log_dist] [Rank 0] step=2710, skipped=40, lr=[3.8728788138000064e-06, 3.8728788138000064e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2709|ppo_ep: 1|act_loss: -0.020050048828125|cri_loss: -0.00969696044921875|unsuper_loss: 0.0 +average reward score: 5.328125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.62%) |Training time=0.47s (21.83%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2710|ppo_ep: 1|act_loss: 0.007110595703125|cri_loss: 0.003948211669921875|unsuper_loss: 0.0 +average reward score: 5.4921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.88%) |Training time=0.49s (22.58%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2711|ppo_ep: 1|act_loss: -0.0193634033203125|cri_loss: -0.007534027099609375|unsuper_loss: 0.0 +average reward score: 5.2578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.39%) |Training time=0.48s (22.04%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2712|ppo_ep: 1|act_loss: 0.0090179443359375|cri_loss: 0.004947662353515625|unsuper_loss: 0.0 +average reward score: 5.53125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.68%) |Training time=0.47s (21.75%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2713|ppo_ep: 1|act_loss: -0.0018482208251953125|cri_loss: -0.00031375885009765625|unsuper_loss: 0.0 +average reward score: 5.57421875 +------------------------------------------------------------------------------------- +|E2E latency=2.46s |Gather latency=0.00s (0.00%) |Generate time=1.58s (64.20%) |Training time=0.47s (19.22%) |Others=0.41 (16.59%)|CurSamplesPerSec=13.00 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2714|ppo_ep: 1|act_loss: -0.03118896484375|cri_loss: -0.01531982421875|unsuper_loss: 0.0 +average reward score: 6.10546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.96%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2715|ppo_ep: 1|act_loss: -0.035400390625|cri_loss: -0.016998291015625|unsuper_loss: 0.0 +average reward score: 5.68359375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.51%) |Training time=0.48s (21.96%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2716|ppo_ep: 1|act_loss: -0.0191192626953125|cri_loss: -0.0081634521484375|unsuper_loss: 0.0 +average reward score: 5.05859375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.09%) |Training time=0.48s (22.32%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2717|ppo_ep: 1|act_loss: 0.01910400390625|cri_loss: 0.010162353515625|unsuper_loss: 0.0 +average reward score: 5.80859375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.18%) |Training time=0.48s (22.27%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2718|ppo_ep: 1|act_loss: 0.0635986328125|cri_loss: 0.035491943359375|unsuper_loss: 0.0 +average reward score: 5.53125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.50%) |Training time=0.47s (21.94%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45 +[2023-04-14 10:27:52,375] [INFO] [logging.py:96:log_dist] [Rank 0] step=2720, skipped=34, lr=[7.4497667016817536e-06, 7.4497667016817536e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:27:52,393] [INFO] [timer.py:199:stop] epoch=0/micro_step=2720/global_step=2720, RunningAvgSamplesPerSec=106.40045873498524, CurrSamplesPerSec=95.01305938452128, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:27:52,486] [INFO] [logging.py:96:log_dist] [Rank 0] step=2720, skipped=40, lr=[3.864824845822837e-06, 3.864824845822837e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2719|ppo_ep: 1|act_loss: 0.00815582275390625|cri_loss: 0.004367828369140625|unsuper_loss: 0.0 +average reward score: 5.6328125 +------------------------------------------------------------------------------------- +|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.53%) |Training time=0.50s (21.30%) |Others=0.10 (4.18%)|CurSamplesPerSec=13.64 |AvgSamplesPerSec=14.45 +[2023-04-14 10:27:54,627] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 2720|ppo_ep: 1|act_loss: 0.1143798828125|cri_loss: 0.06793212890625|unsuper_loss: 0.0 +average reward score: 5.37890625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.05%) |Training time=0.47s (21.86%) |Others=0.09 (4.09%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.45 +[2023-04-14 10:27:56,766] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 2721|ppo_ep: 1|act_loss: -0.01043701171875|cri_loss: -0.004398345947265625|unsuper_loss: 0.0 +average reward score: 5.41796875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.16%) |Training time=0.47s (21.75%) |Others=0.09 (4.09%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2722|ppo_ep: 1|act_loss: 0.0159454345703125|cri_loss: 0.0082550048828125|unsuper_loss: 0.0 +average reward score: 5.39453125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.23%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2723|ppo_ep: 1|act_loss: 0.03460693359375|cri_loss: 0.01800537109375|unsuper_loss: 0.0 +average reward score: 4.86328125 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.61s (71.37%) |Training time=0.55s (24.27%) |Others=0.10 (4.36%)|CurSamplesPerSec=14.23 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2724|ppo_ep: 1|act_loss: 0.005645751953125|cri_loss: 0.003017425537109375|unsuper_loss: 0.0 +average reward score: 5.21484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.51%) |Training time=0.47s (21.91%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2725|ppo_ep: 1|act_loss: 0.01123046875|cri_loss: 0.006679534912109375|unsuper_loss: 0.0 +average reward score: 5.51171875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.19%) |Training time=0.48s (22.22%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2726|ppo_ep: 1|act_loss: -0.03509521484375|cri_loss: -0.01535797119140625|unsuper_loss: 0.0 +average reward score: 5.453125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.48s (22.10%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2727|ppo_ep: 1|act_loss: -0.026458740234375|cri_loss: -0.01274871826171875|unsuper_loss: 0.0 +average reward score: 5.49609375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.48%) |Training time=0.47s (21.94%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2728|ppo_ep: 1|act_loss: -0.011383056640625|cri_loss: -0.005321502685546875|unsuper_loss: 0.0 +average reward score: 5.37890625 +------------------------------------------------------------------------------------- +|E2E latency=2.55s |Gather latency=0.00s (0.00%) |Generate time=1.57s (61.74%) |Training time=0.48s (18.74%) |Others=0.50 (19.52%)|CurSamplesPerSec=12.54 |AvgSamplesPerSec=14.45 +[2023-04-14 10:28:14,381] [INFO] [logging.py:96:log_dist] [Rank 0] step=2730, skipped=34, lr=[7.434160184089898e-06, 7.434160184089898e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:28:14,399] [INFO] [timer.py:199:stop] epoch=0/micro_step=2730/global_step=2730, RunningAvgSamplesPerSec=106.37902719012551, CurrSamplesPerSec=101.46724635971965, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:28:14,492] [INFO] [logging.py:96:log_dist] [Rank 0] step=2730, skipped=42, lr=[3.8583671001032015e-06, 3.8583671001032015e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2729|ppo_ep: 1|act_loss: 0.02996826171875|cri_loss: 0.0161590576171875|unsuper_loss: 0.0 +average reward score: 5.2265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.22%) |Training time=0.48s (22.20%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2730|ppo_ep: 1|act_loss: -0.06396484375|cri_loss: -0.0307159423828125|unsuper_loss: 0.0 +average reward score: 5.01953125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.33%) |Training time=0.46s (21.11%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2731|ppo_ep: 1|act_loss: 0.0201263427734375|cri_loss: 0.0107421875|unsuper_loss: 0.0 +average reward score: 5.6328125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.28%) |Training time=0.46s (21.14%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2732|ppo_ep: 1|act_loss: 0.0035247802734375|cri_loss: 0.002044677734375|unsuper_loss: 0.0 +average reward score: 5.04296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.99%) |Training time=0.46s (21.43%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2733|ppo_ep: 1|act_loss: 0.03350830078125|cri_loss: 0.017333984375|unsuper_loss: 0.0 +average reward score: 5.53515625 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.05%) |Training time=0.52s (23.51%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.42 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2734|ppo_ep: 1|act_loss: 0.0076904296875|cri_loss: 0.004077911376953125|unsuper_loss: 0.0 +average reward score: 5.3671875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.15%) |Training time=0.46s (21.28%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2735|ppo_ep: 1|act_loss: 0.029144287109375|cri_loss: 0.01526641845703125|unsuper_loss: 0.0 +average reward score: 5.296875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.90%) |Training time=0.47s (21.54%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45 +[2023-04-14 10:28:29,527] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 2736|ppo_ep: 1|act_loss: 0.005260467529296875|cri_loss: 0.00360870361328125|unsuper_loss: 0.0 +average reward score: 5.15625 +------------------------------------------------------------------------------------- +|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.33%) |Training time=0.43s (20.06%) |Others=0.10 (4.61%)|CurSamplesPerSec=15.09 |AvgSamplesPerSec=14.45 +[2023-04-14 10:28:31,651] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 2737|ppo_ep: 1|act_loss: -0.044464111328125|cri_loss: -0.0214996337890625|unsuper_loss: 0.0 +average reward score: 4.6875 +------------------------------------------------------------------------------------- +|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.31%) |Training time=0.43s (20.06%) |Others=0.10 (4.63%)|CurSamplesPerSec=15.07 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2738|ppo_ep: 1|act_loss: 0.0011739730834960938|cri_loss: 0.000949859619140625|unsuper_loss: 0.0 +average reward score: 5.28515625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.01%) |Training time=0.47s (21.49%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.45 +[2023-04-14 10:28:35,964] [INFO] [logging.py:96:log_dist] [Rank 0] step=2740, skipped=36, lr=[7.421647113578771e-06, 7.421647113578771e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:28:35,983] [INFO] [timer.py:199:stop] epoch=0/micro_step=2740/global_step=2740, RunningAvgSamplesPerSec=106.38762931965792, CurrSamplesPerSec=108.80439440355586, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:28:36,075] [INFO] [logging.py:96:log_dist] [Rank 0] step=2740, skipped=42, lr=[3.850276804286629e-06, 3.850276804286629e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2739|ppo_ep: 1|act_loss: -0.00032520294189453125|cri_loss: 0.0002455711364746094|unsuper_loss: 0.0 +average reward score: 5.57421875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.22%) |Training time=0.46s (21.23%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2740|ppo_ep: 1|act_loss: -0.03570556640625|cri_loss: -0.0167694091796875|unsuper_loss: 0.0 +average reward score: 5.21875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.46s (21.15%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2741|ppo_ep: 1|act_loss: -0.00420379638671875|cri_loss: 0.002628326416015625|unsuper_loss: 0.0 +average reward score: 5.55859375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.15%) |Training time=0.46s (21.20%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2742|ppo_ep: 1|act_loss: 0.03314208984375|cri_loss: 0.0172271728515625|unsuper_loss: 0.0 +average reward score: 5.1796875 +------------------------------------------------------------------------------------- +|E2E latency=3.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (50.06%) |Training time=0.46s (14.54%) |Others=1.13 (35.40%)|CurSamplesPerSec=10.05 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2743|ppo_ep: 1|act_loss: 0.0535888671875|cri_loss: 0.030548095703125|unsuper_loss: 0.0 +average reward score: 4.8125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.45s (21.16%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2744|ppo_ep: 1|act_loss: 0.04168701171875|cri_loss: 0.021636962890625|unsuper_loss: 0.0 +average reward score: 5.18359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.46s (21.46%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2745|ppo_ep: 1|act_loss: 0.0287628173828125|cri_loss: 0.01482391357421875|unsuper_loss: 0.0 +average reward score: 6.07421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.24%) |Training time=0.46s (21.22%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2746|ppo_ep: 1|act_loss: 0.0121002197265625|cri_loss: 0.006809234619140625|unsuper_loss: 0.0 +average reward score: 5.1875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.41%) |Training time=0.45s (21.00%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2747|ppo_ep: 1|act_loss: -0.0285797119140625|cri_loss: -0.0139007568359375|unsuper_loss: 0.0 +average reward score: 5.5859375 +------------------------------------------------------------------------------------- +|E2E latency=2.44s |Gather latency=0.00s (0.00%) |Generate time=1.60s (65.48%) |Training time=0.45s (18.31%) |Others=0.40 (16.21%)|CurSamplesPerSec=13.13 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2748|ppo_ep: 1|act_loss: -0.047943115234375|cri_loss: -0.0233612060546875|unsuper_loss: 0.0 +average reward score: 5.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.76s (76.32%) |Training time=0.45s (19.41%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.87 |AvgSamplesPerSec=14.44 +[2023-04-14 10:28:58,976] [INFO] [logging.py:96:log_dist] [Rank 0] step=2750, skipped=36, lr=[7.405971149640408e-06, 7.405971149640408e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:28:58,994] [INFO] [timer.py:199:stop] epoch=0/micro_step=2750/global_step=2750, RunningAvgSamplesPerSec=106.39908594796809, CurrSamplesPerSec=114.45823124842768, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:28:59,087] [INFO] [logging.py:96:log_dist] [Rank 0] step=2750, skipped=42, lr=[3.842166494110451e-06, 3.842166494110451e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2749|ppo_ep: 1|act_loss: 0.000629425048828125|cri_loss: 0.002536773681640625|unsuper_loss: 0.0 +average reward score: 5.46484375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.82%) |Training time=0.44s (20.61%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2750|ppo_ep: 1|act_loss: -0.030181884765625|cri_loss: -0.013763427734375|unsuper_loss: 0.0 +average reward score: 5.140625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.41%) |Training time=0.45s (21.01%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2751|ppo_ep: 1|act_loss: -0.0014810562133789062|cri_loss: -0.00019550323486328125|unsuper_loss: 0.0 +average reward score: 5.28125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.31%) |Training time=0.45s (21.08%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2752|ppo_ep: 1|act_loss: 0.04644775390625|cri_loss: 0.02606201171875|unsuper_loss: 0.0 +average reward score: 5.5625 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.69%) |Training time=0.45s (19.96%) |Others=0.10 (4.35%)|CurSamplesPerSec=14.13 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2753|ppo_ep: 1|act_loss: 0.08673095703125|cri_loss: 0.046783447265625|unsuper_loss: 0.0 +average reward score: 5.1171875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.51%) |Training time=0.45s (20.91%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2754|ppo_ep: 1|act_loss: -0.0129241943359375|cri_loss: -0.00525665283203125|unsuper_loss: 0.0 +average reward score: 6.1953125 +------------------------------------------------------------------------------------- +|E2E latency=2.83s |Gather latency=0.00s (0.00%) |Generate time=1.60s (56.37%) |Training time=0.45s (15.83%) |Others=0.79 (27.80%)|CurSamplesPerSec=11.29 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2755|ppo_ep: 1|act_loss: 0.030029296875|cri_loss: 0.0172119140625|unsuper_loss: 0.0 +average reward score: 5.08984375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.58%) |Training time=0.45s (20.83%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2756|ppo_ep: 1|act_loss: -0.00177001953125|cri_loss: -0.0005936622619628906|unsuper_loss: 0.0 +average reward score: 4.97265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.64%) |Training time=0.45s (20.79%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2757|ppo_ep: 1|act_loss: 0.004314422607421875|cri_loss: 0.00274658203125|unsuper_loss: 0.0 +average reward score: 4.90234375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.58%) |Training time=0.45s (20.82%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2758|ppo_ep: 1|act_loss: -0.0126495361328125|cri_loss: -0.0058746337890625|unsuper_loss: 0.0 +average reward score: 5.8046875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.38%) |Training time=0.45s (21.04%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44 +[2023-04-14 10:29:21,250] [INFO] [logging.py:96:log_dist] [Rank 0] step=2760, skipped=36, lr=[7.390256929483354e-06, 7.390256929483354e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:29:21,268] [INFO] [timer.py:199:stop] epoch=0/micro_step=2760/global_step=2760, RunningAvgSamplesPerSec=106.42223778280272, CurrSamplesPerSec=131.88637259169658, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:29:21,361] [INFO] [logging.py:96:log_dist] [Rank 0] step=2760, skipped=42, lr=[3.83403628978903e-06, 3.83403628978903e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2759|ppo_ep: 1|act_loss: -0.024261474609375|cri_loss: -0.00824737548828125|unsuper_loss: 0.0 +average reward score: 5.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.64s (76.47%) |Training time=0.41s (18.93%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2760|ppo_ep: 1|act_loss: 0.00678253173828125|cri_loss: 0.003955841064453125|unsuper_loss: 0.0 +average reward score: 5.1875 +------------------------------------------------------------------------------------- +|E2E latency=3.42s |Gather latency=0.00s (0.00%) |Generate time=1.61s (47.01%) |Training time=0.43s (12.67%) |Others=1.38 (40.32%)|CurSamplesPerSec=9.35 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2761|ppo_ep: 1|act_loss: -0.016937255859375|cri_loss: -0.0078582763671875|unsuper_loss: 0.0 +average reward score: 5.7109375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.11%) |Training time=0.50s (22.90%) |Others=0.11 (4.99%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2762|ppo_ep: 1|act_loss: 0.0010814666748046875|cri_loss: 0.0017833709716796875|unsuper_loss: 0.0 +average reward score: 4.83203125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.46s (21.38%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2763|ppo_ep: 1|act_loss: 0.028076171875|cri_loss: 0.014312744140625|unsuper_loss: 0.0 +average reward score: 5.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.86%) |Training time=0.46s (21.56%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2764|ppo_ep: 1|act_loss: 0.03607177734375|cri_loss: 0.0192108154296875|unsuper_loss: 0.0 +average reward score: 5.265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.92%) |Training time=0.47s (21.53%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2765|ppo_ep: 1|act_loss: 0.00830078125|cri_loss: 0.00431060791015625|unsuper_loss: 0.0 +average reward score: 6.3828125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.20%) |Training time=0.45s (21.21%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2766|ppo_ep: 1|act_loss: -0.0135498046875|cri_loss: -0.004924774169921875|unsuper_loss: 0.0 +average reward score: 5.12890625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.60%) |Training time=0.47s (21.83%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2767|ppo_ep: 1|act_loss: -0.008453369140625|cri_loss: -0.003582000732421875|unsuper_loss: 0.0 +average reward score: 5.421875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.28%) |Training time=0.48s (22.15%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2768|ppo_ep: 1|act_loss: -0.0152740478515625|cri_loss: -0.006717681884765625|unsuper_loss: 0.0 +average reward score: 5.26953125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.62%) |Training time=0.47s (21.79%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.44 +[2023-04-14 10:29:44,042] [INFO] [logging.py:96:log_dist] [Rank 0] step=2770, skipped=36, lr=[7.37450468603026e-06, 7.37450468603026e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:29:44,060] [INFO] [timer.py:199:stop] epoch=0/micro_step=2770/global_step=2770, RunningAvgSamplesPerSec=106.41993681795651, CurrSamplesPerSec=104.47658694697775, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:29:44,153] [INFO] [logging.py:96:log_dist] [Rank 0] step=2770, skipped=42, lr=[3.825886311831606e-06, 3.825886311831606e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2769|ppo_ep: 1|act_loss: 0.055755615234375|cri_loss: 0.0288848876953125|unsuper_loss: 0.0 +average reward score: 5.76171875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.60%) |Training time=0.47s (21.84%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2770|ppo_ep: 1|act_loss: -0.013946533203125|cri_loss: -0.006404876708984375|unsuper_loss: 0.0 +average reward score: 5.30859375 +------------------------------------------------------------------------------------- +|E2E latency=2.43s |Gather latency=0.00s (0.00%) |Generate time=1.58s (65.02%) |Training time=0.47s (19.46%) |Others=0.38 (15.52%)|CurSamplesPerSec=13.19 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2771|ppo_ep: 1|act_loss: 0.01171875|cri_loss: 0.006114959716796875|unsuper_loss: 0.0 +average reward score: 5.0859375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.53%) |Training time=0.47s (21.90%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2772|ppo_ep: 1|act_loss: -0.00611114501953125|cri_loss: -0.0029449462890625|unsuper_loss: 0.0 +average reward score: 5.19921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.20%) |Training time=0.48s (22.24%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2773|ppo_ep: 1|act_loss: 0.00923919677734375|cri_loss: 0.005191802978515625|unsuper_loss: 0.0 +average reward score: 5.0859375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.35%) |Training time=0.47s (22.06%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2774|ppo_ep: 1|act_loss: -0.0199432373046875|cri_loss: -0.00905609130859375|unsuper_loss: 0.0 +average reward score: 5.078125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.14%) |Training time=0.46s (21.26%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2775|ppo_ep: 1|act_loss: -0.026885986328125|cri_loss: -0.01313018798828125|unsuper_loss: 0.0 +average reward score: 5.03515625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.07%) |Training time=0.46s (21.31%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2776|ppo_ep: 1|act_loss: -0.0113983154296875|cri_loss: -0.00531005859375|unsuper_loss: 0.0 +average reward score: 5.3984375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.83%) |Training time=0.46s (21.57%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2777|ppo_ep: 1|act_loss: -0.0291748046875|cri_loss: -0.014312744140625|unsuper_loss: 0.0 +average reward score: 5.0390625 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.08%) |Training time=0.50s (22.59%) |Others=0.12 (5.33%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2778|ppo_ep: 1|act_loss: 0.044647216796875|cri_loss: 0.0236968994140625|unsuper_loss: 0.0 +average reward score: 5.8828125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.14%) |Training time=0.46s (21.28%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.44 +[2023-04-14 10:30:05,837] [INFO] [logging.py:96:log_dist] [Rank 0] step=2780, skipped=36, lr=[7.358714652767375e-06, 7.358714652767375e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:30:05,856] [INFO] [timer.py:199:stop] epoch=0/micro_step=2780/global_step=2780, RunningAvgSamplesPerSec=106.41343644976286, CurrSamplesPerSec=105.34027242011247, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:30:05,955] [INFO] [logging.py:96:log_dist] [Rank 0] step=2780, skipped=42, lr=[3.817716681040515e-06, 3.817716681040515e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2779|ppo_ep: 1|act_loss: 0.07403564453125|cri_loss: 0.03955078125|unsuper_loss: 0.0 +average reward score: 5.375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.65%) |Training time=0.47s (21.58%) |Others=0.10 (4.77%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2780|ppo_ep: 1|act_loss: 0.03387451171875|cri_loss: 0.0186920166015625|unsuper_loss: 0.0 +average reward score: 4.8359375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.51%) |Training time=0.46s (21.19%) |Others=0.12 (5.29%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2781|ppo_ep: 1|act_loss: 0.061767578125|cri_loss: 0.032928466796875|unsuper_loss: 0.0 +average reward score: 5.06640625 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.51%) |Training time=0.45s (20.13%) |Others=0.10 (4.36%)|CurSamplesPerSec=14.21 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2782|ppo_ep: 1|act_loss: -0.02105712890625|cri_loss: -0.007843017578125|unsuper_loss: 0.0 +average reward score: 4.9453125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.91%) |Training time=0.46s (21.49%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2783|ppo_ep: 1|act_loss: -0.0007939338684082031|cri_loss: -0.00011396408081054688|unsuper_loss: 0.0 +average reward score: 5.2734375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.07%) |Training time=0.46s (21.34%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2784|ppo_ep: 1|act_loss: -0.057586669921875|cri_loss: -0.0283203125|unsuper_loss: 0.0 +average reward score: 5.1484375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.45s (21.14%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2785|ppo_ep: 1|act_loss: -0.04052734375|cri_loss: -0.019744873046875|unsuper_loss: 0.0 +average reward score: 5.296875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.70%) |Training time=0.47s (21.72%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2786|ppo_ep: 1|act_loss: 0.03509521484375|cri_loss: 0.0211029052734375|unsuper_loss: 0.0 +average reward score: 4.828125 +------------------------------------------------------------------------------------- +|E2E latency=2.43s |Gather latency=0.00s (0.00%) |Generate time=1.58s (65.20%) |Training time=0.46s (19.06%) |Others=0.38 (15.74%)|CurSamplesPerSec=13.17 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2787|ppo_ep: 1|act_loss: -0.0209197998046875|cri_loss: -0.0095672607421875|unsuper_loss: 0.0 +average reward score: 5.1328125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.47%) |Training time=0.45s (20.92%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2788|ppo_ep: 1|act_loss: 0.0149383544921875|cri_loss: 0.00780487060546875|unsuper_loss: 0.0 +average reward score: 5.140625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.78%) |Training time=0.46s (21.62%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.44 +[2023-04-14 10:30:27,697] [INFO] [logging.py:96:log_dist] [Rank 0] step=2790, skipped=36, lr=[7.34288706374108e-06, 7.34288706374108e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:30:27,715] [INFO] [timer.py:199:stop] epoch=0/micro_step=2790/global_step=2790, RunningAvgSamplesPerSec=106.42220408681138, CurrSamplesPerSec=112.92798048000672, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:30:27,808] [INFO] [logging.py:96:log_dist] [Rank 0] step=2790, skipped=42, lr=[3.8095275185093927e-06, 3.8095275185093927e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2789|ppo_ep: 1|act_loss: -0.0347900390625|cri_loss: -0.0169677734375|unsuper_loss: 0.0 +average reward score: 5.078125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.51%) |Training time=0.45s (20.90%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2790|ppo_ep: 1|act_loss: -0.03375244140625|cri_loss: -0.01496124267578125|unsuper_loss: 0.0 +average reward score: 5.703125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.27%) |Training time=0.45s (21.13%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2791|ppo_ep: 1|act_loss: 0.0228271484375|cri_loss: 0.012115478515625|unsuper_loss: 0.0 +average reward score: 5.27734375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.55%) |Training time=0.47s (21.88%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2792|ppo_ep: 1|act_loss: 0.0075836181640625|cri_loss: 0.00460052490234375|unsuper_loss: 0.0 +average reward score: 5.42578125 +------------------------------------------------------------------------------------- +|E2E latency=2.52s |Gather latency=0.00s (0.00%) |Generate time=1.77s (69.97%) |Training time=0.46s (18.37%) |Others=0.29 (11.66%)|CurSamplesPerSec=12.68 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2793|ppo_ep: 1|act_loss: 0.034515380859375|cri_loss: 0.018157958984375|unsuper_loss: 0.0 +average reward score: 4.91015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.72%) |Training time=0.47s (21.70%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2794|ppo_ep: 1|act_loss: -0.01983642578125|cri_loss: -0.0094757080078125|unsuper_loss: 0.0 +average reward score: 5.43359375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.94%) |Training time=0.46s (21.46%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2795|ppo_ep: 1|act_loss: -0.021636962890625|cri_loss: -0.01044464111328125|unsuper_loss: 0.0 +average reward score: 5.48828125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.67%) |Training time=0.47s (21.74%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2796|ppo_ep: 1|act_loss: -0.01271820068359375|cri_loss: -0.0045928955078125|unsuper_loss: 0.0 +average reward score: 5.3203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.63%) |Training time=0.47s (21.80%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2797|ppo_ep: 1|act_loss: 0.015655517578125|cri_loss: 0.0091094970703125|unsuper_loss: 0.0 +average reward score: 5.40234375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.56%) |Training time=0.47s (21.87%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2798|ppo_ep: 1|act_loss: 0.00893402099609375|cri_loss: 0.004947662353515625|unsuper_loss: 0.0 +average reward score: 5.15234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.43%) |Training time=0.47s (21.99%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45 +[2023-04-14 10:30:49,522] [INFO] [logging.py:96:log_dist] [Rank 0] step=2800, skipped=36, lr=[7.327022153554431e-06, 7.327022153554431e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:30:49,540] [INFO] [timer.py:199:stop] epoch=0/micro_step=2800/global_step=2800, RunningAvgSamplesPerSec=106.41914468193407, CurrSamplesPerSec=104.58469930026338, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:30:49,633] [INFO] [logging.py:96:log_dist] [Rank 0] step=2800, skipped=42, lr=[3.8013189456213825e-06, 3.8013189456213825e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2799|ppo_ep: 1|act_loss: -0.0113983154296875|cri_loss: -0.00405120849609375|unsuper_loss: 0.0 +average reward score: 5.8203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.61%) |Training time=0.47s (21.83%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2800|ppo_ep: 1|act_loss: -0.0082550048828125|cri_loss: -0.002552032470703125|unsuper_loss: 0.0 +average reward score: 5.296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.96%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2801|ppo_ep: 1|act_loss: -0.04388427734375|cri_loss: -0.0208587646484375|unsuper_loss: 0.0 +average reward score: 5.52734375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.67%) |Training time=0.47s (21.75%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2802|ppo_ep: 1|act_loss: -0.00791168212890625|cri_loss: -0.003131866455078125|unsuper_loss: 0.0 +average reward score: 5.01171875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.52%) |Training time=0.47s (21.90%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2803|ppo_ep: 1|act_loss: 0.00707244873046875|cri_loss: 0.00591278076171875|unsuper_loss: 0.0 +average reward score: 5.3515625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.71%) |Training time=0.47s (21.71%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2804|ppo_ep: 1|act_loss: -0.0084075927734375|cri_loss: -0.003631591796875|unsuper_loss: 0.0 +average reward score: 5.16015625 +------------------------------------------------------------------------------------- +|E2E latency=2.87s |Gather latency=0.00s (0.00%) |Generate time=1.59s (55.46%) |Training time=0.46s (16.10%) |Others=0.81 (28.43%)|CurSamplesPerSec=11.17 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2805|ppo_ep: 1|act_loss: 0.0189971923828125|cri_loss: 0.0105438232421875|unsuper_loss: 0.0 +average reward score: 4.80859375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.33%) |Training time=0.48s (22.09%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2806|ppo_ep: 1|act_loss: 0.057586669921875|cri_loss: 0.0298919677734375|unsuper_loss: 0.0 +average reward score: 5.4375 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.82%) |Training time=0.46s (19.91%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.88 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2807|ppo_ep: 1|act_loss: 0.03485107421875|cri_loss: 0.01806640625|unsuper_loss: 0.0 +average reward score: 5.671875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.12%) |Training time=0.46s (21.29%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2808|ppo_ep: 1|act_loss: 0.054534912109375|cri_loss: 0.0288238525390625|unsuper_loss: 0.0 +average reward score: 5.7109375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.85%) |Training time=0.46s (21.57%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.44 +[2023-04-14 10:31:11,871] [INFO] [logging.py:96:log_dist] [Rank 0] step=2810, skipped=36, lr=[7.311120157363665e-06, 7.311120157363665e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:31:11,889] [INFO] [timer.py:199:stop] epoch=0/micro_step=2810/global_step=2810, RunningAvgSamplesPerSec=106.41754482170897, CurrSamplesPerSec=110.76354693624923, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:31:11,994] [INFO] [logging.py:96:log_dist] [Rank 0] step=2810, skipped=42, lr=[3.7930910840473377e-06, 3.7930910840473377e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2809|ppo_ep: 1|act_loss: 0.0004031658172607422|cri_loss: 0.00038695335388183594|unsuper_loss: 0.0 +average reward score: 5.171875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.45s (20.91%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2810|ppo_ep: 1|act_loss: 0.01690673828125|cri_loss: 0.0100555419921875|unsuper_loss: 0.0 +average reward score: 5.55859375 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.63s (71.55%) |Training time=0.55s (24.14%) |Others=0.10 (4.31%)|CurSamplesPerSec=14.04 |AvgSamplesPerSec=14.44 +epoch: 0|step: 2811|ppo_ep: 1|act_loss: -0.0279998779296875|cri_loss: -0.01287841796875|unsuper_loss: 0.0 +average reward score: 5.0 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.59%) |Training time=0.47s (21.83%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2812|ppo_ep: 1|act_loss: -0.038330078125|cri_loss: -0.0186614990234375|unsuper_loss: 0.0 +average reward score: 5.25390625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.16%) |Training time=0.48s (22.31%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2813|ppo_ep: 1|act_loss: 0.032928466796875|cri_loss: 0.017578125|unsuper_loss: 0.0 +average reward score: 5.19140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.10%) |Training time=0.48s (22.35%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2814|ppo_ep: 1|act_loss: -0.0133514404296875|cri_loss: -0.006317138671875|unsuper_loss: 0.0 +average reward score: 5.7109375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.59%) |Training time=0.47s (21.83%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2815|ppo_ep: 1|act_loss: 0.0244140625|cri_loss: 0.01275634765625|unsuper_loss: 0.0 +average reward score: 5.12109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.89%) |Training time=0.49s (22.57%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2816|ppo_ep: 1|act_loss: -0.0089569091796875|cri_loss: -0.0037841796875|unsuper_loss: 0.0 +average reward score: 5.28515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.28%) |Training time=0.50s (23.21%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2817|ppo_ep: 1|act_loss: -0.0140380859375|cri_loss: -0.006824493408203125|unsuper_loss: 0.0 +average reward score: 5.35546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.02%) |Training time=0.48s (22.39%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2818|ppo_ep: 1|act_loss: 0.002536773681640625|cri_loss: 0.0032253265380859375|unsuper_loss: 0.0 +average reward score: 5.5546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.27%) |Training time=0.48s (22.14%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45 +[2023-04-14 10:31:33,595] [INFO] [logging.py:96:log_dist] [Rank 0] step=2820, skipped=36, lr=[7.295181310874729e-06, 7.295181310874729e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:31:33,613] [INFO] [timer.py:199:stop] epoch=0/micro_step=2820/global_step=2820, RunningAvgSamplesPerSec=106.38886203729886, CurrSamplesPerSec=102.92391691103396, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:31:33,832] [INFO] [logging.py:96:log_dist] [Rank 0] step=2820, skipped=42, lr=[3.7848440557440147e-06, 3.7848440557440147e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2819|ppo_ep: 1|act_loss: 0.03643798828125|cri_loss: 0.021148681640625|unsuper_loss: 0.0 +average reward score: 4.859375 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.61s (69.73%) |Training time=0.60s (26.01%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.88 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2820|ppo_ep: 1|act_loss: -0.03076171875|cri_loss: -0.01486968994140625|unsuper_loss: 0.0 +average reward score: 5.1328125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.86%) |Training time=0.46s (21.57%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2821|ppo_ep: 1|act_loss: -0.05303955078125|cri_loss: -0.026031494140625|unsuper_loss: 0.0 +average reward score: 4.98828125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.48%) |Training time=0.47s (21.94%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.45 +[2023-04-14 10:31:40,440] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 2822|ppo_ep: 1|act_loss: -0.04193115234375|cri_loss: -0.0196075439453125|unsuper_loss: 0.0 +average reward score: 6.0390625 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.45%) |Training time=0.48s (20.77%) |Others=0.09 (3.78%)|CurSamplesPerSec=13.83 |AvgSamplesPerSec=14.45 +[2023-04-14 10:31:42,585] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 2823|ppo_ep: 1|act_loss: 0.017669677734375|cri_loss: 0.0095062255859375|unsuper_loss: 0.0 +average reward score: 5.2734375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.55%) |Training time=0.48s (22.38%) |Others=0.09 (4.07%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2824|ppo_ep: 1|act_loss: -0.035980224609375|cri_loss: -0.0175323486328125|unsuper_loss: 0.0 +average reward score: 5.15625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.31%) |Training time=0.48s (22.13%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2825|ppo_ep: 1|act_loss: 0.005672454833984375|cri_loss: 0.003009796142578125|unsuper_loss: 0.0 +average reward score: 5.50390625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.29%) |Training time=0.47s (21.70%) |Others=0.13 (6.01%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2826|ppo_ep: 1|act_loss: -0.0008726119995117188|cri_loss: -0.00019288063049316406|unsuper_loss: 0.0 +average reward score: 5.31640625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.49%) |Training time=0.47s (21.94%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2827|ppo_ep: 1|act_loss: -0.0009431838989257812|cri_loss: -7.43865966796875e-05|unsuper_loss: 0.0 +average reward score: 4.7890625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.43%) |Training time=0.47s (21.99%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2828|ppo_ep: 1|act_loss: 0.00569915771484375|cri_loss: 0.003047943115234375|unsuper_loss: 0.0 +average reward score: 5.125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.33%) |Training time=0.47s (22.10%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45 +[2023-04-14 10:31:55,391] [INFO] [logging.py:96:log_dist] [Rank 0] step=2830, skipped=36, lr=[7.2792058503397775e-06, 7.2792058503397775e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:31:55,410] [INFO] [timer.py:199:stop] epoch=0/micro_step=2830/global_step=2830, RunningAvgSamplesPerSec=106.37661744892732, CurrSamplesPerSec=104.80478821218995, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:31:55,502] [INFO] [logging.py:96:log_dist] [Rank 0] step=2830, skipped=44, lr=[3.77823271519263e-06, 3.77823271519263e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2829|ppo_ep: 1|act_loss: 0.0153961181640625|cri_loss: 0.0088958740234375|unsuper_loss: 0.0 +average reward score: 4.9453125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.62%) |Training time=0.47s (21.81%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2830|ppo_ep: 1|act_loss: -0.013885498046875|cri_loss: -0.00673675537109375|unsuper_loss: 0.0 +average reward score: 5.1484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.18%) |Training time=0.48s (22.24%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2831|ppo_ep: 1|act_loss: -0.035552978515625|cri_loss: -0.015960693359375|unsuper_loss: 0.0 +average reward score: 5.71875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.19%) |Training time=0.48s (22.21%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2832|ppo_ep: 1|act_loss: -0.01444244384765625|cri_loss: -0.0068206787109375|unsuper_loss: 0.0 +average reward score: 5.41796875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.49%) |Training time=0.47s (21.93%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2833|ppo_ep: 1|act_loss: -0.024932861328125|cri_loss: -0.01194000244140625|unsuper_loss: 0.0 +average reward score: 5.50390625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.55%) |Training time=0.47s (21.88%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2834|ppo_ep: 1|act_loss: 0.0003077983856201172|cri_loss: 0.00028443336486816406|unsuper_loss: 0.0 +average reward score: 5.77734375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.88%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2835|ppo_ep: 1|act_loss: -0.03741455078125|cri_loss: -0.017669677734375|unsuper_loss: 0.0 +average reward score: 5.36328125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.45%) |Training time=0.45s (20.97%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2836|ppo_ep: 1|act_loss: -0.024627685546875|cri_loss: -0.0111541748046875|unsuper_loss: 0.0 +average reward score: 4.69921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.76%) |Training time=0.44s (20.67%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2837|ppo_ep: 1|act_loss: -0.0279388427734375|cri_loss: -0.0135650634765625|unsuper_loss: 0.0 +average reward score: 5.10546875 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.39%) |Training time=0.47s (20.39%) |Others=0.10 (4.22%)|CurSamplesPerSec=13.74 |AvgSamplesPerSec=14.45 +[2023-04-14 10:32:14,915] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 2838|ppo_ep: 1|act_loss: -0.03369140625|cri_loss: -0.01508331298828125|unsuper_loss: 0.0 +average reward score: 5.00390625 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.42%) |Training time=0.45s (20.97%) |Others=0.10 (4.61%)|CurSamplesPerSec=15.06 |AvgSamplesPerSec=14.45 +[2023-04-14 10:32:17,061] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +[2023-04-14 10:32:17,062] [INFO] [logging.py:96:log_dist] [Rank 0] step=2840, skipped=38, lr=[7.266399278906688e-06, 7.266399278906688e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:32:17,062] [INFO] [timer.py:199:stop] epoch=0/micro_step=2840/global_step=2840, RunningAvgSamplesPerSec=106.37877097595695, CurrSamplesPerSec=116.98970678860347, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:32:17,154] [INFO] [logging.py:96:log_dist] [Rank 0] step=2840, skipped=44, lr=[3.769951495013317e-06, 3.769951495013317e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2839|ppo_ep: 1|act_loss: -0.0142669677734375|cri_loss: -0.006763458251953125|unsuper_loss: 0.0 +average reward score: 5.46875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.15%) |Training time=0.44s (20.29%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2840|ppo_ep: 1|act_loss: -0.00069427490234375|cri_loss: 0.0004968643188476562|unsuper_loss: 0.0 +average reward score: 5.0859375 +------------------------------------------------------------------------------------- +|E2E latency=2.45s |Gather latency=0.00s (0.00%) |Generate time=1.68s (68.55%) |Training time=0.47s (19.18%) |Others=0.30 (12.27%)|CurSamplesPerSec=13.07 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2841|ppo_ep: 1|act_loss: 0.0382080078125|cri_loss: 0.0198822021484375|unsuper_loss: 0.0 +average reward score: 5.45703125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.94%) |Training time=0.46s (21.47%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2842|ppo_ep: 1|act_loss: 0.035308837890625|cri_loss: 0.0183868408203125|unsuper_loss: 0.0 +average reward score: 5.484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.11%) |Training time=0.48s (22.33%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2843|ppo_ep: 1|act_loss: -0.0150146484375|cri_loss: -0.007251739501953125|unsuper_loss: 0.0 +average reward score: 4.95703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.51%) |Training time=0.47s (21.91%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2844|ppo_ep: 1|act_loss: -0.037109375|cri_loss: -0.017974853515625|unsuper_loss: 0.0 +average reward score: 4.9296875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.09%) |Training time=0.48s (22.36%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2845|ppo_ep: 1|act_loss: -0.0166168212890625|cri_loss: -0.007476806640625|unsuper_loss: 0.0 +average reward score: 5.37109375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.96%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2846|ppo_ep: 1|act_loss: -0.0270538330078125|cri_loss: -0.01276397705078125|unsuper_loss: 0.0 +average reward score: 5.375 +------------------------------------------------------------------------------------- +|E2E latency=3.05s |Gather latency=0.00s (0.00%) |Generate time=1.58s (51.83%) |Training time=0.46s (15.23%) |Others=1.00 (32.94%)|CurSamplesPerSec=10.50 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2847|ppo_ep: 1|act_loss: -0.033111572265625|cri_loss: -0.01593017578125|unsuper_loss: 0.0 +average reward score: 5.75 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.66%) |Training time=0.47s (21.74%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2848|ppo_ep: 1|act_loss: -0.0134124755859375|cri_loss: -0.006237030029296875|unsuper_loss: 0.0 +average reward score: 5.95703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.77%) |Training time=0.47s (21.72%) |Others=0.12 (5.51%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.45 +[2023-04-14 10:32:39,758] [INFO] [logging.py:96:log_dist] [Rank 0] step=2850, skipped=38, lr=[7.2503585101744275e-06, 7.2503585101744275e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:32:39,776] [INFO] [timer.py:199:stop] epoch=0/micro_step=2850/global_step=2850, RunningAvgSamplesPerSec=106.37187852437273, CurrSamplesPerSec=110.91549223611467, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:32:39,869] [INFO] [logging.py:96:log_dist] [Rank 0] step=2850, skipped=44, lr=[3.7616514510892553e-06, 3.7616514510892553e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2849|ppo_ep: 1|act_loss: -0.0085906982421875|cri_loss: -0.002605438232421875|unsuper_loss: 0.0 +average reward score: 5.4609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.57%) |Training time=0.45s (20.86%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2850|ppo_ep: 1|act_loss: 0.0133056640625|cri_loss: 0.00717926025390625|unsuper_loss: 0.0 +average reward score: 5.35546875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.34%) |Training time=0.48s (22.14%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2851|ppo_ep: 1|act_loss: 0.03900146484375|cri_loss: 0.021392822265625|unsuper_loss: 0.0 +average reward score: 5.0234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.86%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2852|ppo_ep: 1|act_loss: 0.02392578125|cri_loss: 0.01236724853515625|unsuper_loss: 0.0 +average reward score: 5.390625 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.74s (75.60%) |Training time=0.46s (20.13%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.87 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2853|ppo_ep: 1|act_loss: 0.0272216796875|cri_loss: 0.01763916015625|unsuper_loss: 0.0 +average reward score: 4.8515625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.36%) |Training time=0.47s (21.99%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2854|ppo_ep: 1|act_loss: 0.0024585723876953125|cri_loss: 0.001834869384765625|unsuper_loss: 0.0 +average reward score: 5.734375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.67%) |Training time=0.49s (22.60%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2855|ppo_ep: 1|act_loss: -0.029083251953125|cri_loss: -0.0135650634765625|unsuper_loss: 0.0 +average reward score: 4.68359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.96%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2856|ppo_ep: 1|act_loss: -0.001056671142578125|cri_loss: -0.0002918243408203125|unsuper_loss: 0.0 +average reward score: 5.53515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.86%) |Training time=0.49s (22.56%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2857|ppo_ep: 1|act_loss: -0.02532958984375|cri_loss: -0.01251983642578125|unsuper_loss: 0.0 +average reward score: 4.75390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.86%) |Training time=0.49s (22.59%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2858|ppo_ep: 1|act_loss: -0.033660888671875|cri_loss: -0.0146331787109375|unsuper_loss: 0.0 +average reward score: 5.171875 +------------------------------------------------------------------------------------- +|E2E latency=2.95s |Gather latency=0.00s (0.00%) |Generate time=1.58s (53.57%) |Training time=0.50s (16.83%) |Others=0.87 (29.60%)|CurSamplesPerSec=10.85 |AvgSamplesPerSec=14.45 +[2023-04-14 10:33:02,332] [INFO] [logging.py:96:log_dist] [Rank 0] step=2860, skipped=38, lr=[7.234281791778185e-06, 7.234281791778185e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:33:02,351] [INFO] [timer.py:199:stop] epoch=0/micro_step=2860/global_step=2860, RunningAvgSamplesPerSec=106.34976605325895, CurrSamplesPerSec=101.42614741121662, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:33:02,444] [INFO] [logging.py:96:log_dist] [Rank 0] step=2860, skipped=44, lr=[3.7533327064471216e-06, 3.7533327064471216e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2859|ppo_ep: 1|act_loss: 0.00896453857421875|cri_loss: 0.00466156005859375|unsuper_loss: 0.0 +average reward score: 5.48828125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.30%) |Training time=0.48s (22.12%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2860|ppo_ep: 1|act_loss: 0.04473876953125|cri_loss: 0.0229949951171875|unsuper_loss: 0.0 +average reward score: 5.2421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.07%) |Training time=0.49s (22.40%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2861|ppo_ep: 1|act_loss: -0.00661468505859375|cri_loss: -0.002811431884765625|unsuper_loss: 0.0 +average reward score: 5.1328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.71%) |Training time=0.49s (22.76%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2862|ppo_ep: 1|act_loss: 0.0772705078125|cri_loss: 0.0430908203125|unsuper_loss: 0.0 +average reward score: 5.4453125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.78%) |Training time=0.46s (21.63%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2863|ppo_ep: 1|act_loss: 0.0021076202392578125|cri_loss: 0.0012416839599609375|unsuper_loss: 0.0 +average reward score: 5.20703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.69%) |Training time=0.47s (21.70%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2864|ppo_ep: 1|act_loss: -0.037628173828125|cri_loss: -0.017913818359375|unsuper_loss: 0.0 +average reward score: 5.50390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.47s (21.85%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2865|ppo_ep: 1|act_loss: -0.0204315185546875|cri_loss: -0.0097503662109375|unsuper_loss: 0.0 +average reward score: 4.85546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.06%) |Training time=0.48s (22.38%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2866|ppo_ep: 1|act_loss: -0.007228851318359375|cri_loss: -0.0034275054931640625|unsuper_loss: 0.0 +average reward score: 4.875 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.74s (74.63%) |Training time=0.49s (21.17%) |Others=0.10 (4.20%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2867|ppo_ep: 1|act_loss: -0.015838623046875|cri_loss: -0.007709503173828125|unsuper_loss: 0.0 +average reward score: 5.40625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.04%) |Training time=0.49s (22.43%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2868|ppo_ep: 1|act_loss: -0.0140533447265625|cri_loss: -0.006755828857421875|unsuper_loss: 0.0 +average reward score: 5.16796875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.38%) |Training time=0.49s (22.14%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.45 +[2023-04-14 10:33:24,269] [INFO] [logging.py:96:log_dist] [Rank 0] step=2870, skipped=38, lr=[7.2181693620137145e-06, 7.2181693620137145e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:33:24,287] [INFO] [timer.py:199:stop] epoch=0/micro_step=2870/global_step=2870, RunningAvgSamplesPerSec=106.32509058550373, CurrSamplesPerSec=95.30749151435104, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:33:24,380] [INFO] [logging.py:96:log_dist] [Rank 0] step=2870, skipped=44, lr=[3.7449953843907764e-06, 3.7449953843907764e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2869|ppo_ep: 1|act_loss: -0.019073486328125|cri_loss: -0.00899505615234375|unsuper_loss: 0.0 +average reward score: 5.3984375 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.68s (73.84%) |Training time=0.50s (21.87%) |Others=0.10 (4.29%)|CurSamplesPerSec=14.05 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2870|ppo_ep: 1|act_loss: -0.05633544921875|cri_loss: -0.0259857177734375|unsuper_loss: 0.0 +average reward score: 5.1171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.62%) |Training time=0.50s (22.87%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2871|ppo_ep: 1|act_loss: 0.0181427001953125|cri_loss: 0.00942230224609375|unsuper_loss: 0.0 +average reward score: 5.8125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.89%) |Training time=0.49s (22.57%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2872|ppo_ep: 1|act_loss: 0.0108642578125|cri_loss: 0.005611419677734375|unsuper_loss: 0.0 +average reward score: 5.11328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.69%) |Training time=0.49s (22.71%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2873|ppo_ep: 1|act_loss: 0.064453125|cri_loss: 0.033233642578125|unsuper_loss: 0.0 +average reward score: 5.5234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.75%) |Training time=0.49s (22.73%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2874|ppo_ep: 1|act_loss: 0.0105133056640625|cri_loss: 0.00571441650390625|unsuper_loss: 0.0 +average reward score: 5.421875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.26%) |Training time=0.48s (22.20%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2875|ppo_ep: 1|act_loss: -0.03668212890625|cri_loss: -0.0172271728515625|unsuper_loss: 0.0 +average reward score: 5.26953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.14%) |Training time=0.48s (22.30%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2876|ppo_ep: 1|act_loss: 0.009979248046875|cri_loss: 0.00543212890625|unsuper_loss: 0.0 +average reward score: 5.0703125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.12%) |Training time=0.48s (22.32%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2877|ppo_ep: 1|act_loss: -0.024017333984375|cri_loss: -0.01128387451171875|unsuper_loss: 0.0 +average reward score: 5.1328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.71%) |Training time=0.49s (22.76%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2878|ppo_ep: 1|act_loss: -0.0335693359375|cri_loss: -0.0164337158203125|unsuper_loss: 0.0 +average reward score: 4.734375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.78%) |Training time=0.49s (22.33%) |Others=0.13 (5.88%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.45 +[2023-04-14 10:33:45,963] [INFO] [logging.py:96:log_dist] [Rank 0] step=2880, skipped=38, lr=[7.20202145970609e-06, 7.20202145970609e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:33:45,981] [INFO] [timer.py:199:stop] epoch=0/micro_step=2880/global_step=2880, RunningAvgSamplesPerSec=106.29718321043626, CurrSamplesPerSec=108.31802231928128, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:33:46,081] [INFO] [logging.py:96:log_dist] [Rank 0] step=2880, skipped=44, lr=[3.736639608499448e-06, 3.736639608499448e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2879|ppo_ep: 1|act_loss: -0.027557373046875|cri_loss: -0.01270294189453125|unsuper_loss: 0.0 +average reward score: 5.40625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.46s (21.23%) |Others=0.11 (4.89%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2880|ppo_ep: 1|act_loss: -0.0287933349609375|cri_loss: -0.0141143798828125|unsuper_loss: 0.0 +average reward score: 5.84765625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.13%) |Training time=0.46s (21.27%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2881|ppo_ep: 1|act_loss: -0.0151824951171875|cri_loss: -0.006744384765625|unsuper_loss: 0.0 +average reward score: 5.1640625 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.76s (76.27%) |Training time=0.45s (19.46%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.88 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2882|ppo_ep: 1|act_loss: 0.0058746337890625|cri_loss: 0.003101348876953125|unsuper_loss: 0.0 +average reward score: 5.203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.02%) |Training time=0.46s (21.41%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2883|ppo_ep: 1|act_loss: 0.01129150390625|cri_loss: 0.0064697265625|unsuper_loss: 0.0 +average reward score: 5.9609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.90%) |Training time=0.46s (21.50%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2884|ppo_ep: 1|act_loss: -0.00933837890625|cri_loss: -0.003841400146484375|unsuper_loss: 0.0 +average reward score: 5.28125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.35%) |Training time=0.45s (21.05%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2885|ppo_ep: 1|act_loss: -0.0268402099609375|cri_loss: -0.0126953125|unsuper_loss: 0.0 +average reward score: 5.79296875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.70%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2886|ppo_ep: 1|act_loss: 0.00786590576171875|cri_loss: 0.004169464111328125|unsuper_loss: 0.0 +average reward score: 5.4765625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.73%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2887|ppo_ep: 1|act_loss: -0.00860595703125|cri_loss: -0.004085540771484375|unsuper_loss: 0.0 +average reward score: 5.53515625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.96%) |Training time=0.49s (22.49%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2888|ppo_ep: 1|act_loss: 0.017120361328125|cri_loss: 0.00911712646484375|unsuper_loss: 0.0 +average reward score: 5.16015625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.43%) |Training time=0.45s (20.87%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.45 +[2023-04-14 10:34:07,712] [INFO] [logging.py:96:log_dist] [Rank 0] step=2890, skipped=38, lr=[7.185838324206182e-06, 7.185838324206182e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:34:07,730] [INFO] [timer.py:199:stop] epoch=0/micro_step=2890/global_step=2890, RunningAvgSamplesPerSec=106.30418029404586, CurrSamplesPerSec=124.88239437340486, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:34:07,822] [INFO] [logging.py:96:log_dist] [Rank 0] step=2890, skipped=44, lr=[3.7282655026258895e-06, 3.7282655026258895e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2889|ppo_ep: 1|act_loss: -0.0262298583984375|cri_loss: -0.01256561279296875|unsuper_loss: 0.0 +average reward score: 4.9765625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.13%) |Training time=0.42s (19.31%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2890|ppo_ep: 1|act_loss: -0.00704193115234375|cri_loss: -0.0017490386962890625|unsuper_loss: 0.0 +average reward score: 5.2578125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.68%) |Training time=0.47s (21.77%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2891|ppo_ep: 1|act_loss: 0.01342010498046875|cri_loss: 0.0097503662109375|unsuper_loss: 0.0 +average reward score: 5.26953125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.30%) |Training time=0.46s (21.15%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2892|ppo_ep: 1|act_loss: 0.007518768310546875|cri_loss: 0.00547027587890625|unsuper_loss: 0.0 +average reward score: 5.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.92%) |Training time=0.47s (21.54%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2893|ppo_ep: 1|act_loss: 0.09027099609375|cri_loss: 0.05419921875|unsuper_loss: 0.0 +average reward score: 5.16015625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.78%) |Training time=0.47s (21.65%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2894|ppo_ep: 1|act_loss: 0.01409912109375|cri_loss: 0.00913238525390625|unsuper_loss: 0.0 +average reward score: 3.931640625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.80%) |Training time=0.43s (19.65%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2895|ppo_ep: 1|act_loss: 0.0149078369140625|cri_loss: 0.0116729736328125|unsuper_loss: 0.0 +average reward score: 5.12109375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.99%) |Training time=0.44s (20.34%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2896|ppo_ep: 1|act_loss: 0.025390625|cri_loss: 0.01457977294921875|unsuper_loss: 0.0 +average reward score: 4.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.22%) |Training time=0.48s (20.54%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2897|ppo_ep: 1|act_loss: 0.033203125|cri_loss: 0.0169677734375|unsuper_loss: 0.0 +average reward score: 5.1640625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.75%) |Training time=0.48s (21.76%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2898|ppo_ep: 1|act_loss: 0.0023441314697265625|cri_loss: 0.00432586669921875|unsuper_loss: 0.0 +average reward score: 5.03125 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.66%) |Training time=0.48s (21.01%) |Others=0.10 (4.33%)|CurSamplesPerSec=14.11 |AvgSamplesPerSec=14.45 +[2023-04-14 10:34:29,698] [INFO] [logging.py:96:log_dist] [Rank 0] step=2900, skipped=38, lr=[7.169620195387097e-06, 7.169620195387097e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:34:29,717] [INFO] [timer.py:199:stop] epoch=0/micro_step=2900/global_step=2900, RunningAvgSamplesPerSec=106.30332220155994, CurrSamplesPerSec=100.94465398682478, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:34:29,809] [INFO] [logging.py:96:log_dist] [Rank 0] step=2900, skipped=44, lr=[3.719873190894554e-06, 3.719873190894554e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2899|ppo_ep: 1|act_loss: 0.0145416259765625|cri_loss: 0.007717132568359375|unsuper_loss: 0.0 +average reward score: 5.48046875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.26%) |Training time=0.48s (22.21%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2900|ppo_ep: 1|act_loss: 0.007343292236328125|cri_loss: 0.0048675537109375|unsuper_loss: 0.0 +average reward score: 5.23828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.67%) |Training time=0.47s (21.79%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2901|ppo_ep: 1|act_loss: 0.00492095947265625|cri_loss: 0.00273895263671875|unsuper_loss: 0.0 +average reward score: 5.69140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.82%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2902|ppo_ep: 1|act_loss: 0.000827789306640625|cri_loss: 0.001277923583984375|unsuper_loss: 0.0 +average reward score: 5.17578125 +------------------------------------------------------------------------------------- +|E2E latency=2.50s |Gather latency=0.00s (0.00%) |Generate time=1.59s (63.82%) |Training time=0.47s (18.99%) |Others=0.43 (17.19%)|CurSamplesPerSec=12.82 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2903|ppo_ep: 1|act_loss: -0.0103912353515625|cri_loss: -0.004791259765625|unsuper_loss: 0.0 +average reward score: 5.3671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.74%) |Training time=0.47s (21.73%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2904|ppo_ep: 1|act_loss: 0.003299713134765625|cri_loss: 0.0017385482788085938|unsuper_loss: 0.0 +average reward score: 5.2578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.48s (21.97%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2905|ppo_ep: 1|act_loss: -0.005413055419921875|cri_loss: -0.0025043487548828125|unsuper_loss: 0.0 +average reward score: 5.4765625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.85%) |Training time=0.47s (21.60%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2906|ppo_ep: 1|act_loss: 0.001773834228515625|cri_loss: 0.0010547637939453125|unsuper_loss: 0.0 +average reward score: 5.65625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.86%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2907|ppo_ep: 1|act_loss: 0.0262451171875|cri_loss: 0.01409912109375|unsuper_loss: 0.0 +average reward score: 5.90234375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.80%) |Training time=0.49s (22.18%) |Others=0.13 (6.02%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2908|ppo_ep: 1|act_loss: 0.00876617431640625|cri_loss: 0.0050048828125|unsuper_loss: 0.0 +average reward score: 5.6953125 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.33%) |Training time=0.45s (20.96%) |Others=0.10 (4.71%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.45 +[2023-04-14 10:34:51,677] [INFO] [logging.py:96:log_dist] [Rank 0] step=2910, skipped=38, lr=[7.1533673136406274e-06, 7.1533673136406274e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:34:51,695] [INFO] [timer.py:199:stop] epoch=0/micro_step=2910/global_step=2910, RunningAvgSamplesPerSec=106.29643390840222, CurrSamplesPerSec=104.84285647330844, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:34:51,788] [INFO] [logging.py:96:log_dist] [Rank 0] step=2910, skipped=44, lr=[3.7114627976997454e-06, 3.7114627976997454e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2909|ppo_ep: 1|act_loss: 0.01393890380859375|cri_loss: 0.008758544921875|unsuper_loss: 0.0 +average reward score: 5.07421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.86%) |Training time=0.47s (21.60%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2910|ppo_ep: 1|act_loss: -0.00762939453125|cri_loss: -0.0035247802734375|unsuper_loss: 0.0 +average reward score: 5.40234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.95%) |Training time=0.49s (22.51%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2911|ppo_ep: 1|act_loss: 0.01641845703125|cri_loss: 0.00928497314453125|unsuper_loss: 0.0 +average reward score: 5.37109375 +------------------------------------------------------------------------------------- +|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.58s (70.96%) |Training time=0.48s (21.42%) |Others=0.17 (7.62%)|CurSamplesPerSec=14.33 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2912|ppo_ep: 1|act_loss: -0.03759765625|cri_loss: -0.0178375244140625|unsuper_loss: 0.0 +average reward score: 5.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.15%) |Training time=0.48s (22.16%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2913|ppo_ep: 1|act_loss: -0.00995635986328125|cri_loss: -0.004405975341796875|unsuper_loss: 0.0 +average reward score: 5.09375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.48s (22.01%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2914|ppo_ep: 1|act_loss: -0.023712158203125|cri_loss: -0.0115509033203125|unsuper_loss: 0.0 +average reward score: 5.6171875 +------------------------------------------------------------------------------------- +|E2E latency=2.62s |Gather latency=0.00s (0.00%) |Generate time=1.58s (60.47%) |Training time=0.48s (18.30%) |Others=0.56 (21.22%)|CurSamplesPerSec=12.22 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2915|ppo_ep: 1|act_loss: -0.03619384765625|cri_loss: -0.0172119140625|unsuper_loss: 0.0 +average reward score: 5.4609375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.13%) |Training time=0.48s (22.30%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2916|ppo_ep: 1|act_loss: 0.022613525390625|cri_loss: 0.014739990234375|unsuper_loss: 0.0 +average reward score: 5.29296875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.51%) |Training time=0.47s (21.95%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2917|ppo_ep: 1|act_loss: 0.03131103515625|cri_loss: 0.016082763671875|unsuper_loss: 0.0 +average reward score: 5.28125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.23%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2918|ppo_ep: 1|act_loss: 0.025970458984375|cri_loss: 0.0135650634765625|unsuper_loss: 0.0 +average reward score: 5.84375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.46%) |Training time=0.47s (21.99%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45 +[2023-04-14 10:35:13,853] [INFO] [logging.py:96:log_dist] [Rank 0] step=2920, skipped=38, lr=[7.1370799198736894e-06, 7.1370799198736894e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:35:13,872] [INFO] [timer.py:199:stop] epoch=0/micro_step=2920/global_step=2920, RunningAvgSamplesPerSec=106.27708410449475, CurrSamplesPerSec=102.26821669159538, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:35:13,964] [INFO] [logging.py:96:log_dist] [Rank 0] step=2920, skipped=44, lr=[3.7030344477037794e-06, 3.7030344477037794e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2919|ppo_ep: 1|act_loss: -0.017486572265625|cri_loss: -0.00734710693359375|unsuper_loss: 0.0 +average reward score: 5.1484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.47%) |Training time=0.48s (22.00%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2920|ppo_ep: 1|act_loss: 0.004459381103515625|cri_loss: 0.003025054931640625|unsuper_loss: 0.0 +average reward score: 5.109375 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.59s (67.90%) |Training time=0.48s (20.49%) |Others=0.27 (11.61%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2921|ppo_ep: 1|act_loss: -0.0131072998046875|cri_loss: -0.005886077880859375|unsuper_loss: 0.0 +average reward score: 5.5 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.82%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2922|ppo_ep: 1|act_loss: -0.017913818359375|cri_loss: -0.008392333984375|unsuper_loss: 0.0 +average reward score: 5.14453125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.65%) |Training time=0.48s (21.85%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2923|ppo_ep: 1|act_loss: 0.001232147216796875|cri_loss: 0.0011682510375976562|unsuper_loss: 0.0 +average reward score: 5.41015625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.48s (22.04%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.45 +[2023-04-14 10:35:24,979] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 2924|ppo_ep: 1|act_loss: 0.05810546875|cri_loss: 0.03289794921875|unsuper_loss: 0.0 +average reward score: 5.9375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.20%) |Training time=0.47s (21.72%) |Others=0.09 (4.08%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45 +[2023-04-14 10:35:27,132] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 2925|ppo_ep: 1|act_loss: 0.0648193359375|cri_loss: 0.034698486328125|unsuper_loss: 0.0 +average reward score: 5.36328125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.48s (22.17%) |Others=0.09 (4.08%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2926|ppo_ep: 1|act_loss: 0.02801513671875|cri_loss: 0.01515960693359375|unsuper_loss: 0.0 +average reward score: 5.953125 +------------------------------------------------------------------------------------- +|E2E latency=2.83s |Gather latency=0.00s (0.00%) |Generate time=1.78s (62.85%) |Training time=0.47s (16.68%) |Others=0.58 (20.46%)|CurSamplesPerSec=11.31 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2927|ppo_ep: 1|act_loss: 0.0149688720703125|cri_loss: 0.00780487060546875|unsuper_loss: 0.0 +average reward score: 5.49609375 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.58%) |Training time=0.48s (21.07%) |Others=0.10 (4.35%)|CurSamplesPerSec=14.12 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2928|ppo_ep: 1|act_loss: 0.00994110107421875|cri_loss: 0.0059051513671875|unsuper_loss: 0.0 +average reward score: 5.9296875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.05%) |Training time=0.49s (22.43%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.45 +[2023-04-14 10:35:36,456] [INFO] [logging.py:96:log_dist] [Rank 0] step=2930, skipped=38, lr=[7.120758255504751e-06, 7.120758255504751e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:35:36,474] [INFO] [timer.py:199:stop] epoch=0/micro_step=2930/global_step=2930, RunningAvgSamplesPerSec=106.26202270186438, CurrSamplesPerSec=101.59874100814726, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:35:36,567] [INFO] [logging.py:96:log_dist] [Rank 0] step=2930, skipped=46, lr=[3.6962789227532165e-06, 3.6962789227532165e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2929|ppo_ep: 1|act_loss: 0.00696563720703125|cri_loss: 0.004253387451171875|unsuper_loss: 0.0 +average reward score: 5.140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.39%) |Training time=0.48s (22.09%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2930|ppo_ep: 1|act_loss: 0.020294189453125|cri_loss: 0.01128387451171875|unsuper_loss: 0.0 +average reward score: 5.12890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.48s (22.01%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2931|ppo_ep: 1|act_loss: -0.0184783935546875|cri_loss: -0.00890350341796875|unsuper_loss: 0.0 +average reward score: 4.8671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.48s (22.00%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2932|ppo_ep: 1|act_loss: -0.0265960693359375|cri_loss: -0.01244354248046875|unsuper_loss: 0.0 +average reward score: 5.16015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.22%) |Training time=0.48s (22.19%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2933|ppo_ep: 1|act_loss: -0.001399993896484375|cri_loss: -0.00023174285888671875|unsuper_loss: 0.0 +average reward score: 5.28515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.12%) |Training time=0.48s (22.34%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2934|ppo_ep: 1|act_loss: -0.023223876953125|cri_loss: -0.01146697998046875|unsuper_loss: 0.0 +average reward score: 5.21484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.34%) |Training time=0.48s (22.11%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2935|ppo_ep: 1|act_loss: -0.043487548828125|cri_loss: -0.021331787109375|unsuper_loss: 0.0 +average reward score: 4.94140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.13%) |Training time=0.48s (22.32%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2936|ppo_ep: 1|act_loss: -0.0113525390625|cri_loss: -0.00528717041015625|unsuper_loss: 0.0 +average reward score: 5.296875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.17%) |Training time=0.48s (22.27%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2937|ppo_ep: 1|act_loss: 0.0017881393432617188|cri_loss: 0.00162506103515625|unsuper_loss: 0.0 +average reward score: 5.140625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.14%) |Training time=0.48s (22.32%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2938|ppo_ep: 1|act_loss: 0.004360198974609375|cri_loss: 0.002471923828125|unsuper_loss: 0.0 +average reward score: 5.140625 +------------------------------------------------------------------------------------- +|E2E latency=2.99s |Gather latency=0.00s (0.00%) |Generate time=1.58s (53.00%) |Training time=0.48s (16.19%) |Others=0.92 (30.80%)|CurSamplesPerSec=10.72 |AvgSamplesPerSec=14.45 +[2023-04-14 10:35:58,887] [INFO] [logging.py:96:log_dist] [Rank 0] step=2940, skipped=38, lr=[7.10440256246025e-06, 7.10440256246025e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:35:58,905] [INFO] [timer.py:199:stop] epoch=0/micro_step=2940/global_step=2940, RunningAvgSamplesPerSec=106.2466257993554, CurrSamplesPerSec=115.49485676915266, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:35:58,998] [INFO] [logging.py:96:log_dist] [Rank 0] step=2940, skipped=46, lr=[3.6878185655126163e-06, 3.6878185655126163e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2939|ppo_ep: 1|act_loss: 0.02459716796875|cri_loss: 0.0127716064453125|unsuper_loss: 0.0 +average reward score: 5.41796875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.94%) |Training time=0.44s (20.48%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45 +[2023-04-14 10:36:01,038] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 2940|ppo_ep: 1|act_loss: -0.0093536376953125|cri_loss: -0.0039825439453125|unsuper_loss: 0.0 +average reward score: 4.94921875 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.23%) |Training time=0.45s (21.17%) |Others=0.10 (4.60%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.45 +[2023-04-14 10:36:03,170] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 2941|ppo_ep: 1|act_loss: 0.01451873779296875|cri_loss: 0.007579803466796875|unsuper_loss: 0.0 +average reward score: 5.2265625 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.31%) |Training time=0.45s (21.09%) |Others=0.10 (4.60%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2942|ppo_ep: 1|act_loss: 0.003055572509765625|cri_loss: 0.002071380615234375|unsuper_loss: 0.0 +average reward score: 5.15234375 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.25%) |Training time=0.48s (20.52%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2943|ppo_ep: 1|act_loss: -0.0129547119140625|cri_loss: -0.00557708740234375|unsuper_loss: 0.0 +average reward score: 5.578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.23%) |Training time=0.48s (22.23%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2944|ppo_ep: 1|act_loss: -0.022705078125|cri_loss: -0.0099639892578125|unsuper_loss: 0.0 +average reward score: 5.234375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.18%) |Training time=0.48s (22.27%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2945|ppo_ep: 1|act_loss: -0.002780914306640625|cri_loss: -0.001194000244140625|unsuper_loss: 0.0 +average reward score: 5.0546875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.04%) |Training time=0.49s (22.42%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2946|ppo_ep: 1|act_loss: -0.0325927734375|cri_loss: -0.01580810546875|unsuper_loss: 0.0 +average reward score: 4.828125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.34%) |Training time=0.48s (22.11%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2947|ppo_ep: 1|act_loss: 0.043701171875|cri_loss: 0.02294921875|unsuper_loss: 0.0 +average reward score: 5.7421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.23%) |Training time=0.48s (22.23%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2948|ppo_ep: 1|act_loss: 0.03558349609375|cri_loss: 0.018707275390625|unsuper_loss: 0.0 +average reward score: 5.06640625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.19%) |Training time=0.48s (22.26%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.45 +[2023-04-14 10:36:20,616] [INFO] [logging.py:96:log_dist] [Rank 0] step=2950, skipped=40, lr=[7.091293670274905e-06, 7.091293670274905e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:36:20,635] [INFO] [timer.py:199:stop] epoch=0/micro_step=2950/global_step=2950, RunningAvgSamplesPerSec=106.23428625978279, CurrSamplesPerSec=103.20907689290974, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:36:20,728] [INFO] [logging.py:96:log_dist] [Rank 0] step=2950, skipped=46, lr=[3.679340601935418e-06, 3.679340601935418e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2949|ppo_ep: 1|act_loss: 0.055755615234375|cri_loss: 0.029510498046875|unsuper_loss: 0.0 +average reward score: 5.734375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.89%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2950|ppo_ep: 1|act_loss: 0.02801513671875|cri_loss: 0.0146636962890625|unsuper_loss: 0.0 +average reward score: 5.12890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.37%) |Training time=0.48s (22.08%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2951|ppo_ep: 1|act_loss: 0.028778076171875|cri_loss: 0.0155487060546875|unsuper_loss: 0.0 +average reward score: 4.46484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.23%) |Training time=0.48s (22.21%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2952|ppo_ep: 1|act_loss: 0.002716064453125|cri_loss: 0.002109527587890625|unsuper_loss: 0.0 +average reward score: 5.390625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.47%) |Training time=0.47s (21.98%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2953|ppo_ep: 1|act_loss: -0.032562255859375|cri_loss: -0.0157012939453125|unsuper_loss: 0.0 +average reward score: 4.70703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.31%) |Training time=0.48s (22.15%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2954|ppo_ep: 1|act_loss: -0.05206298828125|cri_loss: -0.0253143310546875|unsuper_loss: 0.0 +average reward score: 5.265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.78%) |Training time=0.47s (21.65%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2955|ppo_ep: 1|act_loss: -0.016204833984375|cri_loss: -0.007656097412109375|unsuper_loss: 0.0 +average reward score: 5.7578125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.76%) |Training time=0.47s (21.52%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2956|ppo_ep: 1|act_loss: -0.006183624267578125|cri_loss: -0.0024566650390625|unsuper_loss: 0.0 +average reward score: 5.4921875 +------------------------------------------------------------------------------------- +|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.62s (70.42%) |Training time=0.52s (22.78%) |Others=0.16 (6.80%)|CurSamplesPerSec=13.89 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2957|ppo_ep: 1|act_loss: 0.0180206298828125|cri_loss: 0.00919342041015625|unsuper_loss: 0.0 +average reward score: 5.56640625 +------------------------------------------------------------------------------------- +|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.63s (69.64%) |Training time=0.47s (20.24%) |Others=0.24 (10.12%)|CurSamplesPerSec=13.63 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2958|ppo_ep: 1|act_loss: 0.006927490234375|cri_loss: 0.0037059783935546875|unsuper_loss: 0.0 +average reward score: 5.43359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.30%) |Training time=0.48s (22.15%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45 +[2023-04-14 10:36:42,565] [INFO] [logging.py:96:log_dist] [Rank 0] step=2960, skipped=40, lr=[7.07487733687673e-06, 7.07487733687673e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:36:42,583] [INFO] [timer.py:199:stop] epoch=0/micro_step=2960/global_step=2960, RunningAvgSamplesPerSec=106.21938333389814, CurrSamplesPerSec=99.69147854428071, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:36:42,675] [INFO] [logging.py:96:log_dist] [Rank 0] step=2960, skipped=46, lr=[3.6708451576854964e-06, 3.6708451576854964e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2959|ppo_ep: 1|act_loss: 0.02166748046875|cri_loss: 0.011566162109375|unsuper_loss: 0.0 +average reward score: 5.56640625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.01%) |Training time=0.48s (22.45%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2960|ppo_ep: 1|act_loss: 0.03948974609375|cri_loss: 0.0203857421875|unsuper_loss: 0.0 +average reward score: 5.27734375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.07%) |Training time=0.48s (22.39%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2961|ppo_ep: 1|act_loss: 0.0328369140625|cri_loss: 0.0174560546875|unsuper_loss: 0.0 +average reward score: 5.90625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.39%) |Training time=0.47s (22.04%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2962|ppo_ep: 1|act_loss: -0.00704193115234375|cri_loss: -0.003192901611328125|unsuper_loss: 0.0 +average reward score: 5.66015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.13%) |Training time=0.48s (22.31%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2963|ppo_ep: 1|act_loss: 0.02923583984375|cri_loss: 0.0168609619140625|unsuper_loss: 0.0 +average reward score: 4.71484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.21%) |Training time=0.48s (22.24%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2964|ppo_ep: 1|act_loss: -0.00560760498046875|cri_loss: -0.002147674560546875|unsuper_loss: 0.0 +average reward score: 5.55859375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.35%) |Training time=0.48s (22.09%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2965|ppo_ep: 1|act_loss: -0.017333984375|cri_loss: -0.00814056396484375|unsuper_loss: 0.0 +average reward score: 5.6328125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.00%) |Training time=0.48s (22.45%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2966|ppo_ep: 1|act_loss: -0.05963134765625|cri_loss: -0.0282135009765625|unsuper_loss: 0.0 +average reward score: 5.359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.23%) |Training time=0.48s (22.21%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2967|ppo_ep: 1|act_loss: 0.060333251953125|cri_loss: 0.032135009765625|unsuper_loss: 0.0 +average reward score: 4.875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.19%) |Training time=0.48s (22.26%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2968|ppo_ep: 1|act_loss: -0.0307159423828125|cri_loss: -0.014678955078125|unsuper_loss: 0.0 +average reward score: 5.6328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.17%) |Training time=0.48s (22.29%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.45 +[2023-04-14 10:37:04,143] [INFO] [logging.py:96:log_dist] [Rank 0] step=2970, skipped=40, lr=[7.0584276548688896e-06, 7.0584276548688896e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:37:04,161] [INFO] [timer.py:199:stop] epoch=0/micro_step=2970/global_step=2970, RunningAvgSamplesPerSec=106.20216768200422, CurrSamplesPerSec=108.36122310905577, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:37:04,254] [INFO] [logging.py:96:log_dist] [Rank 0] step=2970, skipped=46, lr=[3.662332358685826e-06, 3.662332358685826e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2969|ppo_ep: 1|act_loss: -0.0328369140625|cri_loss: -0.0156707763671875|unsuper_loss: 0.0 +average reward score: 5.22265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.19%) |Training time=0.46s (21.26%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2970|ppo_ep: 1|act_loss: -0.018341064453125|cri_loss: -0.00885772705078125|unsuper_loss: 0.0 +average reward score: 5.56640625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.75%) |Training time=0.49s (22.72%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2971|ppo_ep: 1|act_loss: 9.191036224365234e-05|cri_loss: 0.0001608133316040039|unsuper_loss: 0.0 +average reward score: 5.35546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.31%) |Training time=0.48s (22.13%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2972|ppo_ep: 1|act_loss: -0.028167724609375|cri_loss: -0.0137939453125|unsuper_loss: 0.0 +average reward score: 5.28515625 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.07%) |Training time=0.48s (20.72%) |Others=0.10 (4.21%)|CurSamplesPerSec=13.70 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2973|ppo_ep: 1|act_loss: -0.0010089874267578125|cri_loss: 0.0002651214599609375|unsuper_loss: 0.0 +average reward score: 5.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.25%) |Training time=0.48s (22.19%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2974|ppo_ep: 1|act_loss: 0.0003681182861328125|cri_loss: 0.00141143798828125|unsuper_loss: 0.0 +average reward score: 5.4453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.22%) |Training time=0.48s (22.23%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2975|ppo_ep: 1|act_loss: -0.0010356903076171875|cri_loss: -0.00015783309936523438|unsuper_loss: 0.0 +average reward score: 5.5859375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.11%) |Training time=0.48s (22.34%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2976|ppo_ep: 1|act_loss: 0.00823211669921875|cri_loss: 0.004405975341796875|unsuper_loss: 0.0 +average reward score: 5.5390625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.48s (22.13%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2977|ppo_ep: 1|act_loss: -0.0009083747863769531|cri_loss: -0.00011587142944335938|unsuper_loss: 0.0 +average reward score: 5.25 +------------------------------------------------------------------------------------- +|E2E latency=2.51s |Gather latency=0.00s (0.00%) |Generate time=1.58s (63.02%) |Training time=0.48s (19.08%) |Others=0.45 (17.90%)|CurSamplesPerSec=12.75 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2978|ppo_ep: 1|act_loss: -0.031768798828125|cri_loss: -0.01482391357421875|unsuper_loss: 0.0 +average reward score: 5.265625 +------------------------------------------------------------------------------------- +|E2E latency=2.06s |Gather latency=0.00s (0.00%) |Generate time=1.58s (76.46%) |Training time=0.39s (18.75%) |Others=0.10 (4.78%)|CurSamplesPerSec=15.52 |AvgSamplesPerSec=14.45 +[2023-04-14 10:37:26,167] [INFO] [logging.py:96:log_dist] [Rank 0] step=2980, skipped=40, lr=[7.041944868075353e-06, 7.041944868075353e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:37:26,185] [INFO] [timer.py:199:stop] epoch=0/micro_step=2980/global_step=2980, RunningAvgSamplesPerSec=106.19269914626885, CurrSamplesPerSec=101.53371919119633, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:37:26,278] [INFO] [logging.py:96:log_dist] [Rank 0] step=2980, skipped=46, lr=[3.6538023311166273e-06, 3.6538023311166273e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2979|ppo_ep: 1|act_loss: -0.02227783203125|cri_loss: -0.0102081298828125|unsuper_loss: 0.0 +average reward score: 5.7890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.30%) |Training time=0.48s (22.14%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2980|ppo_ep: 1|act_loss: -0.004032135009765625|cri_loss: -0.0018053054809570312|unsuper_loss: 0.0 +average reward score: 5.68359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.11%) |Training time=0.44s (20.34%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2981|ppo_ep: 1|act_loss: -0.040283203125|cri_loss: -0.018829345703125|unsuper_loss: 0.0 +average reward score: 5.66015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.08%) |Training time=0.46s (21.36%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2982|ppo_ep: 1|act_loss: -0.0193023681640625|cri_loss: -0.00920867919921875|unsuper_loss: 0.0 +average reward score: 5.58984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.84%) |Training time=0.47s (21.62%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2983|ppo_ep: 1|act_loss: -0.01230621337890625|cri_loss: -0.00579071044921875|unsuper_loss: 0.0 +average reward score: 5.50390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.05%) |Training time=0.44s (20.41%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2984|ppo_ep: 1|act_loss: 0.021392822265625|cri_loss: 0.01100921630859375|unsuper_loss: 0.0 +average reward score: 5.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.57s |Gather latency=0.00s (0.00%) |Generate time=1.62s (63.31%) |Training time=0.44s (17.34%) |Others=0.50 (19.34%)|CurSamplesPerSec=12.48 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2985|ppo_ep: 1|act_loss: 0.02447509765625|cri_loss: 0.01253509521484375|unsuper_loss: 0.0 +average reward score: 6.1328125 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.84%) |Training time=0.46s (20.90%) |Others=0.12 (5.27%)|CurSamplesPerSec=14.49 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2986|ppo_ep: 1|act_loss: 0.01194000244140625|cri_loss: 0.007659912109375|unsuper_loss: 0.0 +average reward score: 4.7265625 +------------------------------------------------------------------------------------- +|E2E latency=2.39s |Gather latency=0.00s (0.00%) |Generate time=1.84s (76.86%) |Training time=0.45s (19.00%) |Others=0.10 (4.14%)|CurSamplesPerSec=13.40 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2987|ppo_ep: 1|act_loss: 0.0084075927734375|cri_loss: 0.0046234130859375|unsuper_loss: 0.0 +average reward score: 5.3203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.16%) |Training time=0.46s (21.27%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2988|ppo_ep: 1|act_loss: 0.04998779296875|cri_loss: 0.026336669921875|unsuper_loss: 0.0 +average reward score: 5.23046875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.96%) |Training time=0.46s (21.48%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45 +[2023-04-14 10:37:48,439] [INFO] [logging.py:96:log_dist] [Rank 0] step=2990, skipped=40, lr=[7.025429220810784e-06, 7.025429220810784e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:37:48,458] [INFO] [timer.py:199:stop] epoch=0/micro_step=2990/global_step=2990, RunningAvgSamplesPerSec=106.20281446283651, CurrSamplesPerSec=107.48080335693007, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:37:48,550] [INFO] [logging.py:96:log_dist] [Rank 0] step=2990, skipped=46, lr=[3.6452552014134834e-06, 3.6452552014134834e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2989|ppo_ep: 1|act_loss: 0.017669677734375|cri_loss: 0.00970458984375|unsuper_loss: 0.0 +average reward score: 6.1328125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.09%) |Training time=0.46s (21.36%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2990|ppo_ep: 1|act_loss: -0.0224609375|cri_loss: -0.01080322265625|unsuper_loss: 0.0 +average reward score: 6.0859375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.96%) |Training time=0.46s (21.48%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2991|ppo_ep: 1|act_loss: -0.01204681396484375|cri_loss: -0.00508880615234375|unsuper_loss: 0.0 +average reward score: 5.8046875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.02%) |Training time=0.46s (21.41%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2992|ppo_ep: 1|act_loss: 0.0279541015625|cri_loss: 0.01557159423828125|unsuper_loss: 0.0 +average reward score: 5.4921875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.92%) |Training time=0.47s (21.52%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2993|ppo_ep: 1|act_loss: 0.0250701904296875|cri_loss: 0.015655517578125|unsuper_loss: 0.0 +average reward score: 5.125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.96%) |Training time=0.46s (21.49%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2994|ppo_ep: 1|act_loss: -0.00518035888671875|cri_loss: -0.00228118896484375|unsuper_loss: 0.0 +average reward score: 5.3046875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.00%) |Training time=0.46s (21.44%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2995|ppo_ep: 1|act_loss: -0.0118560791015625|cri_loss: -0.00574493408203125|unsuper_loss: 0.0 +average reward score: 5.39453125 +------------------------------------------------------------------------------------- +|E2E latency=2.08s |Gather latency=0.00s (0.00%) |Generate time=1.60s (76.67%) |Training time=0.39s (18.57%) |Others=0.10 (4.76%)|CurSamplesPerSec=15.37 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2996|ppo_ep: 1|act_loss: 0.04974365234375|cri_loss: 0.03350830078125|unsuper_loss: 0.0 +average reward score: 5.8125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.43%) |Training time=0.45s (21.01%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2997|ppo_ep: 1|act_loss: 0.0386962890625|cri_loss: 0.020751953125|unsuper_loss: 0.0 +average reward score: 5.3125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.68%) |Training time=0.45s (20.76%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45 +epoch: 0|step: 2998|ppo_ep: 1|act_loss: -0.0017347335815429688|cri_loss: -0.000568389892578125|unsuper_loss: 0.0 +average reward score: 5.55078125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.26%) |Training time=0.46s (21.17%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45 +[2023-04-14 10:38:10,330] [INFO] [logging.py:96:log_dist] [Rank 0] step=3000, skipped=40, lr=[7.008880957876916e-06, 7.008880957876916e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:38:10,349] [INFO] [timer.py:199:stop] epoch=0/micro_step=3000/global_step=3000, RunningAvgSamplesPerSec=106.215874634767, CurrSamplesPerSec=105.73411932827526, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:38:10,442] [INFO] [logging.py:96:log_dist] [Rank 0] step=3000, skipped=46, lr=[3.6366910962654785e-06, 3.6366910962654785e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 2999|ppo_ep: 1|act_loss: -0.0080108642578125|cri_loss: -0.0035762786865234375|unsuper_loss: 0.0 +average reward score: 5.44921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.05%) |Training time=0.47s (21.43%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3000|ppo_ep: 1|act_loss: 0.01529693603515625|cri_loss: 0.009674072265625|unsuper_loss: 0.0 +average reward score: 5.953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.07%) |Training time=0.46s (21.29%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3001|ppo_ep: 1|act_loss: 0.02740478515625|cri_loss: 0.01422882080078125|unsuper_loss: 0.0 +average reward score: 5.08203125 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.25%) |Training time=0.45s (19.50%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.82 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3002|ppo_ep: 1|act_loss: 0.023345947265625|cri_loss: 0.01261138916015625|unsuper_loss: 0.0 +average reward score: 5.57421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.16%) |Training time=0.46s (21.28%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3003|ppo_ep: 1|act_loss: 0.005279541015625|cri_loss: 0.00919342041015625|unsuper_loss: 0.0 +average reward score: 5.19921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.88%) |Training time=0.46s (21.55%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3004|ppo_ep: 1|act_loss: -0.025146484375|cri_loss: -0.01141357421875|unsuper_loss: 0.0 +average reward score: 5.26953125 +------------------------------------------------------------------------------------- +|E2E latency=2.81s |Gather latency=0.00s (0.00%) |Generate time=1.58s (56.22%) |Training time=0.47s (16.82%) |Others=0.76 (26.95%)|CurSamplesPerSec=11.40 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3005|ppo_ep: 1|act_loss: 0.056640625|cri_loss: 0.031097412109375|unsuper_loss: 0.0 +average reward score: 5.23046875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.57%) |Training time=0.47s (21.85%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3006|ppo_ep: 1|act_loss: -0.0225372314453125|cri_loss: -0.0108642578125|unsuper_loss: 0.0 +average reward score: 5.3984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.43%) |Training time=0.47s (22.00%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45 +[2023-04-14 10:38:28,354] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384 +[2023-04-14 10:38:28,440] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384 +epoch: 0|step: 3007|ppo_ep: 1|act_loss: 0.048126220703125|cri_loss: 0.026763916015625|unsuper_loss: 0.0 +average reward score: 5.30859375 +------------------------------------------------------------------------------------- +|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.66%) |Training time=0.45s (21.13%) |Others=0.09 (4.22%)|CurSamplesPerSec=15.13 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3008|ppo_ep: 1|act_loss: -0.053314208984375|cri_loss: -0.0260467529296875|unsuper_loss: 0.0 +average reward score: 5.1328125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.76%) |Training time=0.46s (21.67%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45 +[2023-04-14 10:38:32,622] [INFO] [logging.py:96:log_dist] [Rank 0] step=3010, skipped=41, lr=[6.993959837557451e-06, 6.993959837557451e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:38:32,640] [INFO] [timer.py:199:stop] epoch=0/micro_step=3010/global_step=3010, RunningAvgSamplesPerSec=106.2159635052104, CurrSamplesPerSec=102.30953312005147, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:38:32,733] [INFO] [logging.py:96:log_dist] [Rank 0] step=3010, skipped=47, lr=[3.628968992538233e-06, 3.628968992538233e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3009|ppo_ep: 1|act_loss: 0.02825927734375|cri_loss: 0.01508331298828125|unsuper_loss: 0.0 +average reward score: 4.7734375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.34%) |Training time=0.48s (22.10%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3010|ppo_ep: 1|act_loss: -0.006519317626953125|cri_loss: -0.001796722412109375|unsuper_loss: 0.0 +average reward score: 5.40234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.35%) |Training time=0.48s (22.07%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3011|ppo_ep: 1|act_loss: -0.007564544677734375|cri_loss: -0.0034637451171875|unsuper_loss: 0.0 +average reward score: 5.6328125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.46s (21.39%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3012|ppo_ep: 1|act_loss: 0.0518798828125|cri_loss: 0.0274505615234375|unsuper_loss: 0.0 +average reward score: 4.03515625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.96%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3013|ppo_ep: 1|act_loss: -0.00540924072265625|cri_loss: 0.00273895263671875|unsuper_loss: 0.0 +average reward score: 4.515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.14%) |Training time=0.46s (21.28%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3014|ppo_ep: 1|act_loss: -0.0091400146484375|cri_loss: -0.003551483154296875|unsuper_loss: 0.0 +average reward score: 5.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.69%) |Training time=0.46s (20.81%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3015|ppo_ep: 1|act_loss: 0.028167724609375|cri_loss: 0.015838623046875|unsuper_loss: 0.0 +average reward score: 4.796875 +------------------------------------------------------------------------------------- +|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.69s (75.25%) |Training time=0.46s (20.36%) |Others=0.10 (4.39%)|CurSamplesPerSec=14.28 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3016|ppo_ep: 1|act_loss: 0.06402587890625|cri_loss: 0.035064697265625|unsuper_loss: 0.0 +average reward score: 4.43359375 +------------------------------------------------------------------------------------- +|E2E latency=2.45s |Gather latency=0.00s (0.00%) |Generate time=1.75s (71.39%) |Training time=0.47s (19.05%) |Others=0.23 (9.56%)|CurSamplesPerSec=13.05 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3017|ppo_ep: 1|act_loss: -0.022430419921875|cri_loss: -0.00969696044921875|unsuper_loss: 0.0 +average reward score: 4.5703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.60%) |Training time=0.47s (21.83%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3018|ppo_ep: 1|act_loss: 0.001911163330078125|cri_loss: 0.0020999908447265625|unsuper_loss: 0.0 +average reward score: 5.29296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.73%) |Training time=0.47s (21.69%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45 +[2023-04-14 10:38:54,564] [INFO] [logging.py:96:log_dist] [Rank 0] step=3020, skipped=41, lr=[6.977350281009214e-06, 6.977350281009214e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:38:54,583] [INFO] [timer.py:199:stop] epoch=0/micro_step=3020/global_step=3020, RunningAvgSamplesPerSec=106.21456349820467, CurrSamplesPerSec=103.59215705645721, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:38:54,675] [INFO] [logging.py:96:log_dist] [Rank 0] step=3020, skipped=47, lr=[3.6203729839730567e-06, 3.6203729839730567e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3019|ppo_ep: 1|act_loss: 0.04150390625|cri_loss: 0.02349853515625|unsuper_loss: 0.0 +average reward score: 5.0703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.55%) |Training time=0.47s (21.90%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3020|ppo_ep: 1|act_loss: 0.0745849609375|cri_loss: 0.041107177734375|unsuper_loss: 0.0 +average reward score: 4.56640625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.70%) |Training time=0.47s (21.73%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3021|ppo_ep: 1|act_loss: 0.0186767578125|cri_loss: 0.0098724365234375|unsuper_loss: 0.0 +average reward score: 5.328125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.64%) |Training time=0.47s (21.79%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3022|ppo_ep: 1|act_loss: -0.0171051025390625|cri_loss: -0.00785064697265625|unsuper_loss: 0.0 +average reward score: 5.6640625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.46%) |Training time=0.47s (21.97%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3023|ppo_ep: 1|act_loss: 0.01088714599609375|cri_loss: 0.006252288818359375|unsuper_loss: 0.0 +average reward score: 5.78125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.61%) |Training time=0.47s (21.82%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3024|ppo_ep: 1|act_loss: 0.0006403923034667969|cri_loss: 0.0006952285766601562|unsuper_loss: 0.0 +average reward score: 4.79296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.54%) |Training time=0.47s (21.89%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3025|ppo_ep: 1|act_loss: -0.03167724609375|cri_loss: -0.01506805419921875|unsuper_loss: 0.0 +average reward score: 5.296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.80%) |Training time=0.47s (21.64%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3026|ppo_ep: 1|act_loss: -0.0455322265625|cri_loss: -0.021087646484375|unsuper_loss: 0.0 +average reward score: 5.57421875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.74%) |Training time=0.47s (21.68%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3027|ppo_ep: 1|act_loss: -0.01092529296875|cri_loss: -0.00153350830078125|unsuper_loss: 0.0 +average reward score: 5.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.34%) |Training time=0.48s (22.09%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3028|ppo_ep: 1|act_loss: -0.04278564453125|cri_loss: -0.0206298828125|unsuper_loss: 0.0 +average reward score: 5.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.58s (66.99%) |Training time=0.47s (19.91%) |Others=0.31 (13.10%)|CurSamplesPerSec=13.54 |AvgSamplesPerSec=14.45 +[2023-04-14 10:39:16,295] [INFO] [logging.py:96:log_dist] [Rank 0] step=3030, skipped=41, lr=[6.96070882143755e-06, 6.96070882143755e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:39:16,313] [INFO] [timer.py:199:stop] epoch=0/micro_step=3030/global_step=3030, RunningAvgSamplesPerSec=106.2086986497776, CurrSamplesPerSec=107.58660138577608, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:39:16,406] [INFO] [logging.py:96:log_dist] [Rank 0] step=3030, skipped=47, lr=[3.6117603687775048e-06, 3.6117603687775048e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3029|ppo_ep: 1|act_loss: 0.015106201171875|cri_loss: 0.007755279541015625|unsuper_loss: 0.0 +average reward score: 4.8203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.18%) |Training time=0.46s (21.27%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3030|ppo_ep: 1|act_loss: 0.0186767578125|cri_loss: 0.0110931396484375|unsuper_loss: 0.0 +average reward score: 4.78125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.56%) |Training time=0.47s (21.87%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3031|ppo_ep: 1|act_loss: 0.0296630859375|cri_loss: 0.016082763671875|unsuper_loss: 0.0 +average reward score: 4.80078125 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.74s (75.28%) |Training time=0.47s (20.47%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.83 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3032|ppo_ep: 1|act_loss: 0.04290771484375|cri_loss: 0.0226593017578125|unsuper_loss: 0.0 +average reward score: 5.2421875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.62%) |Training time=0.47s (21.81%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3033|ppo_ep: 1|act_loss: 0.0767822265625|cri_loss: 0.041412353515625|unsuper_loss: 0.0 +average reward score: 5.0078125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.86%) |Training time=0.46s (21.58%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3034|ppo_ep: 1|act_loss: 0.033294677734375|cri_loss: 0.0171356201171875|unsuper_loss: 0.0 +average reward score: 6.3203125 +------------------------------------------------------------------------------------- +|E2E latency=2.44s |Gather latency=0.00s (0.00%) |Generate time=1.58s (64.85%) |Training time=0.47s (19.34%) |Others=0.39 (15.81%)|CurSamplesPerSec=13.13 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3035|ppo_ep: 1|act_loss: 0.04876708984375|cri_loss: 0.027008056640625|unsuper_loss: 0.0 +average reward score: 5.0625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.50%) |Training time=0.47s (21.93%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3036|ppo_ep: 1|act_loss: -0.03179931640625|cri_loss: -0.015411376953125|unsuper_loss: 0.0 +average reward score: 5.8828125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.41%) |Training time=0.47s (21.87%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3037|ppo_ep: 1|act_loss: -0.0127716064453125|cri_loss: -0.00620269775390625|unsuper_loss: 0.0 +average reward score: 5.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.35%) |Training time=0.47s (22.07%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3038|ppo_ep: 1|act_loss: 0.031005859375|cri_loss: 0.0171051025390625|unsuper_loss: 0.0 +average reward score: 4.421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.24%) |Training time=0.46s (21.21%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.45 +[2023-04-14 10:39:38,258] [INFO] [logging.py:96:log_dist] [Rank 0] step=3040, skipped=41, lr=[6.944035705509035e-06, 6.944035705509035e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:39:38,277] [INFO] [timer.py:199:stop] epoch=0/micro_step=3040/global_step=3040, RunningAvgSamplesPerSec=106.20304657307675, CurrSamplesPerSec=106.6089857439244, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:39:38,455] [INFO] [logging.py:96:log_dist] [Rank 0] step=3040, skipped=47, lr=[3.603131274611312e-06, 3.603131274611312e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3039|ppo_ep: 1|act_loss: -0.053680419921875|cri_loss: -0.0252685546875|unsuper_loss: 0.0 +average reward score: 5.46484375 +------------------------------------------------------------------------------------- +|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.13%) |Training time=0.55s (24.52%) |Others=0.10 (4.35%)|CurSamplesPerSec=14.29 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3040|ppo_ep: 1|act_loss: -0.044677734375|cri_loss: -0.021453857421875|unsuper_loss: 0.0 +average reward score: 5.109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.84%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3041|ppo_ep: 1|act_loss: 0.014801025390625|cri_loss: 0.0083770751953125|unsuper_loss: 0.0 +average reward score: 4.95703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.75%) |Training time=0.47s (21.67%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3042|ppo_ep: 1|act_loss: -0.0051422119140625|cri_loss: -0.001735687255859375|unsuper_loss: 0.0 +average reward score: 4.796875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.45%) |Training time=0.47s (22.00%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3043|ppo_ep: 1|act_loss: 0.008453369140625|cri_loss: 0.00479888916015625|unsuper_loss: 0.0 +average reward score: 5.5546875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.49%) |Training time=0.48s (22.00%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3044|ppo_ep: 1|act_loss: -0.00157928466796875|cri_loss: 4.9591064453125e-05|unsuper_loss: 0.0 +average reward score: 5.6328125 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.42%) |Training time=0.46s (20.21%) |Others=0.10 (4.37%)|CurSamplesPerSec=14.10 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3045|ppo_ep: 1|act_loss: 0.012237548828125|cri_loss: 0.00685882568359375|unsuper_loss: 0.0 +average reward score: 4.84765625 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.93%) |Training time=0.52s (23.55%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3046|ppo_ep: 1|act_loss: 0.008270263671875|cri_loss: 0.004291534423828125|unsuper_loss: 0.0 +average reward score: 5.58984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.56%) |Training time=0.47s (21.86%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3047|ppo_ep: 1|act_loss: -3.62396240234375e-05|cri_loss: 0.0002925395965576172|unsuper_loss: 0.0 +average reward score: 4.546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.50%) |Training time=0.47s (21.94%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3048|ppo_ep: 1|act_loss: 0.0095367431640625|cri_loss: 0.005397796630859375|unsuper_loss: 0.0 +average reward score: 5.140625 +------------------------------------------------------------------------------------- +|E2E latency=2.68s |Gather latency=0.00s (0.00%) |Generate time=1.58s (58.98%) |Training time=0.47s (17.46%) |Others=0.63 (23.56%)|CurSamplesPerSec=11.93 |AvgSamplesPerSec=14.45 +[2023-04-14 10:40:00,590] [INFO] [logging.py:96:log_dist] [Rank 0] step=3050, skipped=41, lr=[6.92733118035947e-06, 6.92733118035947e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:40:00,608] [INFO] [timer.py:199:stop] epoch=0/micro_step=3050/global_step=3050, RunningAvgSamplesPerSec=106.19061216110067, CurrSamplesPerSec=103.65696085806124, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:40:00,701] [INFO] [logging.py:96:log_dist] [Rank 0] step=3050, skipped=47, lr=[3.59448582937847e-06, 3.59448582937847e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3049|ppo_ep: 1|act_loss: 0.01837158203125|cri_loss: 0.0163421630859375|unsuper_loss: 0.0 +average reward score: 4.5546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.53%) |Training time=0.47s (21.92%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3050|ppo_ep: 1|act_loss: 0.0167694091796875|cri_loss: 0.00876617431640625|unsuper_loss: 0.0 +average reward score: 5.53125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.44%) |Training time=0.47s (22.00%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3051|ppo_ep: 1|act_loss: 0.0504150390625|cri_loss: 0.0277099609375|unsuper_loss: 0.0 +average reward score: 4.2109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.44%) |Training time=0.47s (21.99%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3052|ppo_ep: 1|act_loss: 0.044677734375|cri_loss: 0.0234375|unsuper_loss: 0.0 +average reward score: 5.44921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.37%) |Training time=0.47s (22.07%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3053|ppo_ep: 1|act_loss: 0.0081939697265625|cri_loss: 0.0041656494140625|unsuper_loss: 0.0 +average reward score: 5.09765625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.99%) |Training time=0.46s (21.44%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3054|ppo_ep: 1|act_loss: 0.019744873046875|cri_loss: 0.01055145263671875|unsuper_loss: 0.0 +average reward score: 5.13671875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.52%) |Training time=0.47s (21.90%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3055|ppo_ep: 1|act_loss: 0.00218963623046875|cri_loss: 0.001293182373046875|unsuper_loss: 0.0 +average reward score: 5.0 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.11%) |Training time=0.48s (22.35%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3056|ppo_ep: 1|act_loss: -0.0211181640625|cri_loss: -0.01013946533203125|unsuper_loss: 0.0 +average reward score: 5.37109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.47s (22.05%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3057|ppo_ep: 1|act_loss: 0.020233154296875|cri_loss: 0.0114288330078125|unsuper_loss: 0.0 +average reward score: 4.92578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.35%) |Training time=0.48s (22.09%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3058|ppo_ep: 1|act_loss: 0.026153564453125|cri_loss: 0.01445770263671875|unsuper_loss: 0.0 +average reward score: 4.8203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.14%) |Training time=0.46s (21.32%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.45 +[2023-04-14 10:40:22,136] [INFO] [logging.py:96:log_dist] [Rank 0] step=3060, skipped=41, lr=[6.910595493590213e-06, 6.910595493590213e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:40:22,155] [INFO] [timer.py:199:stop] epoch=0/micro_step=3060/global_step=3060, RunningAvgSamplesPerSec=106.18132775281478, CurrSamplesPerSec=103.46374930236718, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:40:22,248] [INFO] [logging.py:96:log_dist] [Rank 0] step=3060, skipped=47, lr=[3.585824161225334e-06, 3.585824161225334e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3059|ppo_ep: 1|act_loss: -0.00453948974609375|cri_loss: -0.0020771026611328125|unsuper_loss: 0.0 +average reward score: 5.078125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.72%) |Training time=0.47s (21.74%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3060|ppo_ep: 1|act_loss: -0.017425537109375|cri_loss: -0.00827789306640625|unsuper_loss: 0.0 +average reward score: 5.203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.45%) |Training time=0.47s (21.98%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3061|ppo_ep: 1|act_loss: 0.00922393798828125|cri_loss: 0.004856109619140625|unsuper_loss: 0.0 +average reward score: 5.1015625 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.33%) |Training time=0.47s (20.44%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.80 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3062|ppo_ep: 1|act_loss: -0.030548095703125|cri_loss: -0.0146484375|unsuper_loss: 0.0 +average reward score: 4.22265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.42%) |Training time=0.47s (22.02%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3063|ppo_ep: 1|act_loss: 0.0113067626953125|cri_loss: 0.00652313232421875|unsuper_loss: 0.0 +average reward score: 4.578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.57%) |Training time=0.47s (21.87%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3064|ppo_ep: 1|act_loss: 0.04510498046875|cri_loss: 0.024749755859375|unsuper_loss: 0.0 +average reward score: 4.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.58s (69.11%) |Training time=0.48s (20.89%) |Others=0.23 (10.00%)|CurSamplesPerSec=14.03 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3065|ppo_ep: 1|act_loss: -0.027740478515625|cri_loss: -0.0132293701171875|unsuper_loss: 0.0 +average reward score: 4.984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.49%) |Training time=0.48s (21.99%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3066|ppo_ep: 1|act_loss: 0.017669677734375|cri_loss: 0.0101318359375|unsuper_loss: 0.0 +average reward score: 4.578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.86%) |Training time=0.46s (21.56%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3067|ppo_ep: 1|act_loss: -0.018463134765625|cri_loss: -0.009002685546875|unsuper_loss: 0.0 +average reward score: 5.54296875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.52%) |Training time=0.47s (21.92%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3068|ppo_ep: 1|act_loss: -0.0201416015625|cri_loss: -0.009796142578125|unsuper_loss: 0.0 +average reward score: 5.29296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.68%) |Training time=0.47s (21.75%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45 +[2023-04-14 10:40:43,959] [INFO] [logging.py:96:log_dist] [Rank 0] step=3070, skipped=41, lr=[6.893828893264516e-06, 6.893828893264516e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:40:43,978] [INFO] [timer.py:199:stop] epoch=0/micro_step=3070/global_step=3070, RunningAvgSamplesPerSec=106.17196197617312, CurrSamplesPerSec=103.21756958575386, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:40:44,070] [INFO] [logging.py:96:log_dist] [Rank 0] step=3070, skipped=47, lr=[3.57714639853872e-06, 3.57714639853872e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3069|ppo_ep: 1|act_loss: 0.0130615234375|cri_loss: 0.00708770751953125|unsuper_loss: 0.0 +average reward score: 5.18359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.90%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3070|ppo_ep: 1|act_loss: -0.003082275390625|cri_loss: -0.0013217926025390625|unsuper_loss: 0.0 +average reward score: 4.98046875 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.58s (69.34%) |Training time=0.47s (20.42%) |Others=0.23 (10.24%)|CurSamplesPerSec=14.02 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3071|ppo_ep: 1|act_loss: 0.0140533447265625|cri_loss: 0.007343292236328125|unsuper_loss: 0.0 +average reward score: 4.70703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.78%) |Training time=0.46s (21.64%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3072|ppo_ep: 1|act_loss: 0.0193023681640625|cri_loss: 0.01043701171875|unsuper_loss: 0.0 +average reward score: 4.890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.92%) |Training time=0.46s (21.54%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3073|ppo_ep: 1|act_loss: 0.00827789306640625|cri_loss: 0.00452423095703125|unsuper_loss: 0.0 +average reward score: 5.23828125 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.62s (72.16%) |Training time=0.47s (20.93%) |Others=0.15 (6.90%)|CurSamplesPerSec=14.25 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3074|ppo_ep: 1|act_loss: -0.0176544189453125|cri_loss: -0.00861358642578125|unsuper_loss: 0.0 +average reward score: 4.89453125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.30%) |Training time=0.46s (21.10%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3075|ppo_ep: 1|act_loss: -0.0271453857421875|cri_loss: -0.013336181640625|unsuper_loss: 0.0 +average reward score: 5.03125 +------------------------------------------------------------------------------------- +|E2E latency=2.86s |Gather latency=0.00s (0.00%) |Generate time=1.76s (61.70%) |Training time=0.47s (16.32%) |Others=0.63 (21.97%)|CurSamplesPerSec=11.20 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3076|ppo_ep: 1|act_loss: 0.005237579345703125|cri_loss: 0.003314971923828125|unsuper_loss: 0.0 +average reward score: 5.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.45%) |Training time=0.47s (21.97%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3077|ppo_ep: 1|act_loss: -0.03656005859375|cri_loss: -0.0175323486328125|unsuper_loss: 0.0 +average reward score: 4.71875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.48s (22.13%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3078|ppo_ep: 1|act_loss: 0.0016117095947265625|cri_loss: 0.00209808349609375|unsuper_loss: 0.0 +average reward score: 4.59375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.75%) |Training time=0.47s (21.68%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45 +[2023-04-14 10:41:06,415] [INFO] [logging.py:96:log_dist] [Rank 0] step=3080, skipped=41, lr=[6.8770316279038435e-06, 6.8770316279038435e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:41:06,433] [INFO] [timer.py:199:stop] epoch=0/micro_step=3080/global_step=3080, RunningAvgSamplesPerSec=106.16864078143239, CurrSamplesPerSec=106.1309223235332, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:41:06,525] [INFO] [logging.py:96:log_dist] [Rank 0] step=3080, skipped=47, lr=[3.568452669944006e-06, 3.568452669944006e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3079|ppo_ep: 1|act_loss: 0.00951385498046875|cri_loss: 0.01016998291015625|unsuper_loss: 0.0 +average reward score: 4.57421875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.47s (21.68%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3080|ppo_ep: 1|act_loss: 0.019134521484375|cri_loss: 0.00991058349609375|unsuper_loss: 0.0 +average reward score: 4.171875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.53%) |Training time=0.47s (21.90%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3081|ppo_ep: 1|act_loss: 0.0150146484375|cri_loss: 0.0078277587890625|unsuper_loss: 0.0 +average reward score: 4.7734375 +------------------------------------------------------------------------------------- +|E2E latency=2.53s |Gather latency=0.00s (0.00%) |Generate time=1.58s (62.59%) |Training time=0.46s (18.38%) |Others=0.48 (19.03%)|CurSamplesPerSec=12.66 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3082|ppo_ep: 1|act_loss: 0.027618408203125|cri_loss: 0.01419830322265625|unsuper_loss: 0.0 +average reward score: 5.3828125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.46%) |Training time=0.47s (21.97%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3083|ppo_ep: 1|act_loss: -0.0113525390625|cri_loss: -0.00547027587890625|unsuper_loss: 0.0 +average reward score: 5.3828125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.46%) |Training time=0.47s (21.98%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3084|ppo_ep: 1|act_loss: -0.04022216796875|cri_loss: -0.019439697265625|unsuper_loss: 0.0 +average reward score: 4.796875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.42%) |Training time=0.47s (22.03%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3085|ppo_ep: 1|act_loss: 0.004192352294921875|cri_loss: 0.0023860931396484375|unsuper_loss: 0.0 +average reward score: 4.58984375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.70%) |Training time=0.47s (21.74%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3086|ppo_ep: 1|act_loss: -0.01071929931640625|cri_loss: -0.00405120849609375|unsuper_loss: 0.0 +average reward score: 5.36328125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.34%) |Training time=0.47s (22.09%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3087|ppo_ep: 1|act_loss: -0.0206756591796875|cri_loss: -0.010009765625|unsuper_loss: 0.0 +average reward score: 4.9453125 +------------------------------------------------------------------------------------- +|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.58s (70.70%) |Training time=0.48s (21.37%) |Others=0.18 (7.92%)|CurSamplesPerSec=14.35 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3088|ppo_ep: 1|act_loss: -0.0251617431640625|cri_loss: -0.01202392578125|unsuper_loss: 0.0 +average reward score: 4.421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.48s (22.10%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45 +[2023-04-14 10:41:28,381] [INFO] [logging.py:96:log_dist] [Rank 0] step=3090, skipped=41, lr=[6.860203946484189e-06, 6.860203946484189e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:41:28,399] [INFO] [timer.py:199:stop] epoch=0/micro_step=3090/global_step=3090, RunningAvgSamplesPerSec=106.15902057491887, CurrSamplesPerSec=102.21190862761702, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:41:28,492] [INFO] [logging.py:96:log_dist] [Rank 0] step=3090, skipped=47, lr=[3.5597431043032205e-06, 3.5597431043032205e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3089|ppo_ep: 1|act_loss: -0.025787353515625|cri_loss: -0.01262664794921875|unsuper_loss: 0.0 +average reward score: 5.16796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.48s (21.97%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3090|ppo_ep: 1|act_loss: -0.0203094482421875|cri_loss: -0.009735107421875|unsuper_loss: 0.0 +average reward score: 4.5546875 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.37%) |Training time=0.47s (20.41%) |Others=0.10 (4.22%)|CurSamplesPerSec=13.77 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3091|ppo_ep: 1|act_loss: 0.02978515625|cri_loss: 0.0167083740234375|unsuper_loss: 0.0 +average reward score: 4.8125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.56%) |Training time=0.47s (21.88%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3092|ppo_ep: 1|act_loss: -0.0156097412109375|cri_loss: -0.007183074951171875|unsuper_loss: 0.0 +average reward score: 4.328125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.52%) |Training time=0.47s (21.90%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3093|ppo_ep: 1|act_loss: -0.031402587890625|cri_loss: -0.01488494873046875|unsuper_loss: 0.0 +average reward score: 4.7109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.29%) |Training time=0.46s (21.17%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3094|ppo_ep: 1|act_loss: -0.032623291015625|cri_loss: -0.0154571533203125|unsuper_loss: 0.0 +average reward score: 5.45703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.54%) |Training time=0.47s (21.90%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3095|ppo_ep: 1|act_loss: -0.0212554931640625|cri_loss: -0.01013946533203125|unsuper_loss: 0.0 +average reward score: 4.9296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.73%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3096|ppo_ep: 1|act_loss: -0.03173828125|cri_loss: -0.0146636962890625|unsuper_loss: 0.0 +average reward score: 4.67578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.74%) |Training time=0.47s (21.70%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3097|ppo_ep: 1|act_loss: 0.00027441978454589844|cri_loss: 0.0005178451538085938|unsuper_loss: 0.0 +average reward score: 5.140625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.63%) |Training time=0.47s (21.81%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3098|ppo_ep: 1|act_loss: 0.000667572021484375|cri_loss: 0.0013866424560546875|unsuper_loss: 0.0 +average reward score: 4.4296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.72%) |Training time=0.47s (21.71%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.45 +[2023-04-14 10:41:50,068] [INFO] [logging.py:96:log_dist] [Rank 0] step=3100, skipped=41, lr=[6.843346098432385e-06, 6.843346098432385e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:41:50,371] [INFO] [timer.py:199:stop] epoch=0/micro_step=3100/global_step=3100, RunningAvgSamplesPerSec=106.1222394732359, CurrSamplesPerSec=54.794599648087136, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:41:50,464] [INFO] [logging.py:96:log_dist] [Rank 0] step=3100, skipped=47, lr=[3.551017830713139e-06, 3.551017830713139e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3099|ppo_ep: 1|act_loss: 0.014007568359375|cri_loss: 0.007511138916015625|unsuper_loss: 0.0 +average reward score: 4.71875 +------------------------------------------------------------------------------------- +|E2E latency=2.44s |Gather latency=0.00s (0.00%) |Generate time=1.59s (65.33%) |Training time=0.75s (30.65%) |Others=0.10 (4.02%)|CurSamplesPerSec=13.13 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3100|ppo_ep: 1|act_loss: 0.05084228515625|cri_loss: 0.0269622802734375|unsuper_loss: 0.0 +average reward score: 4.69140625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.57%) |Training time=0.47s (21.86%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3101|ppo_ep: 1|act_loss: 0.000579833984375|cri_loss: 0.0024852752685546875|unsuper_loss: 0.0 +average reward score: 4.7578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.62%) |Training time=0.47s (21.85%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3102|ppo_ep: 1|act_loss: 0.0220184326171875|cri_loss: 0.0114593505859375|unsuper_loss: 0.0 +average reward score: 4.90625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.90%) |Training time=0.47s (21.60%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3103|ppo_ep: 1|act_loss: 0.01221466064453125|cri_loss: 0.006389617919921875|unsuper_loss: 0.0 +average reward score: 4.40625 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (75.06%) |Training time=0.46s (20.58%) |Others=0.10 (4.36%)|CurSamplesPerSec=14.19 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3104|ppo_ep: 1|act_loss: -0.019287109375|cri_loss: -0.00951385498046875|unsuper_loss: 0.0 +average reward score: 5.40625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.33%) |Training time=0.46s (21.12%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3105|ppo_ep: 1|act_loss: -0.0160675048828125|cri_loss: -0.007110595703125|unsuper_loss: 0.0 +average reward score: 5.3671875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.57%) |Training time=0.47s (21.87%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3106|ppo_ep: 1|act_loss: -0.027008056640625|cri_loss: -0.01226806640625|unsuper_loss: 0.0 +average reward score: 4.2734375 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.41%) |Training time=0.47s (20.34%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.77 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3107|ppo_ep: 1|act_loss: -0.032196044921875|cri_loss: -0.0145111083984375|unsuper_loss: 0.0 +average reward score: 4.69921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.48%) |Training time=0.47s (21.97%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3108|ppo_ep: 1|act_loss: -0.005462646484375|cri_loss: -0.002117156982421875|unsuper_loss: 0.0 +average reward score: 4.90234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.17%) |Training time=0.48s (22.26%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.45 +[2023-04-14 10:42:12,205] [INFO] [logging.py:96:log_dist] [Rank 0] step=3110, skipped=41, lr=[6.826458333622407e-06, 6.826458333622407e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:42:12,224] [INFO] [timer.py:199:stop] epoch=0/micro_step=3110/global_step=3110, RunningAvgSamplesPerSec=106.113810899425, CurrSamplesPerSec=99.49490248637686, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:42:12,316] [INFO] [logging.py:96:log_dist] [Rank 0] step=3110, skipped=47, lr=[3.542276978503364e-06, 3.542276978503364e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3109|ppo_ep: 1|act_loss: 0.0574951171875|cri_loss: 0.030975341796875|unsuper_loss: 0.0 +average reward score: 4.80078125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.10%) |Training time=0.48s (22.38%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3110|ppo_ep: 1|act_loss: 0.02020263671875|cri_loss: 0.01190948486328125|unsuper_loss: 0.0 +average reward score: 4.9296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.20%) |Training time=0.48s (22.23%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3111|ppo_ep: 1|act_loss: 0.019256591796875|cri_loss: 0.0103302001953125|unsuper_loss: 0.0 +average reward score: 4.6953125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.07%) |Training time=0.46s (21.39%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3112|ppo_ep: 1|act_loss: -0.05108642578125|cri_loss: -0.0231170654296875|unsuper_loss: 0.0 +average reward score: 4.80078125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.23%) |Training time=0.48s (22.22%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3113|ppo_ep: 1|act_loss: -0.0019350051879882812|cri_loss: -0.0007886886596679688|unsuper_loss: 0.0 +average reward score: 5.60546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.28%) |Training time=0.48s (22.17%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3114|ppo_ep: 1|act_loss: 0.04736328125|cri_loss: 0.024871826171875|unsuper_loss: 0.0 +average reward score: 4.69140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.19%) |Training time=0.48s (22.26%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3115|ppo_ep: 1|act_loss: 0.0191650390625|cri_loss: 0.01056671142578125|unsuper_loss: 0.0 +average reward score: 5.125 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.58s (70.34%) |Training time=0.47s (21.09%) |Others=0.19 (8.57%)|CurSamplesPerSec=14.22 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3116|ppo_ep: 1|act_loss: 0.0114898681640625|cri_loss: 0.006500244140625|unsuper_loss: 0.0 +average reward score: 5.0625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.48s (22.24%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3117|ppo_ep: 1|act_loss: -0.003936767578125|cri_loss: -0.001834869384765625|unsuper_loss: 0.0 +average reward score: 5.2421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.30%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3118|ppo_ep: 1|act_loss: -0.01453399658203125|cri_loss: -0.0068359375|unsuper_loss: 0.0 +average reward score: 4.7734375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.48%) |Training time=0.47s (21.96%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46 +[2023-04-14 10:42:33,852] [INFO] [logging.py:96:log_dist] [Rank 0] step=3120, skipped=41, lr=[6.80954090237167e-06, 6.80954090237167e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:42:33,870] [INFO] [timer.py:199:stop] epoch=0/micro_step=3120/global_step=3120, RunningAvgSamplesPerSec=106.10118858785661, CurrSamplesPerSec=105.24239249756924, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:42:33,963] [INFO] [logging.py:96:log_dist] [Rank 0] step=3120, skipped=47, lr=[3.533520677234411e-06, 3.533520677234411e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3119|ppo_ep: 1|act_loss: 0.0082855224609375|cri_loss: 0.004329681396484375|unsuper_loss: 0.0 +average reward score: 6.32421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.47s (21.63%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3120|ppo_ep: 1|act_loss: 0.00994873046875|cri_loss: 0.005306243896484375|unsuper_loss: 0.0 +average reward score: 5.265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.72%) |Training time=0.47s (21.73%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3121|ppo_ep: 1|act_loss: 0.0122833251953125|cri_loss: 0.00635528564453125|unsuper_loss: 0.0 +average reward score: 4.46875 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.44%) |Training time=0.47s (20.31%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3122|ppo_ep: 1|act_loss: 0.018890380859375|cri_loss: 0.00965118408203125|unsuper_loss: 0.0 +average reward score: 5.16015625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.70%) |Training time=0.47s (21.72%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3123|ppo_ep: 1|act_loss: 0.012359619140625|cri_loss: 0.00652313232421875|unsuper_loss: 0.0 +average reward score: 4.65234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.64%) |Training time=0.47s (21.79%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3124|ppo_ep: 1|act_loss: 0.0006108283996582031|cri_loss: 0.0007014274597167969|unsuper_loss: 0.0 +average reward score: 4.7265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.50%) |Training time=0.47s (21.95%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3125|ppo_ep: 1|act_loss: -0.0137481689453125|cri_loss: -0.006610870361328125|unsuper_loss: 0.0 +average reward score: 5.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.45%) |Training time=0.47s (22.00%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3126|ppo_ep: 1|act_loss: 0.0255279541015625|cri_loss: 0.015472412109375|unsuper_loss: 0.0 +average reward score: 5.25390625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.55%) |Training time=0.47s (21.89%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3127|ppo_ep: 1|act_loss: 0.00579833984375|cri_loss: 0.0034637451171875|unsuper_loss: 0.0 +average reward score: 5.83203125 +------------------------------------------------------------------------------------- +|E2E latency=2.69s |Gather latency=0.00s (0.00%) |Generate time=1.58s (58.80%) |Training time=0.47s (17.47%) |Others=0.64 (23.74%)|CurSamplesPerSec=11.88 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3128|ppo_ep: 1|act_loss: -0.0211181640625|cri_loss: -0.0097198486328125|unsuper_loss: 0.0 +average reward score: 5.33984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.47s (22.04%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46 +[2023-04-14 10:42:56,080] [INFO] [logging.py:96:log_dist] [Rank 0] step=3130, skipped=41, lr=[6.792594055437316e-06, 6.792594055437316e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:42:56,098] [INFO] [timer.py:199:stop] epoch=0/micro_step=3130/global_step=3130, RunningAvgSamplesPerSec=106.09292350573443, CurrSamplesPerSec=101.92254909405708, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:42:56,191] [INFO] [logging.py:96:log_dist] [Rank 0] step=3130, skipped=47, lr=[3.524749056695789e-06, 3.524749056695789e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3129|ppo_ep: 1|act_loss: -0.0255126953125|cri_loss: -0.01183319091796875|unsuper_loss: 0.0 +average reward score: 4.42578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.48s (22.08%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3130|ppo_ep: 1|act_loss: -0.01776123046875|cri_loss: -0.00868988037109375|unsuper_loss: 0.0 +average reward score: 5.953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.42%) |Training time=0.48s (22.03%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3131|ppo_ep: 1|act_loss: -0.011505126953125|cri_loss: -0.0054473876953125|unsuper_loss: 0.0 +average reward score: 5.2265625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.16%) |Training time=0.49s (22.35%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3132|ppo_ep: 1|act_loss: 0.0022106170654296875|cri_loss: 0.001453399658203125|unsuper_loss: 0.0 +average reward score: 4.6875 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.68s (74.39%) |Training time=0.48s (21.27%) |Others=0.10 (4.34%)|CurSamplesPerSec=14.16 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3133|ppo_ep: 1|act_loss: 0.0005121231079101562|cri_loss: 0.0004792213439941406|unsuper_loss: 0.0 +average reward score: 4.72265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.89%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3134|ppo_ep: 1|act_loss: 0.01171875|cri_loss: 0.006145477294921875|unsuper_loss: 0.0 +average reward score: 4.5390625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.48s (22.04%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3135|ppo_ep: 1|act_loss: -0.0181884765625|cri_loss: -0.0087127685546875|unsuper_loss: 0.0 +average reward score: 4.84375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.68%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3136|ppo_ep: 1|act_loss: 0.018280029296875|cri_loss: 0.0098419189453125|unsuper_loss: 0.0 +average reward score: 4.9609375 +------------------------------------------------------------------------------------- +|E2E latency=2.59s |Gather latency=0.00s (0.00%) |Generate time=1.75s (67.58%) |Training time=0.47s (18.03%) |Others=0.37 (14.39%)|CurSamplesPerSec=12.34 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3137|ppo_ep: 1|act_loss: 0.006168365478515625|cri_loss: 0.0038909912109375|unsuper_loss: 0.0 +average reward score: 5.43359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.86%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3138|ppo_ep: 1|act_loss: -0.0243682861328125|cri_loss: -0.01153564453125|unsuper_loss: 0.0 +average reward score: 4.7734375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.83%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +[2023-04-14 10:43:18,216] [INFO] [logging.py:96:log_dist] [Rank 0] step=3140, skipped=41, lr=[6.775618044012496e-06, 6.775618044012496e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:43:18,234] [INFO] [timer.py:199:stop] epoch=0/micro_step=3140/global_step=3140, RunningAvgSamplesPerSec=106.08196738154236, CurrSamplesPerSec=103.5805649257089, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:43:18,327] [INFO] [logging.py:96:log_dist] [Rank 0] step=3140, skipped=47, lr=[3.5159622469040734e-06, 3.5159622469040734e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3139|ppo_ep: 1|act_loss: -0.0006122589111328125|cri_loss: 0.000782012939453125|unsuper_loss: 0.0 +average reward score: 4.59765625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.61%) |Training time=0.47s (21.86%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3140|ppo_ep: 1|act_loss: -0.0027523040771484375|cri_loss: -0.000537872314453125|unsuper_loss: 0.0 +average reward score: 4.63671875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.81%) |Training time=0.47s (21.63%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3141|ppo_ep: 1|act_loss: 0.009124755859375|cri_loss: 0.0054473876953125|unsuper_loss: 0.0 +average reward score: 4.98046875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.74%) |Training time=0.47s (21.65%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3142|ppo_ep: 1|act_loss: 0.027740478515625|cri_loss: 0.01450347900390625|unsuper_loss: 0.0 +average reward score: 4.7109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.75%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3143|ppo_ep: 1|act_loss: 0.0130767822265625|cri_loss: 0.00711822509765625|unsuper_loss: 0.0 +average reward score: 4.8203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.88%) |Training time=0.46s (21.56%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3144|ppo_ep: 1|act_loss: 0.0394287109375|cri_loss: 0.0209197998046875|unsuper_loss: 0.0 +average reward score: 5.16796875 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.07%) |Training time=0.53s (23.64%) |Others=0.12 (5.28%)|CurSamplesPerSec=14.40 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3145|ppo_ep: 1|act_loss: -0.09375|cri_loss: -0.044036865234375|unsuper_loss: 0.0 +average reward score: 4.8515625 +------------------------------------------------------------------------------------- +|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.58s (70.92%) |Training time=0.55s (24.58%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.33 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3146|ppo_ep: 1|act_loss: -0.1318359375|cri_loss: -0.0621337890625|unsuper_loss: 0.0 +average reward score: 4.7890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.49%) |Training time=0.46s (20.99%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3147|ppo_ep: 1|act_loss: 0.0472412109375|cri_loss: 0.02520751953125|unsuper_loss: 0.0 +average reward score: 5.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.01%) |Training time=0.48s (22.15%) |Others=0.10 (4.84%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3148|ppo_ep: 1|act_loss: -0.037689208984375|cri_loss: -0.017608642578125|unsuper_loss: 0.0 +average reward score: 5.6015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.90%) |Training time=0.46s (21.37%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46 +[2023-04-14 10:43:39,941] [INFO] [logging.py:96:log_dist] [Rank 0] step=3150, skipped=41, lr=[6.758613119722655e-06, 6.758613119722655e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:43:39,958] [INFO] [timer.py:199:stop] epoch=0/micro_step=3150/global_step=3150, RunningAvgSamplesPerSec=106.06281589531704, CurrSamplesPerSec=101.26017409621011, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:43:40,051] [INFO] [logging.py:96:log_dist] [Rank 0] step=3150, skipped=47, lr=[3.5071603781009834e-06, 3.5071603781009834e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3149|ppo_ep: 1|act_loss: -0.029327392578125|cri_loss: -0.01125335693359375|unsuper_loss: 0.0 +average reward score: 4.6484375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.48s (22.04%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3150|ppo_ep: 1|act_loss: 0.031280517578125|cri_loss: 0.017364501953125|unsuper_loss: 0.0 +average reward score: 4.5 +------------------------------------------------------------------------------------- +|E2E latency=2.42s |Gather latency=0.00s (0.00%) |Generate time=1.60s (66.03%) |Training time=0.49s (20.17%) |Others=0.33 (13.79%)|CurSamplesPerSec=13.24 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3151|ppo_ep: 1|act_loss: 0.063720703125|cri_loss: 0.034332275390625|unsuper_loss: 0.0 +average reward score: 5.2265625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.53%) |Training time=0.43s (19.75%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3152|ppo_ep: 1|act_loss: 0.0151519775390625|cri_loss: 0.00836944580078125|unsuper_loss: 0.0 +average reward score: 4.9765625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.40%) |Training time=0.44s (20.03%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3153|ppo_ep: 1|act_loss: 0.010284423828125|cri_loss: 0.0057373046875|unsuper_loss: 0.0 +average reward score: 6.6640625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.87%) |Training time=0.45s (20.58%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3154|ppo_ep: 1|act_loss: 0.01165771484375|cri_loss: 0.006153106689453125|unsuper_loss: 0.0 +average reward score: 5.12109375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.37%) |Training time=0.46s (21.10%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3155|ppo_ep: 1|act_loss: -0.015777587890625|cri_loss: -0.00711822509765625|unsuper_loss: 0.0 +average reward score: 4.53515625 +------------------------------------------------------------------------------------- +|E2E latency=2.52s |Gather latency=0.00s (0.00%) |Generate time=1.61s (63.95%) |Training time=0.45s (18.02%) |Others=0.45 (18.03%)|CurSamplesPerSec=12.71 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3156|ppo_ep: 1|act_loss: -0.019256591796875|cri_loss: -0.00878143310546875|unsuper_loss: 0.0 +average reward score: 4.68359375 +------------------------------------------------------------------------------------- +|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.63s (77.07%) |Training time=0.39s (18.27%) |Others=0.10 (4.66%)|CurSamplesPerSec=15.10 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3157|ppo_ep: 1|act_loss: -0.0109405517578125|cri_loss: -0.004871368408203125|unsuper_loss: 0.0 +average reward score: 4.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.08%) |Training time=0.44s (20.40%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3158|ppo_ep: 1|act_loss: -0.0211181640625|cri_loss: -0.01001739501953125|unsuper_loss: 0.0 +average reward score: 5.1953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.34%) |Training time=0.46s (21.08%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +[2023-04-14 10:44:02,182] [INFO] [logging.py:96:log_dist] [Rank 0] step=3160, skipped=41, lr=[6.741579534621794e-06, 6.741579534621794e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:44:02,201] [INFO] [timer.py:199:stop] epoch=0/micro_step=3160/global_step=3160, RunningAvgSamplesPerSec=106.08499964575394, CurrSamplesPerSec=108.97365410549887, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:44:02,293] [INFO] [logging.py:96:log_dist] [Rank 0] step=3160, skipped=47, lr=[3.4983435807514484e-06, 3.4983435807514484e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3159|ppo_ep: 1|act_loss: -0.02764892578125|cri_loss: -0.0126495361328125|unsuper_loss: 0.0 +average reward score: 4.7265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.34%) |Training time=0.46s (21.14%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3160|ppo_ep: 1|act_loss: -0.0206298828125|cri_loss: -0.009796142578125|unsuper_loss: 0.0 +average reward score: 4.6328125 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.77%) |Training time=0.47s (21.12%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.51 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3161|ppo_ep: 1|act_loss: 0.0300445556640625|cri_loss: 0.015899658203125|unsuper_loss: 0.0 +average reward score: 4.94921875 +------------------------------------------------------------------------------------- +|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.71s (74.42%) |Training time=0.49s (21.31%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.91 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3162|ppo_ep: 1|act_loss: 0.069091796875|cri_loss: 0.036834716796875|unsuper_loss: 0.0 +average reward score: 5.2734375 +------------------------------------------------------------------------------------- +|E2E latency=2.44s |Gather latency=0.00s (0.00%) |Generate time=1.62s (66.42%) |Training time=0.45s (18.26%) |Others=0.37 (15.33%)|CurSamplesPerSec=13.09 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3163|ppo_ep: 1|act_loss: -0.01104736328125|cri_loss: -0.00527191162109375|unsuper_loss: 0.0 +average reward score: 5.25 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.37%) |Training time=0.46s (21.08%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3164|ppo_ep: 1|act_loss: -0.017486572265625|cri_loss: -0.0081787109375|unsuper_loss: 0.0 +average reward score: 4.87890625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.48%) |Training time=0.46s (21.01%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3165|ppo_ep: 1|act_loss: -0.029510498046875|cri_loss: -0.0138702392578125|unsuper_loss: 0.0 +average reward score: 5.1640625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.82%) |Training time=0.47s (21.66%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3166|ppo_ep: 1|act_loss: -0.00011587142944335938|cri_loss: 0.00028586387634277344|unsuper_loss: 0.0 +average reward score: 4.5859375 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.62s (71.38%) |Training time=0.55s (24.28%) |Others=0.10 (4.34%)|CurSamplesPerSec=14.08 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3167|ppo_ep: 1|act_loss: 0.0069122314453125|cri_loss: 0.0042724609375|unsuper_loss: 0.0 +average reward score: 4.91015625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.39%) |Training time=0.48s (22.07%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3168|ppo_ep: 1|act_loss: 0.004741668701171875|cri_loss: 0.0040435791015625|unsuper_loss: 0.0 +average reward score: 5.3828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.85%) |Training time=0.47s (21.62%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +[2023-04-14 10:44:24,441] [INFO] [logging.py:96:log_dist] [Rank 0] step=3170, skipped=41, lr=[6.724517541188731e-06, 6.724517541188731e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:44:24,509] [INFO] [timer.py:199:stop] epoch=0/micro_step=3170/global_step=3170, RunningAvgSamplesPerSec=106.07379913337502, CurrSamplesPerSec=87.51397169668613, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:44:24,602] [INFO] [logging.py:96:log_dist] [Rank 0] step=3170, skipped=47, lr=[3.4895119855416737e-06, 3.4895119855416737e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3169|ppo_ep: 1|act_loss: 0.00868988037109375|cri_loss: 0.00487518310546875|unsuper_loss: 0.0 +average reward score: 5.484375 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.60s (71.85%) |Training time=0.53s (23.74%) |Others=0.10 (4.41%)|CurSamplesPerSec=14.39 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3170|ppo_ep: 1|act_loss: 0.0268707275390625|cri_loss: 0.014068603515625|unsuper_loss: 0.0 +average reward score: 4.93359375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.05%) |Training time=0.49s (22.44%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3171|ppo_ep: 1|act_loss: 0.0170440673828125|cri_loss: 0.009185791015625|unsuper_loss: 0.0 +average reward score: 5.8515625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.46%) |Training time=0.50s (23.05%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3172|ppo_ep: 1|act_loss: 0.01983642578125|cri_loss: 0.0110321044921875|unsuper_loss: 0.0 +average reward score: 4.79296875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.99%) |Training time=0.49s (22.50%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3173|ppo_ep: 1|act_loss: 0.046783447265625|cri_loss: 0.02850341796875|unsuper_loss: 0.0 +average reward score: 4.71484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.29%) |Training time=0.48s (22.17%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3174|ppo_ep: 1|act_loss: 0.0023670196533203125|cri_loss: 0.001544952392578125|unsuper_loss: 0.0 +average reward score: 5.47265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.32%) |Training time=0.48s (22.14%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3175|ppo_ep: 1|act_loss: -0.0003724098205566406|cri_loss: 0.00018739700317382812|unsuper_loss: 0.0 +average reward score: 5.140625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.46%) |Training time=0.48s (22.04%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3176|ppo_ep: 1|act_loss: -0.0162811279296875|cri_loss: -0.0079498291015625|unsuper_loss: 0.0 +average reward score: 5.13671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.35%) |Training time=0.48s (22.11%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3177|ppo_ep: 1|act_loss: 0.011505126953125|cri_loss: 0.00592803955078125|unsuper_loss: 0.0 +average reward score: 4.97265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.37%) |Training time=0.48s (22.10%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3178|ppo_ep: 1|act_loss: 0.051971435546875|cri_loss: 0.02734375|unsuper_loss: 0.0 +average reward score: 4.78515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.25%) |Training time=0.48s (22.14%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +[2023-04-14 10:44:46,203] [INFO] [logging.py:96:log_dist] [Rank 0] step=3180, skipped=41, lr=[6.707427392323372e-06, 6.707427392323372e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:44:46,221] [INFO] [timer.py:199:stop] epoch=0/micro_step=3180/global_step=3180, RunningAvgSamplesPerSec=106.05239888050805, CurrSamplesPerSec=101.55715126687541, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:44:46,314] [INFO] [logging.py:96:log_dist] [Rank 0] step=3180, skipped=47, lr=[3.480665723377205e-06, 3.480665723377205e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3179|ppo_ep: 1|act_loss: 0.07061767578125|cri_loss: 0.0386962890625|unsuper_loss: 0.0 +average reward score: 4.91796875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.49%) |Training time=0.48s (21.99%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3180|ppo_ep: 1|act_loss: 0.0008573532104492188|cri_loss: 0.0007100105285644531|unsuper_loss: 0.0 +average reward score: 5.58984375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.27%) |Training time=0.45s (21.13%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3181|ppo_ep: 1|act_loss: -0.035064697265625|cri_loss: -0.01690673828125|unsuper_loss: 0.0 +average reward score: 5.3203125 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.40%) |Training time=0.48s (20.39%) |Others=0.10 (4.21%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3182|ppo_ep: 1|act_loss: 0.032012939453125|cri_loss: 0.0175628662109375|unsuper_loss: 0.0 +average reward score: 5.26953125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.42%) |Training time=0.45s (20.98%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3183|ppo_ep: 1|act_loss: 0.0159912109375|cri_loss: 0.01290130615234375|unsuper_loss: 0.0 +average reward score: 5.2421875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.66%) |Training time=0.44s (20.74%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3184|ppo_ep: 1|act_loss: -0.00478363037109375|cri_loss: -0.0021953582763671875|unsuper_loss: 0.0 +average reward score: 5.68359375 +------------------------------------------------------------------------------------- +|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.60s (69.27%) |Training time=0.45s (19.35%) |Others=0.26 (11.38%)|CurSamplesPerSec=13.89 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3185|ppo_ep: 1|act_loss: -0.005828857421875|cri_loss: -0.0022563934326171875|unsuper_loss: 0.0 +average reward score: 5.35546875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.65%) |Training time=0.44s (20.76%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3186|ppo_ep: 1|act_loss: 0.0272064208984375|cri_loss: 0.0141143798828125|unsuper_loss: 0.0 +average reward score: 6.0390625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.71%) |Training time=0.44s (20.61%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3187|ppo_ep: 1|act_loss: 0.0460205078125|cri_loss: 0.0245819091796875|unsuper_loss: 0.0 +average reward score: 5.48828125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.37%) |Training time=0.45s (21.04%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3188|ppo_ep: 1|act_loss: 0.00832366943359375|cri_loss: 0.0048675537109375|unsuper_loss: 0.0 +average reward score: 5.15625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.29%) |Training time=0.45s (21.13%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46 +[2023-04-14 10:45:07,978] [INFO] [logging.py:96:log_dist] [Rank 0] step=3190, skipped=41, lr=[6.690309341342949e-06, 6.690309341342949e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:45:07,996] [INFO] [timer.py:199:stop] epoch=0/micro_step=3190/global_step=3190, RunningAvgSamplesPerSec=106.06791101364033, CurrSamplesPerSec=112.8028891206655, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:45:08,088] [INFO] [logging.py:96:log_dist] [Rank 0] step=3190, skipped=47, lr=[3.4718049253809894e-06, 3.4718049253809894e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3189|ppo_ep: 1|act_loss: 0.0041351318359375|cri_loss: 0.0023345947265625|unsuper_loss: 0.0 +average reward score: 5.49609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.76%) |Training time=0.45s (20.69%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3190|ppo_ep: 1|act_loss: 0.01507568359375|cri_loss: 0.0077056884765625|unsuper_loss: 0.0 +average reward score: 6.13671875 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.63s (72.68%) |Training time=0.46s (20.58%) |Others=0.15 (6.74%)|CurSamplesPerSec=14.25 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3191|ppo_ep: 1|act_loss: -0.01251220703125|cri_loss: -0.004730224609375|unsuper_loss: 0.0 +average reward score: 4.625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.53%) |Training time=0.48s (21.94%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3192|ppo_ep: 1|act_loss: 0.019500732421875|cri_loss: 0.01045989990234375|unsuper_loss: 0.0 +average reward score: 4.90234375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.90%) |Training time=0.47s (21.55%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3193|ppo_ep: 1|act_loss: -0.024658203125|cri_loss: -0.0119476318359375|unsuper_loss: 0.0 +average reward score: 5.515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.71%) |Training time=0.47s (21.73%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3194|ppo_ep: 1|act_loss: -0.0204010009765625|cri_loss: -0.00936126708984375|unsuper_loss: 0.0 +average reward score: 4.765625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.45%) |Training time=0.45s (20.91%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3195|ppo_ep: 1|act_loss: -0.005889892578125|cri_loss: -0.0024585723876953125|unsuper_loss: 0.0 +average reward score: 4.60546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.41%) |Training time=0.45s (21.04%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3196|ppo_ep: 1|act_loss: 0.000583648681640625|cri_loss: 0.0017080307006835938|unsuper_loss: 0.0 +average reward score: 5.15234375 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.04%) |Training time=0.46s (19.73%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3197|ppo_ep: 1|act_loss: -0.024444580078125|cri_loss: -0.01142120361328125|unsuper_loss: 0.0 +average reward score: 4.7265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.28%) |Training time=0.46s (21.16%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3198|ppo_ep: 1|act_loss: -0.00677490234375|cri_loss: -0.0029048919677734375|unsuper_loss: 0.0 +average reward score: 4.5390625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.20%) |Training time=0.46s (21.24%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46 +[2023-04-14 10:45:29,972] [INFO] [logging.py:96:log_dist] [Rank 0] step=3200, skipped=41, lr=[6.673163641978276e-06, 6.673163641978276e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:45:30,562] [INFO] [timer.py:199:stop] epoch=0/micro_step=3200/global_step=3200, RunningAvgSamplesPerSec=105.9945701207933, CurrSamplesPerSec=32.19384756571583, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:45:30,656] [INFO] [logging.py:96:log_dist] [Rank 0] step=3200, skipped=47, lr=[3.462929722891427e-06, 3.462929722891427e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3199|ppo_ep: 1|act_loss: -0.01418304443359375|cri_loss: -0.00630950927734375|unsuper_loss: 0.0 +average reward score: 5.1328125 +------------------------------------------------------------------------------------- +|E2E latency=2.87s |Gather latency=0.00s (0.00%) |Generate time=1.60s (55.75%) |Training time=1.16s (40.33%) |Others=0.11 (3.92%)|CurSamplesPerSec=11.16 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3200|ppo_ep: 1|act_loss: 0.036529541015625|cri_loss: 0.0186614990234375|unsuper_loss: 0.0 +average reward score: 5.20703125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.45%) |Training time=0.45s (20.99%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3201|ppo_ep: 1|act_loss: 0.0728759765625|cri_loss: 0.04498291015625|unsuper_loss: 0.0 +average reward score: 5.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.39%) |Training time=0.48s (22.06%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3202|ppo_ep: 1|act_loss: 0.003406524658203125|cri_loss: 0.00264739990234375|unsuper_loss: 0.0 +average reward score: 5.0546875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.92%) |Training time=0.47s (21.54%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3203|ppo_ep: 1|act_loss: 0.021942138671875|cri_loss: 0.01192474365234375|unsuper_loss: 0.0 +average reward score: 5.671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.40%) |Training time=0.48s (22.06%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3204|ppo_ep: 1|act_loss: -0.02435302734375|cri_loss: -0.01165771484375|unsuper_loss: 0.0 +average reward score: 5.75 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.55%) |Training time=0.47s (21.88%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3205|ppo_ep: 1|act_loss: 0.027679443359375|cri_loss: 0.0162811279296875|unsuper_loss: 0.0 +average reward score: 5.1953125 +------------------------------------------------------------------------------------- +|E2E latency=2.76s |Gather latency=0.00s (0.00%) |Generate time=1.58s (57.30%) |Training time=0.47s (16.98%) |Others=0.71 (25.72%)|CurSamplesPerSec=11.58 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3206|ppo_ep: 1|act_loss: -0.04412841796875|cri_loss: -0.0216217041015625|unsuper_loss: 0.0 +average reward score: 5.85546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.46%) |Training time=0.47s (21.98%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3207|ppo_ep: 1|act_loss: -0.0545654296875|cri_loss: -0.0265045166015625|unsuper_loss: 0.0 +average reward score: 4.78515625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.45%) |Training time=0.47s (21.99%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +[2023-04-14 10:45:50,602] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +[2023-04-14 10:45:50,686] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 3208|ppo_ep: 1|act_loss: -0.046234130859375|cri_loss: -0.02215576171875|unsuper_loss: 0.0 +average reward score: 5.7421875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.13%) |Training time=0.47s (21.79%) |Others=0.09 (4.08%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46 +[2023-04-14 10:45:52,725] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +[2023-04-14 10:45:52,725] [INFO] [logging.py:96:log_dist] [Rank 0] step=3210, skipped=43, lr=[6.659427346418702e-06, 6.659427346418702e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:45:52,725] [INFO] [timer.py:199:stop] epoch=0/micro_step=3210/global_step=3210, RunningAvgSamplesPerSec=105.99191837382371, CurrSamplesPerSec=116.19224419656577, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:45:52,808] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +[2023-04-14 10:45:52,809] [INFO] [logging.py:96:log_dist] [Rank 0] step=3210, skipped=49, lr=[3.4558192780603294e-06, 3.4558192780603294e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3209|ppo_ep: 1|act_loss: -0.0201263427734375|cri_loss: -0.008544921875|unsuper_loss: 0.0 +average reward score: 5.08203125 +------------------------------------------------------------------------------------- +|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.27%) |Training time=0.44s (20.60%) |Others=0.09 (4.12%)|CurSamplesPerSec=15.08 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3210|ppo_ep: 1|act_loss: -0.02313232421875|cri_loss: -0.01012420654296875|unsuper_loss: 0.0 +average reward score: 5.01953125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.70%) |Training time=0.47s (21.73%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3211|ppo_ep: 1|act_loss: -0.0171661376953125|cri_loss: -0.008209228515625|unsuper_loss: 0.0 +average reward score: 5.80078125 +------------------------------------------------------------------------------------- +|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.25%) |Training time=0.51s (21.60%) |Others=0.10 (4.15%)|CurSamplesPerSec=13.51 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3212|ppo_ep: 1|act_loss: 0.002307891845703125|cri_loss: 0.0018768310546875|unsuper_loss: 0.0 +average reward score: 5.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.93%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3213|ppo_ep: 1|act_loss: 0.046966552734375|cri_loss: 0.024261474609375|unsuper_loss: 0.0 +average reward score: 5.5625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.58%) |Training time=0.47s (21.90%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3214|ppo_ep: 1|act_loss: 0.0421142578125|cri_loss: 0.02203369140625|unsuper_loss: 0.0 +average reward score: 6.125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.39%) |Training time=0.48s (21.96%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3215|ppo_ep: 1|act_loss: 0.04229736328125|cri_loss: 0.02191162109375|unsuper_loss: 0.0 +average reward score: 5.578125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.48s (22.06%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3216|ppo_ep: 1|act_loss: 0.041778564453125|cri_loss: 0.0214996337890625|unsuper_loss: 0.0 +average reward score: 5.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.26%) |Training time=0.48s (22.12%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3217|ppo_ep: 1|act_loss: -0.0154876708984375|cri_loss: -0.00630950927734375|unsuper_loss: 0.0 +average reward score: 5.609375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.42%) |Training time=0.48s (22.05%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3218|ppo_ep: 1|act_loss: -0.0030689239501953125|cri_loss: -0.0011997222900390625|unsuper_loss: 0.0 +average reward score: 5.98046875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.57%) |Training time=0.48s (21.88%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46 +[2023-04-14 10:46:14,630] [INFO] [logging.py:96:log_dist] [Rank 0] step=3220, skipped=43, lr=[6.642232520669742e-06, 6.642232520669742e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:46:14,648] [INFO] [timer.py:199:stop] epoch=0/micro_step=3220/global_step=3220, RunningAvgSamplesPerSec=105.97259385289864, CurrSamplesPerSec=94.68141865104559, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:46:14,741] [INFO] [logging.py:96:log_dist] [Rank 0] step=3220, skipped=49, lr=[3.4469184791357944e-06, 3.4469184791357944e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3219|ppo_ep: 1|act_loss: -0.0013322830200195312|cri_loss: 6.29425048828125e-05|unsuper_loss: 0.0 +average reward score: 5.74609375 +------------------------------------------------------------------------------------- +|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.12%) |Training time=0.50s (22.47%) |Others=0.10 (4.41%)|CurSamplesPerSec=14.37 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3220|ppo_ep: 1|act_loss: -0.049072265625|cri_loss: -0.0238494873046875|unsuper_loss: 0.0 +average reward score: 5.3828125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.27%) |Training time=0.48s (22.17%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3221|ppo_ep: 1|act_loss: -0.05157470703125|cri_loss: -0.022247314453125|unsuper_loss: 0.0 +average reward score: 4.953125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.22%) |Training time=0.48s (22.11%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3222|ppo_ep: 1|act_loss: 0.00411224365234375|cri_loss: 0.003429412841796875|unsuper_loss: 0.0 +average reward score: 5.85546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.92%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3223|ppo_ep: 1|act_loss: -0.045166015625|cri_loss: -0.02154541015625|unsuper_loss: 0.0 +average reward score: 5.2421875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.64%) |Training time=0.50s (22.80%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3224|ppo_ep: 1|act_loss: -0.01055908203125|cri_loss: -0.0046844482421875|unsuper_loss: 0.0 +average reward score: 5.9140625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.27%) |Training time=0.48s (22.21%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3225|ppo_ep: 1|act_loss: -0.03643798828125|cri_loss: -0.017333984375|unsuper_loss: 0.0 +average reward score: 4.9765625 +------------------------------------------------------------------------------------- +|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.59s (67.61%) |Training time=0.54s (22.98%) |Others=0.22 (9.41%)|CurSamplesPerSec=13.59 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3226|ppo_ep: 1|act_loss: 0.02813720703125|cri_loss: 0.014892578125|unsuper_loss: 0.0 +average reward score: 5.140625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.33%) |Training time=0.48s (22.14%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3227|ppo_ep: 1|act_loss: 0.068115234375|cri_loss: 0.035675048828125|unsuper_loss: 0.0 +average reward score: 4.8984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.53%) |Training time=0.47s (21.92%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3228|ppo_ep: 1|act_loss: 0.10015869140625|cri_loss: 0.0543212890625|unsuper_loss: 0.0 +average reward score: 5.58203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.23%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +[2023-04-14 10:46:36,491] [INFO] [logging.py:96:log_dist] [Rank 0] step=3230, skipped=43, lr=[6.625010759150993e-06, 6.625010759150993e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:46:36,510] [INFO] [timer.py:199:stop] epoch=0/micro_step=3230/global_step=3230, RunningAvgSamplesPerSec=105.94732779611219, CurrSamplesPerSec=99.21879437735863, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:46:36,602] [INFO] [logging.py:96:log_dist] [Rank 0] step=3230, skipped=49, lr=[3.4380036445950826e-06, 3.4380036445950826e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3229|ppo_ep: 1|act_loss: 0.0136871337890625|cri_loss: 0.00812530517578125|unsuper_loss: 0.0 +average reward score: 5.30078125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.10%) |Training time=0.48s (22.40%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3230|ppo_ep: 1|act_loss: 0.028350830078125|cri_loss: 0.01450347900390625|unsuper_loss: 0.0 +average reward score: 5.3515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.09%) |Training time=0.48s (22.37%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3231|ppo_ep: 1|act_loss: -0.0118408203125|cri_loss: -0.00531768798828125|unsuper_loss: 0.0 +average reward score: 4.86328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.16%) |Training time=0.48s (22.31%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3232|ppo_ep: 1|act_loss: -0.00424957275390625|cri_loss: -0.0014390945434570312|unsuper_loss: 0.0 +average reward score: 4.9375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.03%) |Training time=0.49s (22.45%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3233|ppo_ep: 1|act_loss: -0.02142333984375|cri_loss: -0.01001739501953125|unsuper_loss: 0.0 +average reward score: 5.3203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.01%) |Training time=0.49s (22.46%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3234|ppo_ep: 1|act_loss: -0.0184478759765625|cri_loss: -0.00812530517578125|unsuper_loss: 0.0 +average reward score: 4.515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.22%) |Training time=0.48s (22.24%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3235|ppo_ep: 1|act_loss: -0.0230255126953125|cri_loss: -0.01080322265625|unsuper_loss: 0.0 +average reward score: 4.6328125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.05%) |Training time=0.48s (22.31%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3236|ppo_ep: 1|act_loss: 0.011444091796875|cri_loss: 0.0060272216796875|unsuper_loss: 0.0 +average reward score: 4.8671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.13%) |Training time=0.48s (22.34%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3237|ppo_ep: 1|act_loss: -0.037109375|cri_loss: -0.01812744140625|unsuper_loss: 0.0 +average reward score: 5.5859375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.48s (22.09%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3238|ppo_ep: 1|act_loss: 0.016021728515625|cri_loss: 0.00850677490234375|unsuper_loss: 0.0 +average reward score: 5.20703125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.76%) |Training time=0.49s (22.68%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46 +[2023-04-14 10:46:58,161] [INFO] [logging.py:96:log_dist] [Rank 0] step=3240, skipped=43, lr=[6.6077623171305024e-06, 6.6077623171305024e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:46:58,179] [INFO] [timer.py:199:stop] epoch=0/micro_step=3240/global_step=3240, RunningAvgSamplesPerSec=105.92459917021735, CurrSamplesPerSec=96.84810997952171, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:46:58,274] [INFO] [logging.py:96:log_dist] [Rank 0] step=3240, skipped=49, lr=[3.4290749065775475e-06, 3.4290749065775475e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3239|ppo_ep: 1|act_loss: -0.0272216796875|cri_loss: -0.0132293701171875|unsuper_loss: 0.0 +average reward score: 5.76953125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.79%) |Training time=0.49s (22.62%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3240|ppo_ep: 1|act_loss: 0.01654052734375|cri_loss: 0.00868988037109375|unsuper_loss: 0.0 +average reward score: 5.5859375 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.81%) |Training time=0.47s (21.39%) |Others=0.15 (6.80%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3241|ppo_ep: 1|act_loss: 0.05535888671875|cri_loss: 0.02935791015625|unsuper_loss: 0.0 +average reward score: 5.2421875 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.58%) |Training time=0.48s (21.08%) |Others=0.10 (4.34%)|CurSamplesPerSec=14.10 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3242|ppo_ep: 1|act_loss: -0.00484466552734375|cri_loss: -0.002094268798828125|unsuper_loss: 0.0 +average reward score: 5.78125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.37%) |Training time=0.48s (22.07%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3243|ppo_ep: 1|act_loss: 0.03314208984375|cri_loss: 0.0167999267578125|unsuper_loss: 0.0 +average reward score: 5.23046875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.94%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3244|ppo_ep: 1|act_loss: 0.009765625|cri_loss: 0.005191802978515625|unsuper_loss: 0.0 +average reward score: 5.6171875 +------------------------------------------------------------------------------------- +|E2E latency=2.65s |Gather latency=0.00s (0.00%) |Generate time=1.58s (59.59%) |Training time=0.48s (18.08%) |Others=0.59 (22.32%)|CurSamplesPerSec=12.06 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3245|ppo_ep: 1|act_loss: -0.00560760498046875|cri_loss: -0.002361297607421875|unsuper_loss: 0.0 +average reward score: 5.38671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.28%) |Training time=0.48s (22.14%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3246|ppo_ep: 1|act_loss: -0.0221405029296875|cri_loss: -0.01041412353515625|unsuper_loss: 0.0 +average reward score: 5.17578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.28%) |Training time=0.48s (22.18%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3247|ppo_ep: 1|act_loss: -0.023529052734375|cri_loss: -0.010528564453125|unsuper_loss: 0.0 +average reward score: 4.90625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.70%) |Training time=0.47s (21.74%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3248|ppo_ep: 1|act_loss: -0.033416748046875|cri_loss: -0.0162506103515625|unsuper_loss: 0.0 +average reward score: 4.53125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.77%) |Training time=0.48s (21.75%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.46 +[2023-04-14 10:47:20,543] [INFO] [logging.py:96:log_dist] [Rank 0] step=3250, skipped=43, lr=[6.590487450271792e-06, 6.590487450271792e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:47:20,561] [INFO] [timer.py:199:stop] epoch=0/micro_step=3250/global_step=3250, RunningAvgSamplesPerSec=105.9117506771129, CurrSamplesPerSec=100.8401444032472, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:47:20,654] [INFO] [logging.py:96:log_dist] [Rank 0] step=3250, skipped=49, lr=[3.420132397428625e-06, 3.420132397428625e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3249|ppo_ep: 1|act_loss: -0.01910400390625|cri_loss: -0.00933837890625|unsuper_loss: 0.0 +average reward score: 5.56640625 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.51%) |Training time=0.48s (21.18%) |Others=0.10 (4.32%)|CurSamplesPerSec=14.13 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3250|ppo_ep: 1|act_loss: 0.024017333984375|cri_loss: 0.01232147216796875|unsuper_loss: 0.0 +average reward score: 5.7265625 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.58s (68.45%) |Training time=0.48s (20.75%) |Others=0.25 (10.80%)|CurSamplesPerSec=13.87 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3251|ppo_ep: 1|act_loss: 0.0186614990234375|cri_loss: 0.00963592529296875|unsuper_loss: 0.0 +average reward score: 5.00390625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.31%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3252|ppo_ep: 1|act_loss: -0.0013141632080078125|cri_loss: -0.0002598762512207031|unsuper_loss: 0.0 +average reward score: 4.7265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.42%) |Training time=0.48s (22.03%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3253|ppo_ep: 1|act_loss: 0.00670623779296875|cri_loss: 0.0039520263671875|unsuper_loss: 0.0 +average reward score: 5.0234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.81%) |Training time=0.49s (22.64%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3254|ppo_ep: 1|act_loss: 0.005558013916015625|cri_loss: 0.00290679931640625|unsuper_loss: 0.0 +average reward score: 5.546875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.01%) |Training time=0.49s (22.46%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3255|ppo_ep: 1|act_loss: 0.01549530029296875|cri_loss: 0.0082550048828125|unsuper_loss: 0.0 +average reward score: 4.7421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.89%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3256|ppo_ep: 1|act_loss: -0.0132904052734375|cri_loss: -0.00640869140625|unsuper_loss: 0.0 +average reward score: 5.2109375 +------------------------------------------------------------------------------------- +|E2E latency=2.40s |Gather latency=0.00s (0.00%) |Generate time=1.58s (65.96%) |Training time=0.49s (20.36%) |Others=0.33 (13.68%)|CurSamplesPerSec=13.36 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3257|ppo_ep: 1|act_loss: 0.006717681884765625|cri_loss: 0.00504302978515625|unsuper_loss: 0.0 +average reward score: 5.13671875 +------------------------------------------------------------------------------------- +|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.33%) |Training time=0.51s (21.53%) |Others=0.10 (4.15%)|CurSamplesPerSec=13.48 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3258|ppo_ep: 1|act_loss: -0.035125732421875|cri_loss: -0.01690673828125|unsuper_loss: 0.0 +average reward score: 5.609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.31%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +[2023-04-14 10:47:42,752] [INFO] [logging.py:96:log_dist] [Rank 0] step=3260, skipped=43, lr=[6.573186414630062e-06, 6.573186414630062e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:47:42,770] [INFO] [timer.py:199:stop] epoch=0/micro_step=3260/global_step=3260, RunningAvgSamplesPerSec=105.8897419676038, CurrSamplesPerSec=100.19568274861223, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:47:42,863] [INFO] [logging.py:96:log_dist] [Rank 0] step=3260, skipped=49, lr=[3.4111762496978753e-06, 3.4111762496978753e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3259|ppo_ep: 1|act_loss: -0.01148223876953125|cri_loss: -0.005428314208984375|unsuper_loss: 0.0 +average reward score: 5.5234375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.21%) |Training time=0.48s (22.27%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3260|ppo_ep: 1|act_loss: 0.01776123046875|cri_loss: 0.00902557373046875|unsuper_loss: 0.0 +average reward score: 5.578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.31%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3261|ppo_ep: 1|act_loss: -0.0051727294921875|cri_loss: -0.0015697479248046875|unsuper_loss: 0.0 +average reward score: 4.79296875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.09%) |Training time=0.48s (22.38%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3262|ppo_ep: 1|act_loss: 0.0318603515625|cri_loss: 0.017333984375|unsuper_loss: 0.0 +average reward score: 4.53125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.25%) |Training time=0.48s (22.21%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3263|ppo_ep: 1|act_loss: 0.01312255859375|cri_loss: 0.006908416748046875|unsuper_loss: 0.0 +average reward score: 4.88671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.91%) |Training time=0.49s (22.57%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3264|ppo_ep: 1|act_loss: 0.036163330078125|cri_loss: 0.0189971923828125|unsuper_loss: 0.0 +average reward score: 5.578125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.80%) |Training time=0.49s (22.68%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3265|ppo_ep: 1|act_loss: 0.01306915283203125|cri_loss: 0.006793975830078125|unsuper_loss: 0.0 +average reward score: 5.68359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.34%) |Training time=0.48s (22.13%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3266|ppo_ep: 1|act_loss: -0.026123046875|cri_loss: -0.01198577880859375|unsuper_loss: 0.0 +average reward score: 5.3515625 +------------------------------------------------------------------------------------- +|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.58s (66.72%) |Training time=0.48s (20.25%) |Others=0.31 (13.03%)|CurSamplesPerSec=13.51 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3267|ppo_ep: 1|act_loss: -0.0018949508666992188|cri_loss: -0.0007877349853515625|unsuper_loss: 0.0 +average reward score: 5.26953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.26%) |Training time=0.48s (22.16%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3268|ppo_ep: 1|act_loss: -0.002704620361328125|cri_loss: 0.0006694793701171875|unsuper_loss: 0.0 +average reward score: 5.3046875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.16%) |Training time=0.48s (22.24%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46 +[2023-04-14 10:48:04,610] [INFO] [logging.py:96:log_dist] [Rank 0] step=3270, skipped=43, lr=[6.555859466648397e-06, 6.555859466648397e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:48:04,628] [INFO] [timer.py:199:stop] epoch=0/micro_step=3270/global_step=3270, RunningAvgSamplesPerSec=105.86868857095156, CurrSamplesPerSec=97.09289883127744, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:48:04,721] [INFO] [logging.py:96:log_dist] [Rank 0] step=3270, skipped=49, lr=[3.4022065961370106e-06, 3.4022065961370106e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3269|ppo_ep: 1|act_loss: -0.00811004638671875|cri_loss: -0.003635406494140625|unsuper_loss: 0.0 +average reward score: 5.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.86%) |Training time=0.49s (22.63%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3270|ppo_ep: 1|act_loss: 0.01363372802734375|cri_loss: 0.007175445556640625|unsuper_loss: 0.0 +average reward score: 5.62890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.95%) |Training time=0.49s (22.52%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3271|ppo_ep: 1|act_loss: 0.05487060546875|cri_loss: 0.0282440185546875|unsuper_loss: 0.0 +average reward score: 5.11328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.09%) |Training time=0.49s (22.40%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3272|ppo_ep: 1|act_loss: -0.015411376953125|cri_loss: -0.007541656494140625|unsuper_loss: 0.0 +average reward score: 5.40234375 +------------------------------------------------------------------------------------- +|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.58s (70.69%) |Training time=0.50s (22.16%) |Others=0.16 (7.16%)|CurSamplesPerSec=14.30 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3273|ppo_ep: 1|act_loss: 0.034271240234375|cri_loss: 0.0180206298828125|unsuper_loss: 0.0 +average reward score: 5.1171875 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.90%) |Training time=0.50s (22.62%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3274|ppo_ep: 1|act_loss: 0.0107421875|cri_loss: 0.006000518798828125|unsuper_loss: 0.0 +average reward score: 5.23046875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.31%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3275|ppo_ep: 1|act_loss: 0.02215576171875|cri_loss: 0.011566162109375|unsuper_loss: 0.0 +average reward score: 5.30859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.59%) |Training time=0.50s (22.81%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3276|ppo_ep: 1|act_loss: -0.01959228515625|cri_loss: -0.009613037109375|unsuper_loss: 0.0 +average reward score: 5.484375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.90%) |Training time=0.49s (22.55%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3277|ppo_ep: 1|act_loss: 0.0102691650390625|cri_loss: 0.00539398193359375|unsuper_loss: 0.0 +average reward score: 5.3984375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.72%) |Training time=0.50s (22.81%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3278|ppo_ep: 1|act_loss: 0.01190185546875|cri_loss: 0.0086517333984375|unsuper_loss: 0.0 +average reward score: 4.77734375 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.68s (73.95%) |Training time=0.49s (21.68%) |Others=0.10 (4.38%)|CurSamplesPerSec=14.05 |AvgSamplesPerSec=14.46 +[2023-04-14 10:48:26,538] [INFO] [logging.py:96:log_dist] [Rank 0] step=3280, skipped=43, lr=[6.538506863153967e-06, 6.538506863153967e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:48:26,556] [INFO] [timer.py:199:stop] epoch=0/micro_step=3280/global_step=3280, RunningAvgSamplesPerSec=105.8395083408653, CurrSamplesPerSec=98.7977475322228, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:48:26,649] [INFO] [logging.py:96:log_dist] [Rank 0] step=3280, skipped=49, lr=[3.3932235696979367e-06, 3.3932235696979367e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3279|ppo_ep: 1|act_loss: 0.033966064453125|cri_loss: 0.01898193359375|unsuper_loss: 0.0 +average reward score: 5.12890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.02%) |Training time=0.49s (22.47%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3280|ppo_ep: 1|act_loss: -0.03369140625|cri_loss: -0.0164642333984375|unsuper_loss: 0.0 +average reward score: 5.21484375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.85%) |Training time=0.49s (22.62%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3281|ppo_ep: 1|act_loss: -0.0006170272827148438|cri_loss: -0.00013208389282226562|unsuper_loss: 0.0 +average reward score: 5.1484375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.95%) |Training time=0.49s (22.39%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3282|ppo_ep: 1|act_loss: 3.743171691894531e-05|cri_loss: 0.00023567676544189453|unsuper_loss: 0.0 +average reward score: 5.03515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.05%) |Training time=0.49s (22.42%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3283|ppo_ep: 1|act_loss: 0.01375579833984375|cri_loss: 0.007175445556640625|unsuper_loss: 0.0 +average reward score: 5.859375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.83%) |Training time=0.49s (22.68%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3284|ppo_ep: 1|act_loss: 0.027130126953125|cri_loss: 0.01393890380859375|unsuper_loss: 0.0 +average reward score: 5.32421875 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.32%) |Training time=0.49s (22.14%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3285|ppo_ep: 1|act_loss: 0.01788330078125|cri_loss: 0.0091400146484375|unsuper_loss: 0.0 +average reward score: 6.1328125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.21%) |Training time=0.49s (22.29%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3286|ppo_ep: 1|act_loss: 0.0101470947265625|cri_loss: 0.005603790283203125|unsuper_loss: 0.0 +average reward score: 5.80078125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.10%) |Training time=0.46s (21.16%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3287|ppo_ep: 1|act_loss: -0.020721435546875|cri_loss: -0.008544921875|unsuper_loss: 0.0 +average reward score: 4.734375 +------------------------------------------------------------------------------------- +|E2E latency=2.43s |Gather latency=0.00s (0.00%) |Generate time=1.77s (72.74%) |Training time=0.50s (20.42%) |Others=0.17 (6.83%)|CurSamplesPerSec=13.15 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3288|ppo_ep: 1|act_loss: -0.0085296630859375|cri_loss: -0.00321197509765625|unsuper_loss: 0.0 +average reward score: 5.203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.99%) |Training time=0.47s (21.47%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +[2023-04-14 10:48:48,531] [INFO] [logging.py:96:log_dist] [Rank 0] step=3290, skipped=43, lr=[6.521128861354213e-06, 6.521128861354213e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:48:48,549] [INFO] [timer.py:199:stop] epoch=0/micro_step=3290/global_step=3290, RunningAvgSamplesPerSec=105.82224074120158, CurrSamplesPerSec=108.69055545522937, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:48:48,641] [INFO] [logging.py:96:log_dist] [Rank 0] step=3290, skipped=49, lr=[3.384227303530776e-06, 3.384227303530776e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3289|ppo_ep: 1|act_loss: -0.001251220703125|cri_loss: -0.00023698806762695312|unsuper_loss: 0.0 +average reward score: 4.7421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.28%) |Training time=0.46s (21.19%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3290|ppo_ep: 1|act_loss: -0.016754150390625|cri_loss: -0.007965087890625|unsuper_loss: 0.0 +average reward score: 4.640625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.14%) |Training time=0.46s (21.31%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3291|ppo_ep: 1|act_loss: 0.060546875|cri_loss: 0.032806396484375|unsuper_loss: 0.0 +average reward score: 5.28515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.13%) |Training time=0.46s (21.25%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3292|ppo_ep: 1|act_loss: 0.0125274658203125|cri_loss: 0.00705718994140625|unsuper_loss: 0.0 +average reward score: 5.5546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.91%) |Training time=0.47s (21.55%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3293|ppo_ep: 1|act_loss: 0.06329345703125|cri_loss: 0.032257080078125|unsuper_loss: 0.0 +average reward score: 5.08203125 +------------------------------------------------------------------------------------- +|E2E latency=3.53s |Gather latency=0.00s (0.00%) |Generate time=1.60s (45.37%) |Training time=0.46s (13.04%) |Others=1.47 (41.59%)|CurSamplesPerSec=9.07 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3294|ppo_ep: 1|act_loss: 0.00348663330078125|cri_loss: 0.00228118896484375|unsuper_loss: 0.0 +average reward score: 4.83984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.46s (21.46%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3295|ppo_ep: 1|act_loss: 0.033905029296875|cri_loss: 0.0184326171875|unsuper_loss: 0.0 +average reward score: 5.15234375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.28%) |Training time=0.46s (21.15%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3296|ppo_ep: 1|act_loss: -0.00469970703125|cri_loss: -0.0013952255249023438|unsuper_loss: 0.0 +average reward score: 5.265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.18%) |Training time=0.46s (21.29%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3297|ppo_ep: 1|act_loss: 0.0008997917175292969|cri_loss: 0.0009350776672363281|unsuper_loss: 0.0 +average reward score: 4.5390625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.13%) |Training time=0.46s (21.32%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3298|ppo_ep: 1|act_loss: -0.03765869140625|cri_loss: -0.0171966552734375|unsuper_loss: 0.0 +average reward score: 4.66015625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.64%) |Training time=0.48s (21.85%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.46 +[2023-04-14 10:49:11,545] [INFO] [logging.py:96:log_dist] [Rank 0] step=3300, skipped=43, lr=[6.503725718833046e-06, 6.503725718833046e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:49:11,564] [INFO] [timer.py:199:stop] epoch=0/micro_step=3300/global_step=3300, RunningAvgSamplesPerSec=105.82462227179246, CurrSamplesPerSec=108.39885509746532, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:49:11,656] [INFO] [logging.py:96:log_dist] [Rank 0] step=3300, skipped=49, lr=[3.375217930981894e-06, 3.375217930981894e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3299|ppo_ep: 1|act_loss: -0.0273895263671875|cri_loss: -0.013458251953125|unsuper_loss: 0.0 +average reward score: 5.53125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.31%) |Training time=0.46s (21.15%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3300|ppo_ep: 1|act_loss: 0.088134765625|cri_loss: 0.050750732421875|unsuper_loss: 0.0 +average reward score: 5.4609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.04%) |Training time=0.46s (21.41%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3301|ppo_ep: 1|act_loss: 0.0094757080078125|cri_loss: 0.00507354736328125|unsuper_loss: 0.0 +average reward score: 5.82421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.07%) |Training time=0.46s (21.40%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3302|ppo_ep: 1|act_loss: 0.05609130859375|cri_loss: 0.029510498046875|unsuper_loss: 0.0 +average reward score: 5.265625 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.17%) |Training time=0.46s (19.63%) |Others=0.10 (4.20%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3303|ppo_ep: 1|act_loss: 0.0234527587890625|cri_loss: 0.0122833251953125|unsuper_loss: 0.0 +average reward score: 4.52734375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.40%) |Training time=0.46s (21.08%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3304|ppo_ep: 1|act_loss: 0.040771484375|cri_loss: 0.0233306884765625|unsuper_loss: 0.0 +average reward score: 4.78515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.89%) |Training time=0.47s (21.57%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3305|ppo_ep: 1|act_loss: -0.0093994140625|cri_loss: -0.003631591796875|unsuper_loss: 0.0 +average reward score: 4.43359375 +------------------------------------------------------------------------------------- +|E2E latency=3.12s |Gather latency=0.00s (0.00%) |Generate time=1.61s (51.71%) |Training time=0.46s (14.70%) |Others=1.05 (33.59%)|CurSamplesPerSec=10.25 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3306|ppo_ep: 1|act_loss: -0.03729248046875|cri_loss: -0.0177459716796875|unsuper_loss: 0.0 +average reward score: 4.79296875 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.68s (73.49%) |Training time=0.51s (22.17%) |Others=0.10 (4.34%)|CurSamplesPerSec=14.01 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3307|ppo_ep: 1|act_loss: -0.025238037109375|cri_loss: -0.0121307373046875|unsuper_loss: 0.0 +average reward score: 4.80078125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.03%) |Training time=0.46s (21.43%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3308|ppo_ep: 1|act_loss: -0.038482666015625|cri_loss: -0.018707275390625|unsuper_loss: 0.0 +average reward score: 4.92578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.08%) |Training time=0.46s (21.37%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +[2023-04-14 10:49:34,441] [INFO] [logging.py:96:log_dist] [Rank 0] step=3310, skipped=43, lr=[6.4862976935470215e-06, 6.4862976935470215e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:49:34,459] [INFO] [timer.py:199:stop] epoch=0/micro_step=3310/global_step=3310, RunningAvgSamplesPerSec=105.82343541265251, CurrSamplesPerSec=106.25190725481177, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:49:34,552] [INFO] [logging.py:96:log_dist] [Rank 0] step=3310, skipped=49, lr=[3.366195585591927e-06, 3.366195585591927e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3309|ppo_ep: 1|act_loss: -0.03485107421875|cri_loss: -0.0152740478515625|unsuper_loss: 0.0 +average reward score: 4.28125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.42%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +[2023-04-14 10:49:36,594] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +[2023-04-14 10:49:36,677] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 3310|ppo_ep: 1|act_loss: 0.00023245811462402344|cri_loss: 0.0003600120544433594|unsuper_loss: 0.0 +average reward score: 5.21875 +------------------------------------------------------------------------------------- +|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.50%) |Training time=0.43s (20.39%) |Others=0.09 (4.11%)|CurSamplesPerSec=15.07 |AvgSamplesPerSec=14.46 +[2023-04-14 10:49:38,737] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +[2023-04-14 10:49:38,820] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 3311|ppo_ep: 1|act_loss: -0.0281982421875|cri_loss: -0.01326751708984375|unsuper_loss: 0.0 +average reward score: 5.37890625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.75%) |Training time=0.43s (20.17%) |Others=0.09 (4.09%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3312|ppo_ep: 1|act_loss: -0.026275634765625|cri_loss: -0.01253509521484375|unsuper_loss: 0.0 +average reward score: 5.015625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.12%) |Training time=0.46s (21.25%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3313|ppo_ep: 1|act_loss: 0.033477783203125|cri_loss: 0.017578125|unsuper_loss: 0.0 +average reward score: 4.15625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.41%) |Training time=0.45s (21.00%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3314|ppo_ep: 1|act_loss: 0.049713134765625|cri_loss: 0.0262298583984375|unsuper_loss: 0.0 +average reward score: 4.9296875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.64%) |Training time=0.47s (21.81%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3315|ppo_ep: 1|act_loss: -0.009429931640625|cri_loss: -0.004192352294921875|unsuper_loss: 0.0 +average reward score: 6.03515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.95%) |Training time=0.46s (21.41%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3316|ppo_ep: 1|act_loss: 0.0149383544921875|cri_loss: 0.007770538330078125|unsuper_loss: 0.0 +average reward score: 4.4375 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.60s (70.84%) |Training time=0.48s (21.38%) |Others=0.18 (7.78%)|CurSamplesPerSec=14.16 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3317|ppo_ep: 1|act_loss: -0.0093841552734375|cri_loss: -0.004528045654296875|unsuper_loss: 0.0 +average reward score: 5.19921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.95%) |Training time=0.47s (21.42%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3318|ppo_ep: 1|act_loss: 0.027679443359375|cri_loss: 0.01447296142578125|unsuper_loss: 0.0 +average reward score: 4.43359375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.55%) |Training time=0.48s (21.90%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +[2023-04-14 10:49:56,165] [INFO] [logging.py:96:log_dist] [Rank 0] step=3320, skipped=45, lr=[6.472337531309861e-06, 6.472337531309861e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:49:56,578] [INFO] [timer.py:199:stop] epoch=0/micro_step=3320/global_step=3320, RunningAvgSamplesPerSec=105.78531663292775, CurrSamplesPerSec=45.46805004077017, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:49:56,671] [INFO] [logging.py:96:log_dist] [Rank 0] step=3320, skipped=51, lr=[3.358968458696486e-06, 3.358968458696486e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3319|ppo_ep: 1|act_loss: -0.0241241455078125|cri_loss: -0.00957489013671875|unsuper_loss: 0.0 +average reward score: 4.80078125 +------------------------------------------------------------------------------------- +|E2E latency=2.57s |Gather latency=0.00s (0.00%) |Generate time=1.60s (62.44%) |Training time=0.87s (33.74%) |Others=0.10 (3.82%)|CurSamplesPerSec=12.46 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3320|ppo_ep: 1|act_loss: -0.02484130859375|cri_loss: -0.01227569580078125|unsuper_loss: 0.0 +average reward score: 5.1328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.58%) |Training time=0.47s (21.87%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3321|ppo_ep: 1|act_loss: -0.024169921875|cri_loss: -0.01117706298828125|unsuper_loss: 0.0 +average reward score: 4.96484375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.60%) |Training time=0.47s (21.83%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3322|ppo_ep: 1|act_loss: -0.0293121337890625|cri_loss: -0.0143890380859375|unsuper_loss: 0.0 +average reward score: 5.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.53%) |Training time=0.48s (21.91%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3323|ppo_ep: 1|act_loss: 0.0419921875|cri_loss: 0.022247314453125|unsuper_loss: 0.0 +average reward score: 5.13671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.62%) |Training time=0.47s (21.85%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3324|ppo_ep: 1|act_loss: 0.067626953125|cri_loss: 0.0350341796875|unsuper_loss: 0.0 +average reward score: 5.4296875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.47s (21.67%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3325|ppo_ep: 1|act_loss: -0.007579803466796875|cri_loss: -0.003299713134765625|unsuper_loss: 0.0 +average reward score: 6.6953125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.94%) |Training time=0.47s (21.49%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3326|ppo_ep: 1|act_loss: 0.01213836669921875|cri_loss: 0.006786346435546875|unsuper_loss: 0.0 +average reward score: 5.61328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.00%) |Training time=0.47s (21.45%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3327|ppo_ep: 1|act_loss: -0.00235748291015625|cri_loss: -0.0010938644409179688|unsuper_loss: 0.0 +average reward score: 5.87109375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.05%) |Training time=0.46s (21.27%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3328|ppo_ep: 1|act_loss: -0.007213592529296875|cri_loss: -0.003448486328125|unsuper_loss: 0.0 +average reward score: 5.1953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.16%) |Training time=0.44s (20.27%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46 +[2023-04-14 10:50:18,251] [INFO] [logging.py:96:log_dist] [Rank 0] step=3330, skipped=45, lr=[6.454865368272448e-06, 6.454865368272448e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:50:18,270] [INFO] [timer.py:199:stop] epoch=0/micro_step=3330/global_step=3330, RunningAvgSamplesPerSec=105.78267192941124, CurrSamplesPerSec=102.44838222396594, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:50:18,363] [INFO] [logging.py:96:log_dist] [Rank 0] step=3330, skipped=51, lr=[3.3499230993274857e-06, 3.3499230993274857e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3329|ppo_ep: 1|act_loss: -0.0174560546875|cri_loss: -0.00833892822265625|unsuper_loss: 0.0 +average reward score: 4.8203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.59%) |Training time=0.48s (21.86%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3330|ppo_ep: 1|act_loss: 0.0152130126953125|cri_loss: 0.00783538818359375|unsuper_loss: 0.0 +average reward score: 5.546875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.17%) |Training time=0.47s (21.28%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3331|ppo_ep: 1|act_loss: -0.003993988037109375|cri_loss: -0.0018301010131835938|unsuper_loss: 0.0 +average reward score: 5.2109375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.87%) |Training time=0.47s (21.59%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3332|ppo_ep: 1|act_loss: 0.0043182373046875|cri_loss: 0.002300262451171875|unsuper_loss: 0.0 +average reward score: 5.41015625 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.66%) |Training time=0.47s (20.11%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.70 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3333|ppo_ep: 1|act_loss: 0.006855010986328125|cri_loss: 0.00409698486328125|unsuper_loss: 0.0 +average reward score: 5.30859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.62%) |Training time=0.47s (21.79%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3334|ppo_ep: 1|act_loss: -0.03057861328125|cri_loss: -0.014862060546875|unsuper_loss: 0.0 +average reward score: 5.078125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.82%) |Training time=0.47s (21.64%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3335|ppo_ep: 1|act_loss: -0.01727294921875|cri_loss: -0.007293701171875|unsuper_loss: 0.0 +average reward score: 5.046875 +------------------------------------------------------------------------------------- +|E2E latency=2.50s |Gather latency=0.00s (0.00%) |Generate time=1.64s (65.62%) |Training time=0.47s (18.62%) |Others=0.39 (15.76%)|CurSamplesPerSec=12.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3336|ppo_ep: 1|act_loss: -0.0207061767578125|cri_loss: -0.0089874267578125|unsuper_loss: 0.0 +average reward score: 5.515625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.39%) |Training time=0.48s (22.03%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3337|ppo_ep: 1|act_loss: -0.00792694091796875|cri_loss: -0.0018768310546875|unsuper_loss: 0.0 +average reward score: 4.9453125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.46%) |Training time=0.48s (21.97%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3338|ppo_ep: 1|act_loss: -0.0087890625|cri_loss: -0.004047393798828125|unsuper_loss: 0.0 +average reward score: 5.15625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.09%) |Training time=0.47s (21.38%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.46 +[2023-04-14 10:50:40,513] [INFO] [logging.py:96:log_dist] [Rank 0] step=3340, skipped=45, lr=[6.437369046698439e-06, 6.437369046698439e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:50:40,532] [INFO] [timer.py:199:stop] epoch=0/micro_step=3340/global_step=3340, RunningAvgSamplesPerSec=105.77579222271166, CurrSamplesPerSec=102.9543915524116, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:50:40,626] [INFO] [logging.py:96:log_dist] [Rank 0] step=3340, skipped=51, lr=[3.3408651420478265e-06, 3.3408651420478265e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3339|ppo_ep: 1|act_loss: 0.002201080322265625|cri_loss: 0.0015249252319335938|unsuper_loss: 0.0 +average reward score: 5.5 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.63%) |Training time=0.47s (21.78%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3340|ppo_ep: 1|act_loss: 0.00557708740234375|cri_loss: 0.004730224609375|unsuper_loss: 0.0 +average reward score: 5.35546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.32%) |Training time=0.46s (21.10%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3341|ppo_ep: 1|act_loss: 0.05126953125|cri_loss: 0.0268707275390625|unsuper_loss: 0.0 +average reward score: 5.6640625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.62%) |Training time=0.47s (21.85%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3342|ppo_ep: 1|act_loss: 0.00695037841796875|cri_loss: 0.004302978515625|unsuper_loss: 0.0 +average reward score: 4.93359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.15%) |Training time=0.46s (21.22%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3343|ppo_ep: 1|act_loss: -0.030364990234375|cri_loss: -0.01422882080078125|unsuper_loss: 0.0 +average reward score: 5.19140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.30%) |Training time=0.46s (21.11%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3344|ppo_ep: 1|act_loss: -0.0138092041015625|cri_loss: -0.006580352783203125|unsuper_loss: 0.0 +average reward score: 5.484375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.90%) |Training time=0.49s (22.55%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3345|ppo_ep: 1|act_loss: -0.01995849609375|cri_loss: -0.0095062255859375|unsuper_loss: 0.0 +average reward score: 5.5703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.00%) |Training time=0.49s (22.43%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3346|ppo_ep: 1|act_loss: -0.03643798828125|cri_loss: -0.017333984375|unsuper_loss: 0.0 +average reward score: 5.4375 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.88%) |Training time=0.49s (20.90%) |Others=0.10 (4.22%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3347|ppo_ep: 1|act_loss: -0.01500701904296875|cri_loss: -0.00693511962890625|unsuper_loss: 0.0 +average reward score: 6.03125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.48s (22.25%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3348|ppo_ep: 1|act_loss: -0.028533935546875|cri_loss: -0.0125885009765625|unsuper_loss: 0.0 +average reward score: 6.01171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.93%) |Training time=0.49s (22.50%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +[2023-04-14 10:51:02,346] [INFO] [logging.py:96:log_dist] [Rank 0] step=3350, skipped=45, lr=[6.41984882592553e-06, 6.41984882592553e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:51:02,364] [INFO] [timer.py:199:stop] epoch=0/micro_step=3350/global_step=3350, RunningAvgSamplesPerSec=105.76279625643068, CurrSamplesPerSec=96.08421076745537, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:51:02,457] [INFO] [logging.py:96:log_dist] [Rank 0] step=3350, skipped=51, lr=[3.3317947211182846e-06, 3.3317947211182846e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3349|ppo_ep: 1|act_loss: 0.027587890625|cri_loss: 0.0145263671875|unsuper_loss: 0.0 +average reward score: 5.62890625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.73%) |Training time=0.50s (22.78%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3350|ppo_ep: 1|act_loss: 0.043243408203125|cri_loss: 0.0227203369140625|unsuper_loss: 0.0 +average reward score: 5.2578125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.06%) |Training time=0.48s (22.39%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3351|ppo_ep: 1|act_loss: 0.0242156982421875|cri_loss: 0.0243988037109375|unsuper_loss: 0.0 +average reward score: 6.07421875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.77%) |Training time=0.49s (22.72%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3352|ppo_ep: 1|act_loss: -0.011138916015625|cri_loss: -0.00481414794921875|unsuper_loss: 0.0 +average reward score: 4.90625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.36%) |Training time=0.51s (23.15%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3353|ppo_ep: 1|act_loss: -0.0261077880859375|cri_loss: -0.01076507568359375|unsuper_loss: 0.0 +average reward score: 5.7578125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.93%) |Training time=0.49s (22.52%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3354|ppo_ep: 1|act_loss: 0.0078125|cri_loss: 0.004383087158203125|unsuper_loss: 0.0 +average reward score: 4.98046875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.92%) |Training time=0.47s (21.52%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3355|ppo_ep: 1|act_loss: -0.020904541015625|cri_loss: -0.0100555419921875|unsuper_loss: 0.0 +average reward score: 5.5625 +------------------------------------------------------------------------------------- +|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.61s (68.06%) |Training time=0.46s (19.38%) |Others=0.30 (12.56%)|CurSamplesPerSec=13.51 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3356|ppo_ep: 1|act_loss: 0.001373291015625|cri_loss: 0.00147247314453125|unsuper_loss: 0.0 +average reward score: 5.21875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.67s (76.69%) |Training time=0.41s (18.72%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3357|ppo_ep: 1|act_loss: 0.03924560546875|cri_loss: 0.02197265625|unsuper_loss: 0.0 +average reward score: 3.931640625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.27%) |Training time=0.46s (21.20%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3358|ppo_ep: 1|act_loss: 0.00650787353515625|cri_loss: 0.0037593841552734375|unsuper_loss: 0.0 +average reward score: 5.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.35%) |Training time=0.48s (22.11%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.46 +[2023-04-14 10:51:24,302] [INFO] [logging.py:96:log_dist] [Rank 0] step=3360, skipped=45, lr=[6.402304965645658e-06, 6.402304965645658e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:51:24,320] [INFO] [timer.py:199:stop] epoch=0/micro_step=3360/global_step=3360, RunningAvgSamplesPerSec=105.75462987127783, CurrSamplesPerSec=103.27904435641491, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:51:24,413] [INFO] [logging.py:96:log_dist] [Rank 0] step=3360, skipped=51, lr=[3.32271197098438e-06, 3.32271197098438e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3359|ppo_ep: 1|act_loss: -0.0139617919921875|cri_loss: -0.00681304931640625|unsuper_loss: 0.0 +average reward score: 5.40625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.69%) |Training time=0.47s (21.79%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3360|ppo_ep: 1|act_loss: 0.014404296875|cri_loss: 0.00774383544921875|unsuper_loss: 0.0 +average reward score: 4.80078125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.28%) |Training time=0.46s (21.18%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3361|ppo_ep: 1|act_loss: -0.02081298828125|cri_loss: -0.0099029541015625|unsuper_loss: 0.0 +average reward score: 4.734375 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.79s (76.38%) |Training time=0.46s (19.43%) |Others=0.10 (4.19%)|CurSamplesPerSec=13.65 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3362|ppo_ep: 1|act_loss: 0.01715087890625|cri_loss: 0.0088348388671875|unsuper_loss: 0.0 +average reward score: 6.21875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.65%) |Training time=0.47s (21.82%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3363|ppo_ep: 1|act_loss: 0.0213470458984375|cri_loss: 0.01126861572265625|unsuper_loss: 0.0 +average reward score: 4.8359375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.58%) |Training time=0.48s (21.91%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3364|ppo_ep: 1|act_loss: -0.0362548828125|cri_loss: -0.0171661376953125|unsuper_loss: 0.0 +average reward score: 4.921875 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.91%) |Training time=0.48s (21.63%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3365|ppo_ep: 1|act_loss: -0.009552001953125|cri_loss: -0.00391387939453125|unsuper_loss: 0.0 +average reward score: 5.21875 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.21%) |Training time=0.49s (21.48%) |Others=0.10 (4.31%)|CurSamplesPerSec=14.01 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3366|ppo_ep: 1|act_loss: 0.05206298828125|cri_loss: 0.02874755859375|unsuper_loss: 0.0 +average reward score: 5.08984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.27%) |Training time=0.48s (22.21%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3367|ppo_ep: 1|act_loss: 0.013092041015625|cri_loss: 0.007740020751953125|unsuper_loss: 0.0 +average reward score: 5.4375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.61%) |Training time=0.47s (21.85%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3368|ppo_ep: 1|act_loss: 0.0022411346435546875|cri_loss: 0.0022373199462890625|unsuper_loss: 0.0 +average reward score: 5.6640625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.51%) |Training time=0.48s (21.94%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +[2023-04-14 10:51:46,316] [INFO] [logging.py:96:log_dist] [Rank 0] step=3370, skipped=45, lr=[6.384737725901154e-06, 6.384737725901154e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:51:46,334] [INFO] [timer.py:199:stop] epoch=0/micro_step=3370/global_step=3370, RunningAvgSamplesPerSec=105.74734784413464, CurrSamplesPerSec=107.7754519627477, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:51:46,426] [INFO] [logging.py:96:log_dist] [Rank 0] step=3370, skipped=51, lr=[3.3136170262743816e-06, 3.3136170262743816e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3369|ppo_ep: 1|act_loss: -0.03704833984375|cri_loss: -0.016998291015625|unsuper_loss: 0.0 +average reward score: 4.9765625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.20%) |Training time=0.46s (21.27%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3370|ppo_ep: 1|act_loss: 0.003391265869140625|cri_loss: 0.0019388198852539062|unsuper_loss: 0.0 +average reward score: 4.89453125 +------------------------------------------------------------------------------------- +|E2E latency=2.47s |Gather latency=0.00s (0.00%) |Generate time=1.60s (64.53%) |Training time=0.47s (18.98%) |Others=0.41 (16.49%)|CurSamplesPerSec=12.93 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3371|ppo_ep: 1|act_loss: 0.030426025390625|cri_loss: 0.0155181884765625|unsuper_loss: 0.0 +average reward score: 5.2109375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.47s (21.48%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3372|ppo_ep: 1|act_loss: -0.01071929931640625|cri_loss: -0.004619598388671875|unsuper_loss: 0.0 +average reward score: 5.71875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.98%) |Training time=0.46s (21.37%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3373|ppo_ep: 1|act_loss: -0.0019855499267578125|cri_loss: -0.00032901763916015625|unsuper_loss: 0.0 +average reward score: 4.890625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.84%) |Training time=0.50s (22.65%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3374|ppo_ep: 1|act_loss: 0.030792236328125|cri_loss: 0.018341064453125|unsuper_loss: 0.0 +average reward score: 5.0078125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.01%) |Training time=0.49s (22.41%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3375|ppo_ep: 1|act_loss: -0.026336669921875|cri_loss: -0.01236724853515625|unsuper_loss: 0.0 +average reward score: 4.53125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.59%) |Training time=0.45s (20.88%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3376|ppo_ep: 1|act_loss: 0.0124359130859375|cri_loss: 0.00743865966796875|unsuper_loss: 0.0 +average reward score: 5.7890625 +------------------------------------------------------------------------------------- +|E2E latency=2.66s |Gather latency=0.00s (0.00%) |Generate time=1.79s (67.30%) |Training time=0.47s (17.60%) |Others=0.40 (15.10%)|CurSamplesPerSec=12.01 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3377|ppo_ep: 1|act_loss: 0.01904296875|cri_loss: 0.010223388671875|unsuper_loss: 0.0 +average reward score: 5.49609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.68s (77.47%) |Training time=0.39s (17.97%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3378|ppo_ep: 1|act_loss: 0.022216796875|cri_loss: 0.01214599609375|unsuper_loss: 0.0 +average reward score: 4.265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.22%) |Training time=0.46s (21.19%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +[2023-04-14 10:52:08,821] [INFO] [logging.py:96:log_dist] [Rank 0] step=3380, skipped=45, lr=[6.367147367080889e-06, 6.367147367080889e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:52:08,839] [INFO] [timer.py:199:stop] epoch=0/micro_step=3380/global_step=3380, RunningAvgSamplesPerSec=105.75309889489947, CurrSamplesPerSec=113.50777958381468, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:52:08,932] [INFO] [logging.py:96:log_dist] [Rank 0] step=3380, skipped=51, lr=[3.3045100217973093e-06, 3.3045100217973093e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3379|ppo_ep: 1|act_loss: -0.0112152099609375|cri_loss: -0.005474090576171875|unsuper_loss: 0.0 +average reward score: 4.94140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.84%) |Training time=0.45s (20.59%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3380|ppo_ep: 1|act_loss: 0.055023193359375|cri_loss: 0.028564453125|unsuper_loss: 0.0 +average reward score: 4.8984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.38%) |Training time=0.43s (20.03%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3381|ppo_ep: 1|act_loss: 0.0017099380493164062|cri_loss: 0.00150299072265625|unsuper_loss: 0.0 +average reward score: 5.7109375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.80%) |Training time=0.45s (20.65%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3382|ppo_ep: 1|act_loss: 0.032318115234375|cri_loss: 0.016998291015625|unsuper_loss: 0.0 +average reward score: 4.984375 +------------------------------------------------------------------------------------- +|E2E latency=2.63s |Gather latency=0.00s (0.00%) |Generate time=1.66s (63.03%) |Training time=0.41s (15.45%) |Others=0.57 (21.51%)|CurSamplesPerSec=12.17 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3383|ppo_ep: 1|act_loss: -0.01104736328125|cri_loss: -0.005268096923828125|unsuper_loss: 0.0 +average reward score: 5.41015625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.86%) |Training time=0.42s (19.56%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3384|ppo_ep: 1|act_loss: -0.00569915771484375|cri_loss: -0.00243377685546875|unsuper_loss: 0.0 +average reward score: 5.8046875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.71%) |Training time=0.47s (21.59%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3385|ppo_ep: 1|act_loss: -7.152557373046875e-05|cri_loss: 0.0007443428039550781|unsuper_loss: 0.0 +average reward score: 5.109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.72%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3386|ppo_ep: 1|act_loss: -0.0491943359375|cri_loss: -0.0230255126953125|unsuper_loss: 0.0 +average reward score: 5.734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.90%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3387|ppo_ep: 1|act_loss: -0.036865234375|cri_loss: -0.0172271728515625|unsuper_loss: 0.0 +average reward score: 5.65234375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.95%) |Training time=0.49s (22.51%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3388|ppo_ep: 1|act_loss: -0.03228759765625|cri_loss: -0.01568603515625|unsuper_loss: 0.0 +average reward score: 5.3828125 +------------------------------------------------------------------------------------- +|E2E latency=2.44s |Gather latency=0.00s (0.00%) |Generate time=1.59s (65.28%) |Training time=0.46s (18.96%) |Others=0.38 (15.76%)|CurSamplesPerSec=13.11 |AvgSamplesPerSec=14.46 +[2023-04-14 10:52:31,218] [INFO] [logging.py:96:log_dist] [Rank 0] step=3390, skipped=45, lr=[6.349534149916417e-06, 6.349534149916417e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:52:31,236] [INFO] [timer.py:199:stop] epoch=0/micro_step=3390/global_step=3390, RunningAvgSamplesPerSec=105.76502510812344, CurrSamplesPerSec=106.6209268758078, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:52:31,328] [INFO] [logging.py:96:log_dist] [Rank 0] step=3390, skipped=51, lr=[3.2953910925409387e-06, 3.2953910925409387e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3389|ppo_ep: 1|act_loss: -0.0055389404296875|cri_loss: -0.002254486083984375|unsuper_loss: 0.0 +average reward score: 5.296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.52%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3390|ppo_ep: 1|act_loss: -0.0277252197265625|cri_loss: -0.01322174072265625|unsuper_loss: 0.0 +average reward score: 5.171875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.37%) |Training time=0.45s (21.01%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3391|ppo_ep: 1|act_loss: -0.03997802734375|cri_loss: -0.018768310546875|unsuper_loss: 0.0 +average reward score: 4.984375 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.69%) |Training time=0.47s (20.06%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3392|ppo_ep: 1|act_loss: 0.0065765380859375|cri_loss: 0.0040283203125|unsuper_loss: 0.0 +average reward score: 5.6171875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.41%) |Training time=0.45s (21.01%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3393|ppo_ep: 1|act_loss: 0.013397216796875|cri_loss: 0.007022857666015625|unsuper_loss: 0.0 +average reward score: 5.46875 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.65s (74.80%) |Training time=0.46s (20.74%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.54 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3394|ppo_ep: 1|act_loss: 0.007198333740234375|cri_loss: 0.00415802001953125|unsuper_loss: 0.0 +average reward score: 5.375 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.72%) |Training time=0.48s (20.94%) |Others=0.10 (4.34%)|CurSamplesPerSec=14.09 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3395|ppo_ep: 1|act_loss: 0.0068206787109375|cri_loss: 0.004573822021484375|unsuper_loss: 0.0 +average reward score: 5.94921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.83%) |Training time=0.46s (21.56%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3396|ppo_ep: 1|act_loss: 0.0765380859375|cri_loss: 0.0452880859375|unsuper_loss: 0.0 +average reward score: 5.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.73%) |Training time=0.47s (21.68%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3397|ppo_ep: 1|act_loss: 0.0015411376953125|cri_loss: 0.001132965087890625|unsuper_loss: 0.0 +average reward score: 5.71875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.75%) |Training time=0.46s (21.66%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3398|ppo_ep: 1|act_loss: -0.00408935546875|cri_loss: -0.0012340545654296875|unsuper_loss: 0.0 +average reward score: 5.08984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.88%) |Training time=0.46s (21.54%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46 +[2023-04-14 10:52:53,057] [INFO] [logging.py:96:log_dist] [Rank 0] step=3400, skipped=45, lr=[6.331898335478102e-06, 6.331898335478102e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:52:53,075] [INFO] [timer.py:199:stop] epoch=0/micro_step=3400/global_step=3400, RunningAvgSamplesPerSec=105.76780238263315, CurrSamplesPerSec=106.7389575160306, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:52:53,167] [INFO] [logging.py:96:log_dist] [Rank 0] step=3400, skipped=51, lr=[3.2862603736698014e-06, 3.2862603736698014e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3399|ppo_ep: 1|act_loss: -0.0025119781494140625|cri_loss: 0.00067901611328125|unsuper_loss: 0.0 +average reward score: 5.29296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.89%) |Training time=0.46s (21.55%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3400|ppo_ep: 1|act_loss: 0.01654052734375|cri_loss: 0.0099945068359375|unsuper_loss: 0.0 +average reward score: 4.66015625 +------------------------------------------------------------------------------------- +|E2E latency=2.50s |Gather latency=0.00s (0.00%) |Generate time=1.59s (63.56%) |Training time=0.46s (18.35%) |Others=0.45 (18.09%)|CurSamplesPerSec=12.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3401|ppo_ep: 1|act_loss: 0.03460693359375|cri_loss: 0.0185394287109375|unsuper_loss: 0.0 +average reward score: 4.87109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.83%) |Training time=0.46s (21.59%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3402|ppo_ep: 1|act_loss: 0.0011186599731445312|cri_loss: 0.0008335113525390625|unsuper_loss: 0.0 +average reward score: 5.6796875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.56%) |Training time=0.48s (21.89%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3403|ppo_ep: 1|act_loss: -0.007110595703125|cri_loss: -0.003444671630859375|unsuper_loss: 0.0 +average reward score: 5.25 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.73%) |Training time=0.47s (21.71%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3404|ppo_ep: 1|act_loss: 0.01158905029296875|cri_loss: 0.006256103515625|unsuper_loss: 0.0 +average reward score: 5.59375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.74%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3405|ppo_ep: 1|act_loss: -0.0084381103515625|cri_loss: -0.0037441253662109375|unsuper_loss: 0.0 +average reward score: 5.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.62s |Gather latency=0.00s (0.00%) |Generate time=1.59s (60.72%) |Training time=0.46s (17.66%) |Others=0.57 (21.62%)|CurSamplesPerSec=12.22 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3406|ppo_ep: 1|act_loss: 0.0013408660888671875|cri_loss: 0.0010423660278320312|unsuper_loss: 0.0 +average reward score: 5.1953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.48s (21.99%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3407|ppo_ep: 1|act_loss: 0.00499725341796875|cri_loss: 0.0028533935546875|unsuper_loss: 0.0 +average reward score: 5.265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.46s (21.57%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3408|ppo_ep: 1|act_loss: -0.006114959716796875|cri_loss: -0.001445770263671875|unsuper_loss: 0.0 +average reward score: 5.77734375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.68%) |Training time=0.47s (21.61%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +[2023-04-14 10:53:15,449] [INFO] [logging.py:96:log_dist] [Rank 0] step=3410, skipped=45, lr=[6.314240185171264e-06, 6.314240185171264e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:53:15,467] [INFO] [timer.py:199:stop] epoch=0/micro_step=3410/global_step=3410, RunningAvgSamplesPerSec=105.76521709293858, CurrSamplesPerSec=104.83900745727125, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:53:15,560] [INFO] [logging.py:96:log_dist] [Rank 0] step=3410, skipped=51, lr=[3.2771180005231746e-06, 3.2771180005231746e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3409|ppo_ep: 1|act_loss: -0.02142333984375|cri_loss: -0.0105133056640625|unsuper_loss: 0.0 +average reward score: 5.875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.66%) |Training time=0.47s (21.81%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3410|ppo_ep: 1|act_loss: -0.02252197265625|cri_loss: -0.00972747802734375|unsuper_loss: 0.0 +average reward score: 4.953125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.10%) |Training time=0.46s (21.35%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3411|ppo_ep: 1|act_loss: -0.0213623046875|cri_loss: -0.0104827880859375|unsuper_loss: 0.0 +average reward score: 5.67578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.98%) |Training time=0.46s (21.46%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46 +[2023-04-14 10:53:21,887] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +[2023-04-14 10:53:21,971] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 3412|ppo_ep: 1|act_loss: -0.006343841552734375|cri_loss: -0.00311279296875|unsuper_loss: 0.0 +average reward score: 6.52734375 +------------------------------------------------------------------------------------- +|E2E latency=2.10s |Gather latency=0.00s (0.00%) |Generate time=1.59s (75.58%) |Training time=0.43s (20.28%) |Others=0.09 (4.14%)|CurSamplesPerSec=15.20 |AvgSamplesPerSec=14.46 +[2023-04-14 10:53:23,996] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +[2023-04-14 10:53:24,080] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 3413|ppo_ep: 1|act_loss: 0.0185394287109375|cri_loss: 0.011444091796875|unsuper_loss: 0.0 +average reward score: 5.8984375 +------------------------------------------------------------------------------------- +|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.59s (75.31%) |Training time=0.43s (20.55%) |Others=0.09 (4.13%)|CurSamplesPerSec=15.17 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3414|ppo_ep: 1|act_loss: -0.0077972412109375|cri_loss: -0.0034694671630859375|unsuper_loss: 0.0 +average reward score: 5.640625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.74%) |Training time=0.47s (21.70%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3415|ppo_ep: 1|act_loss: -0.0147552490234375|cri_loss: -0.0070343017578125|unsuper_loss: 0.0 +average reward score: 5.66796875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.21%) |Training time=0.45s (21.19%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3416|ppo_ep: 1|act_loss: -0.017578125|cri_loss: -0.0079498291015625|unsuper_loss: 0.0 +average reward score: 5.12109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.50%) |Training time=0.47s (21.94%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3417|ppo_ep: 1|act_loss: 0.03759765625|cri_loss: 0.02191162109375|unsuper_loss: 0.0 +average reward score: 4.7890625 +------------------------------------------------------------------------------------- +|E2E latency=3.73s |Gather latency=0.00s (0.00%) |Generate time=1.59s (42.64%) |Training time=0.46s (12.46%) |Others=1.68 (44.90%)|CurSamplesPerSec=8.58 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3418|ppo_ep: 1|act_loss: -0.01320648193359375|cri_loss: -0.005695343017578125|unsuper_loss: 0.0 +average reward score: 5.10546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.49%) |Training time=0.45s (20.94%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45 +[2023-04-14 10:53:38,444] [INFO] [logging.py:96:log_dist] [Rank 0] step=3420, skipped=47, lr=[6.300097758976281e-06, 6.300097758976281e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:53:38,462] [INFO] [timer.py:199:stop] epoch=0/micro_step=3420/global_step=3420, RunningAvgSamplesPerSec=105.77514620401836, CurrSamplesPerSec=106.69610729889406, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:53:38,555] [INFO] [logging.py:96:log_dist] [Rank 0] step=3420, skipped=53, lr=[3.2697958019858506e-06, 3.2697958019858506e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3419|ppo_ep: 1|act_loss: 0.02777099609375|cri_loss: 0.0148773193359375|unsuper_loss: 0.0 +average reward score: 4.8984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.87%) |Training time=0.46s (21.56%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3420|ppo_ep: 1|act_loss: 0.00020694732666015625|cri_loss: 0.0003859996795654297|unsuper_loss: 0.0 +average reward score: 5.90234375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.21%) |Training time=0.46s (21.24%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3421|ppo_ep: 1|act_loss: -0.0121612548828125|cri_loss: -0.005523681640625|unsuper_loss: 0.0 +average reward score: 5.30859375 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.95%) |Training time=0.46s (19.82%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3422|ppo_ep: 1|act_loss: -0.0455322265625|cri_loss: -0.02069091796875|unsuper_loss: 0.0 +average reward score: 5.1015625 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.61s (72.62%) |Training time=0.51s (22.96%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.42 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3423|ppo_ep: 1|act_loss: -0.022003173828125|cri_loss: -0.01036834716796875|unsuper_loss: 0.0 +average reward score: 4.7734375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.91%) |Training time=0.46s (21.46%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3424|ppo_ep: 1|act_loss: 0.0006990432739257812|cri_loss: 0.000621795654296875|unsuper_loss: 0.0 +average reward score: 5.796875 +------------------------------------------------------------------------------------- +|E2E latency=3.00s |Gather latency=0.00s (0.00%) |Generate time=1.59s (53.00%) |Training time=0.46s (15.49%) |Others=0.94 (31.50%)|CurSamplesPerSec=10.68 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3425|ppo_ep: 1|act_loss: -0.023040771484375|cri_loss: -0.010498046875|unsuper_loss: 0.0 +average reward score: 5.58203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.47s (21.62%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3426|ppo_ep: 1|act_loss: -0.00516510009765625|cri_loss: -0.0021610260009765625|unsuper_loss: 0.0 +average reward score: 5.96875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.91%) |Training time=0.46s (21.51%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3427|ppo_ep: 1|act_loss: 0.0189208984375|cri_loss: 0.01000213623046875|unsuper_loss: 0.0 +average reward score: 5.53515625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.46s (21.61%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3428|ppo_ep: 1|act_loss: 0.0011386871337890625|cri_loss: 0.002056121826171875|unsuper_loss: 0.0 +average reward score: 4.28515625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.08%) |Training time=0.46s (21.30%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.45 +[2023-04-14 10:54:01,026] [INFO] [logging.py:96:log_dist] [Rank 0] step=3430, skipped=47, lr=[6.2824000639017895e-06, 6.2824000639017895e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:54:01,044] [INFO] [timer.py:199:stop] epoch=0/micro_step=3430/global_step=3430, RunningAvgSamplesPerSec=105.77657682475913, CurrSamplesPerSec=108.1306645290452, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:54:01,137] [INFO] [logging.py:96:log_dist] [Rank 0] step=3430, skipped=53, lr=[3.2606327927484516e-06, 3.2606327927484516e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3429|ppo_ep: 1|act_loss: 0.0309295654296875|cri_loss: 0.0161285400390625|unsuper_loss: 0.0 +average reward score: 5.03125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.46s (21.39%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3430|ppo_ep: 1|act_loss: -0.03143310546875|cri_loss: -0.0149078369140625|unsuper_loss: 0.0 +average reward score: 5.26953125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.95%) |Training time=0.46s (21.47%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3431|ppo_ep: 1|act_loss: 0.007232666015625|cri_loss: 0.003753662109375|unsuper_loss: 0.0 +average reward score: 4.51171875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.21%) |Training time=0.46s (21.23%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3432|ppo_ep: 1|act_loss: 0.0101165771484375|cri_loss: 0.00543212890625|unsuper_loss: 0.0 +average reward score: 4.859375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.99%) |Training time=0.46s (21.45%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3433|ppo_ep: 1|act_loss: -0.0355224609375|cri_loss: -0.01580810546875|unsuper_loss: 0.0 +average reward score: 4.22265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.28%) |Training time=0.45s (21.12%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3434|ppo_ep: 1|act_loss: -0.021392822265625|cri_loss: -0.01043701171875|unsuper_loss: 0.0 +average reward score: 4.171875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.26%) |Training time=0.46s (21.16%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3435|ppo_ep: 1|act_loss: 0.015380859375|cri_loss: 0.0092010498046875|unsuper_loss: 0.0 +average reward score: 5.02734375 +------------------------------------------------------------------------------------- +|E2E latency=2.94s |Gather latency=0.00s (0.00%) |Generate time=1.76s (59.75%) |Training time=0.47s (16.12%) |Others=0.71 (24.13%)|CurSamplesPerSec=10.88 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3436|ppo_ep: 1|act_loss: -0.01514434814453125|cri_loss: -0.0072021484375|unsuper_loss: 0.0 +average reward score: 4.5234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.59%) |Training time=0.47s (21.83%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3437|ppo_ep: 1|act_loss: -0.004932403564453125|cri_loss: -0.0020294189453125|unsuper_loss: 0.0 +average reward score: 5.94921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.48s (21.92%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3438|ppo_ep: 1|act_loss: 0.00617218017578125|cri_loss: 0.0031795501708984375|unsuper_loss: 0.0 +average reward score: 5.0390625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.01%) |Training time=0.46s (21.43%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45 +[2023-04-14 10:54:23,363] [INFO] [logging.py:96:log_dist] [Rank 0] step=3440, skipped=47, lr=[6.264680766642565e-06, 6.264680766642565e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:54:23,375] [INFO] [timer.py:199:stop] epoch=0/micro_step=3440/global_step=3440, RunningAvgSamplesPerSec=105.77759385861118, CurrSamplesPerSec=104.42456438456774, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:54:23,467] [INFO] [logging.py:96:log_dist] [Rank 0] step=3440, skipped=53, lr=[3.251458509098137e-06, 3.251458509098137e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3439|ppo_ep: 1|act_loss: -0.005565643310546875|cri_loss: -0.002216339111328125|unsuper_loss: 0.0 +average reward score: 5.78125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.47s (21.68%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3440|ppo_ep: 1|act_loss: -0.02606201171875|cri_loss: -0.01192474365234375|unsuper_loss: 0.0 +average reward score: 5.6640625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.95%) |Training time=0.47s (21.52%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3441|ppo_ep: 1|act_loss: 0.01318359375|cri_loss: 0.00732421875|unsuper_loss: 0.0 +average reward score: 4.41015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.23%) |Training time=0.46s (21.21%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3442|ppo_ep: 1|act_loss: 0.0325927734375|cri_loss: 0.017913818359375|unsuper_loss: 0.0 +average reward score: 5.30859375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.94%) |Training time=0.46s (21.51%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3443|ppo_ep: 1|act_loss: -0.014251708984375|cri_loss: -0.006694793701171875|unsuper_loss: 0.0 +average reward score: 5.0625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.94%) |Training time=0.46s (21.50%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3444|ppo_ep: 1|act_loss: 0.0066375732421875|cri_loss: 0.0035572052001953125|unsuper_loss: 0.0 +average reward score: 5.30859375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.26%) |Training time=0.46s (21.17%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3445|ppo_ep: 1|act_loss: 0.015838623046875|cri_loss: 0.00957489013671875|unsuper_loss: 0.0 +average reward score: 5.92578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.20%) |Training time=0.46s (21.24%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3446|ppo_ep: 1|act_loss: -0.013275146484375|cri_loss: -0.00611114501953125|unsuper_loss: 0.0 +average reward score: 5.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.16%) |Training time=0.46s (21.28%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3447|ppo_ep: 1|act_loss: -0.018157958984375|cri_loss: -0.0085296630859375|unsuper_loss: 0.0 +average reward score: 5.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.45%) |Training time=0.46s (21.04%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3448|ppo_ep: 1|act_loss: -0.00926971435546875|cri_loss: -0.00391387939453125|unsuper_loss: 0.0 +average reward score: 5.0390625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.96%) |Training time=0.46s (21.49%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45 +[2023-04-14 10:54:44,967] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384 +[2023-04-14 10:54:44,967] [INFO] [logging.py:96:log_dist] [Rank 0] step=3450, skipped=48, lr=[6.248715146308981e-06, 6.248715146308981e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:54:44,967] [INFO] [timer.py:199:stop] epoch=0/micro_step=3450/global_step=3450, RunningAvgSamplesPerSec=105.783939143326, CurrSamplesPerSec=117.16853453920088, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:54:45,059] [INFO] [logging.py:96:log_dist] [Rank 0] step=3450, skipped=53, lr=[3.2422730870199237e-06, 3.2422730870199237e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3449|ppo_ep: 1|act_loss: -0.015716552734375|cri_loss: -0.00740814208984375|unsuper_loss: 0.0 +average reward score: 5.8125 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.01%) |Training time=0.43s (20.38%) |Others=0.10 (4.60%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3450|ppo_ep: 1|act_loss: 0.03143310546875|cri_loss: 0.0171966552734375|unsuper_loss: 0.0 +average reward score: 4.921875 +------------------------------------------------------------------------------------- +|E2E latency=2.56s |Gather latency=0.00s (0.00%) |Generate time=1.71s (66.84%) |Training time=0.50s (19.54%) |Others=0.35 (13.62%)|CurSamplesPerSec=12.48 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3451|ppo_ep: 1|act_loss: 0.02996826171875|cri_loss: 0.01555633544921875|unsuper_loss: 0.0 +average reward score: 4.359375 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.72s (75.75%) |Training time=0.45s (19.91%) |Others=0.10 (4.34%)|CurSamplesPerSec=14.11 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3452|ppo_ep: 1|act_loss: 0.035980224609375|cri_loss: 0.01849365234375|unsuper_loss: 0.0 +average reward score: 4.40625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.17%) |Training time=0.46s (21.26%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3453|ppo_ep: 1|act_loss: -0.0020885467529296875|cri_loss: -0.000255584716796875|unsuper_loss: 0.0 +average reward score: 6.40234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.25%) |Training time=0.46s (21.18%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3454|ppo_ep: 1|act_loss: -0.0262298583984375|cri_loss: -0.0119476318359375|unsuper_loss: 0.0 +average reward score: 4.9609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.90%) |Training time=0.47s (21.55%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3455|ppo_ep: 1|act_loss: -0.02520751953125|cri_loss: -0.01177215576171875|unsuper_loss: 0.0 +average reward score: 4.6328125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.39%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3456|ppo_ep: 1|act_loss: -0.0152587890625|cri_loss: -0.0072784423828125|unsuper_loss: 0.0 +average reward score: 5.41015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.17%) |Training time=0.46s (21.27%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3457|ppo_ep: 1|act_loss: 0.0140533447265625|cri_loss: 0.00745391845703125|unsuper_loss: 0.0 +average reward score: 5.16015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.94%) |Training time=0.47s (21.52%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3458|ppo_ep: 1|act_loss: 0.0024566650390625|cri_loss: 0.0014514923095703125|unsuper_loss: 0.0 +average reward score: 5.3515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.66%) |Training time=0.47s (21.80%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.45 +[2023-04-14 10:55:07,050] [INFO] [logging.py:96:log_dist] [Rank 0] step=3460, skipped=48, lr=[6.230955528740993e-06, 6.230955528740993e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:55:07,068] [INFO] [timer.py:199:stop] epoch=0/micro_step=3460/global_step=3460, RunningAvgSamplesPerSec=105.7849063342375, CurrSamplesPerSec=107.60592251472772, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:55:07,161] [INFO] [logging.py:96:log_dist] [Rank 0] step=3460, skipped=53, lr=[3.2330766626639233e-06, 3.2330766626639233e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3459|ppo_ep: 1|act_loss: -0.0158843994140625|cri_loss: -0.006961822509765625|unsuper_loss: 0.0 +average reward score: 5.1796875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.14%) |Training time=0.46s (21.31%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3460|ppo_ep: 1|act_loss: -0.018280029296875|cri_loss: -0.00885009765625|unsuper_loss: 0.0 +average reward score: 4.515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.95%) |Training time=0.46s (21.51%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3461|ppo_ep: 1|act_loss: 0.02325439453125|cri_loss: 0.0117645263671875|unsuper_loss: 0.0 +average reward score: 5.375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.22%) |Training time=0.46s (21.22%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3462|ppo_ep: 1|act_loss: -0.0025615692138671875|cri_loss: -0.0010423660278320312|unsuper_loss: 0.0 +average reward score: 4.71875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.72%) |Training time=0.47s (21.61%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3463|ppo_ep: 1|act_loss: 0.0088958740234375|cri_loss: 0.0046844482421875|unsuper_loss: 0.0 +average reward score: 5.21875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.46s (21.57%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3464|ppo_ep: 1|act_loss: -0.01629638671875|cri_loss: -0.00799560546875|unsuper_loss: 0.0 +average reward score: 5.26171875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.26%) |Training time=0.48s (22.18%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3465|ppo_ep: 1|act_loss: 0.009490966796875|cri_loss: 0.0052642822265625|unsuper_loss: 0.0 +average reward score: 5.73828125 +------------------------------------------------------------------------------------- +|E2E latency=3.12s |Gather latency=0.00s (0.00%) |Generate time=1.61s (51.60%) |Training time=0.51s (16.36%) |Others=1.00 (32.04%)|CurSamplesPerSec=10.24 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3466|ppo_ep: 1|act_loss: 0.0145111083984375|cri_loss: 0.0078887939453125|unsuper_loss: 0.0 +average reward score: 5.1015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.73%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3467|ppo_ep: 1|act_loss: -0.015533447265625|cri_loss: -0.007503509521484375|unsuper_loss: 0.0 +average reward score: 4.9296875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.48s (22.07%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3468|ppo_ep: 1|act_loss: -0.014312744140625|cri_loss: -0.006694793701171875|unsuper_loss: 0.0 +average reward score: 4.9609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.48s (22.07%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45 +[2023-04-14 10:55:29,606] [INFO] [logging.py:96:log_dist] [Rank 0] step=3470, skipped=48, lr=[6.213175071520385e-06, 6.213175071520385e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:55:29,625] [INFO] [timer.py:199:stop] epoch=0/micro_step=3470/global_step=3470, RunningAvgSamplesPerSec=105.77427848375432, CurrSamplesPerSec=98.66774093949864, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:55:29,717] [INFO] [logging.py:96:log_dist] [Rank 0] step=3470, skipped=53, lr=[3.2238693723433317e-06, 3.2238693723433317e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3469|ppo_ep: 1|act_loss: -0.01213836669921875|cri_loss: -0.005840301513671875|unsuper_loss: 0.0 +average reward score: 4.79296875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.97%) |Training time=0.49s (22.50%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3470|ppo_ep: 1|act_loss: -0.0100555419921875|cri_loss: -0.004817962646484375|unsuper_loss: 0.0 +average reward score: 5.42578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.14%) |Training time=0.48s (22.30%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3471|ppo_ep: 1|act_loss: -0.0176544189453125|cri_loss: -0.007389068603515625|unsuper_loss: 0.0 +average reward score: 5.41015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.39%) |Training time=0.48s (22.05%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3472|ppo_ep: 1|act_loss: -0.0164794921875|cri_loss: -0.007843017578125|unsuper_loss: 0.0 +average reward score: 4.82421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.91%) |Training time=0.49s (22.55%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3473|ppo_ep: 1|act_loss: 0.03594970703125|cri_loss: 0.018707275390625|unsuper_loss: 0.0 +average reward score: 4.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.48s (22.05%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3474|ppo_ep: 1|act_loss: 0.01245880126953125|cri_loss: 0.0072021484375|unsuper_loss: 0.0 +average reward score: 5.10546875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.38%) |Training time=0.48s (22.12%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3475|ppo_ep: 1|act_loss: 0.016845703125|cri_loss: 0.00959014892578125|unsuper_loss: 0.0 +average reward score: 5.5859375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.09%) |Training time=0.48s (22.35%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3476|ppo_ep: 1|act_loss: 0.026947021484375|cri_loss: 0.013885498046875|unsuper_loss: 0.0 +average reward score: 5.765625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.04%) |Training time=0.49s (22.42%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3477|ppo_ep: 1|act_loss: -0.006870269775390625|cri_loss: -0.00278472900390625|unsuper_loss: 0.0 +average reward score: 5.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.92%) |Training time=0.46s (21.52%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3478|ppo_ep: 1|act_loss: -0.011322021484375|cri_loss: -0.00521087646484375|unsuper_loss: 0.0 +average reward score: 5.41015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.55%) |Training time=0.47s (21.90%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.45 +[2023-04-14 10:55:51,216] [INFO] [logging.py:96:log_dist] [Rank 0] step=3480, skipped=48, lr=[6.195374038196429e-06, 6.195374038196429e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:55:51,234] [INFO] [timer.py:199:stop] epoch=0/micro_step=3480/global_step=3480, RunningAvgSamplesPerSec=105.76188277060592, CurrSamplesPerSec=105.97196140666698, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:55:51,378] [INFO] [logging.py:96:log_dist] [Rank 0] step=3480, skipped=53, lr=[3.2146513525324026e-06, 3.2146513525324026e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3479|ppo_ep: 1|act_loss: -0.03985595703125|cri_loss: -0.0194244384765625|unsuper_loss: 0.0 +average reward score: 5.69921875 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.28%) |Training time=0.46s (21.00%) |Others=0.15 (6.72%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3480|ppo_ep: 1|act_loss: -0.005619049072265625|cri_loss: -0.0025501251220703125|unsuper_loss: 0.0 +average reward score: 4.4765625 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.74s (74.56%) |Training time=0.46s (19.97%) |Others=0.13 (5.47%)|CurSamplesPerSec=13.75 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3481|ppo_ep: 1|act_loss: -0.0032958984375|cri_loss: -0.0014524459838867188|unsuper_loss: 0.0 +average reward score: 5.3125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.86%) |Training time=0.47s (21.33%) |Others=0.11 (4.80%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3482|ppo_ep: 1|act_loss: -0.0078582763671875|cri_loss: -0.0034637451171875|unsuper_loss: 0.0 +average reward score: 5.0546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.52%) |Training time=0.47s (21.95%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.45 +epoch: 0|step: 3483|ppo_ep: 1|act_loss: 0.001445770263671875|cri_loss: 0.0008373260498046875|unsuper_loss: 0.0 +average reward score: 5.4375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.73%) |Training time=0.47s (21.71%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3484|ppo_ep: 1|act_loss: -0.03155517578125|cri_loss: -0.0147247314453125|unsuper_loss: 0.0 +average reward score: 5.0546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.09%) |Training time=0.48s (22.37%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3485|ppo_ep: 1|act_loss: -0.00524139404296875|cri_loss: -0.002017974853515625|unsuper_loss: 0.0 +average reward score: 4.71875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.36%) |Training time=0.47s (22.08%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3486|ppo_ep: 1|act_loss: 0.011993408203125|cri_loss: 0.006175994873046875|unsuper_loss: 0.0 +average reward score: 5.54296875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.14%) |Training time=0.48s (22.31%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3487|ppo_ep: 1|act_loss: 0.0201263427734375|cri_loss: 0.01052093505859375|unsuper_loss: 0.0 +average reward score: 5.1328125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.58%) |Training time=0.47s (21.84%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3488|ppo_ep: 1|act_loss: 0.029449462890625|cri_loss: 0.01541900634765625|unsuper_loss: 0.0 +average reward score: 4.85546875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.68%) |Training time=0.47s (21.74%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.46 +[2023-04-14 10:56:12,986] [INFO] [logging.py:96:log_dist] [Rank 0] step=3490, skipped=48, lr=[6.177552692623376e-06, 6.177552692623376e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:56:13,005] [INFO] [timer.py:199:stop] epoch=0/micro_step=3490/global_step=3490, RunningAvgSamplesPerSec=105.75557515461509, CurrSamplesPerSec=105.23719384686797, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:56:13,097] [INFO] [logging.py:96:log_dist] [Rank 0] step=3490, skipped=53, lr=[3.2054227398644253e-06, 3.2054227398644253e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3489|ppo_ep: 1|act_loss: -0.0174713134765625|cri_loss: -0.00785064697265625|unsuper_loss: 0.0 +average reward score: 5.80859375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.66%) |Training time=0.47s (21.77%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3490|ppo_ep: 1|act_loss: -0.029205322265625|cri_loss: -0.0137939453125|unsuper_loss: 0.0 +average reward score: 5.65234375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.54%) |Training time=0.47s (21.89%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3491|ppo_ep: 1|act_loss: 0.018524169921875|cri_loss: 0.01007843017578125|unsuper_loss: 0.0 +average reward score: 4.5546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.09%) |Training time=0.48s (22.33%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3492|ppo_ep: 1|act_loss: -0.0187835693359375|cri_loss: -0.00879669189453125|unsuper_loss: 0.0 +average reward score: 5.6171875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.91%) |Training time=0.46s (21.26%) |Others=0.10 (4.83%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3493|ppo_ep: 1|act_loss: -0.01885986328125|cri_loss: -0.0089111328125|unsuper_loss: 0.0 +average reward score: 4.97265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.84%) |Training time=0.46s (21.59%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3494|ppo_ep: 1|act_loss: 0.0062103271484375|cri_loss: 0.0032501220703125|unsuper_loss: 0.0 +average reward score: 5.63671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.16%) |Training time=0.48s (22.31%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3495|ppo_ep: 1|act_loss: 0.005767822265625|cri_loss: 0.0030612945556640625|unsuper_loss: 0.0 +average reward score: 4.77734375 +------------------------------------------------------------------------------------- +|E2E latency=2.47s |Gather latency=0.00s (0.00%) |Generate time=1.65s (66.68%) |Training time=0.59s (23.83%) |Others=0.23 (9.48%)|CurSamplesPerSec=12.94 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3496|ppo_ep: 1|act_loss: 0.029144287109375|cri_loss: 0.01495361328125|unsuper_loss: 0.0 +average reward score: 5.16015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.88%) |Training time=0.47s (21.57%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3497|ppo_ep: 1|act_loss: -0.01514434814453125|cri_loss: -0.0066375732421875|unsuper_loss: 0.0 +average reward score: 4.75390625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.70%) |Training time=0.47s (21.75%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3498|ppo_ep: 1|act_loss: -0.020843505859375|cri_loss: -0.00994110107421875|unsuper_loss: 0.0 +average reward score: 4.4765625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.35%) |Training time=0.48s (22.09%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +[2023-04-14 10:56:34,865] [INFO] [logging.py:96:log_dist] [Rank 0] step=3500, skipped=48, lr=[6.159711298956566e-06, 6.159711298956566e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:56:34,883] [INFO] [timer.py:199:stop] epoch=0/micro_step=3500/global_step=3500, RunningAvgSamplesPerSec=105.74144100983754, CurrSamplesPerSec=101.71870384138222, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:56:34,976] [INFO] [logging.py:96:log_dist] [Rank 0] step=3500, skipped=53, lr=[3.1961836711297046e-06, 3.1961836711297046e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3499|ppo_ep: 1|act_loss: 0.0005273818969726562|cri_loss: 0.0006098747253417969|unsuper_loss: 0.0 +average reward score: 4.453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.40%) |Training time=0.48s (22.08%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3500|ppo_ep: 1|act_loss: 0.0246734619140625|cri_loss: 0.01354217529296875|unsuper_loss: 0.0 +average reward score: 5.3203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.30%) |Training time=0.48s (22.15%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3501|ppo_ep: 1|act_loss: -0.003116607666015625|cri_loss: -0.001251220703125|unsuper_loss: 0.0 +average reward score: 5.65625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.58%) |Training time=0.48s (22.14%) |Others=0.11 (5.28%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3502|ppo_ep: 1|act_loss: -0.0015268325805664062|cri_loss: -0.0006113052368164062|unsuper_loss: 0.0 +average reward score: 5.3046875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.48s (22.25%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3503|ppo_ep: 1|act_loss: -0.009063720703125|cri_loss: -0.0035953521728515625|unsuper_loss: 0.0 +average reward score: 5.234375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.45%) |Training time=0.47s (22.00%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3504|ppo_ep: 1|act_loss: -0.01104736328125|cri_loss: -0.005390167236328125|unsuper_loss: 0.0 +average reward score: 5.0546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.08%) |Training time=0.48s (22.37%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3505|ppo_ep: 1|act_loss: 0.01168060302734375|cri_loss: 0.00601959228515625|unsuper_loss: 0.0 +average reward score: 4.6953125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.43%) |Training time=0.47s (22.02%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3506|ppo_ep: 1|act_loss: -0.01763916015625|cri_loss: -0.00856781005859375|unsuper_loss: 0.0 +average reward score: 6.0234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.94%) |Training time=0.49s (22.50%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3507|ppo_ep: 1|act_loss: -0.0230255126953125|cri_loss: -0.01116180419921875|unsuper_loss: 0.0 +average reward score: 4.8515625 +------------------------------------------------------------------------------------- +|E2E latency=2.44s |Gather latency=0.00s (0.00%) |Generate time=1.59s (65.11%) |Training time=0.47s (19.31%) |Others=0.38 (15.58%)|CurSamplesPerSec=13.13 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3508|ppo_ep: 1|act_loss: -0.006900787353515625|cri_loss: -0.002567291259765625|unsuper_loss: 0.0 +average reward score: 5.2734375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.61%) |Others=0.10 (4.79%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46 +[2023-04-14 10:56:56,750] [INFO] [logging.py:96:log_dist] [Rank 0] step=3510, skipped=48, lr=[6.141850121648488e-06, 6.141850121648488e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:56:56,769] [INFO] [timer.py:199:stop] epoch=0/micro_step=3510/global_step=3510, RunningAvgSamplesPerSec=105.72984477658514, CurrSamplesPerSec=104.4290330386313, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:56:56,861] [INFO] [logging.py:96:log_dist] [Rank 0] step=3510, skipped=53, lr=[3.186934283273525e-06, 3.186934283273525e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3509|ppo_ep: 1|act_loss: -0.028656005859375|cri_loss: -0.01386260986328125|unsuper_loss: 0.0 +average reward score: 5.9375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.47s (21.69%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3510|ppo_ep: 1|act_loss: 0.002899169921875|cri_loss: 0.0016813278198242188|unsuper_loss: 0.0 +average reward score: 5.0390625 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.68s (74.55%) |Training time=0.47s (21.08%) |Others=0.10 (4.37%)|CurSamplesPerSec=14.22 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3511|ppo_ep: 1|act_loss: 0.035186767578125|cri_loss: 0.018035888671875|unsuper_loss: 0.0 +average reward score: 4.7890625 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.30%) |Training time=0.47s (20.47%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3512|ppo_ep: 1|act_loss: 0.01568603515625|cri_loss: 0.00904083251953125|unsuper_loss: 0.0 +average reward score: 4.7109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.48%) |Training time=0.47s (21.95%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3513|ppo_ep: 1|act_loss: 0.00518035888671875|cri_loss: 0.003559112548828125|unsuper_loss: 0.0 +average reward score: 5.59765625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.42%) |Training time=0.47s (22.01%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46 +[2023-04-14 10:57:07,867] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 3514|ppo_ep: 1|act_loss: 0.0416259765625|cri_loss: 0.0216064453125|unsuper_loss: 0.0 +average reward score: 5.515625 +------------------------------------------------------------------------------------- +|E2E latency=2.59s |Gather latency=0.00s (0.00%) |Generate time=1.58s (60.83%) |Training time=0.48s (18.46%) |Others=0.54 (20.70%)|CurSamplesPerSec=12.34 |AvgSamplesPerSec=14.46 +[2023-04-14 10:57:10,455] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 3515|ppo_ep: 1|act_loss: 0.0184783935546875|cri_loss: 0.0100860595703125|unsuper_loss: 0.0 +average reward score: 4.76953125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.74%) |Training time=0.47s (22.17%) |Others=0.09 (4.09%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3516|ppo_ep: 1|act_loss: -0.000457763671875|cri_loss: 0.0002994537353515625|unsuper_loss: 0.0 +average reward score: 5.1171875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.48s (22.12%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3517|ppo_ep: 1|act_loss: 0.023529052734375|cri_loss: 0.01279449462890625|unsuper_loss: 0.0 +average reward score: 4.50390625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.26%) |Training time=0.48s (22.12%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3518|ppo_ep: 1|act_loss: 0.00598907470703125|cri_loss: 0.003543853759765625|unsuper_loss: 0.0 +average reward score: 4.49609375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.51%) |Training time=0.47s (21.92%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46 +[2023-04-14 10:57:18,957] [INFO] [logging.py:96:log_dist] [Rank 0] step=3520, skipped=48, lr=[6.123969425444881e-06, 6.123969425444881e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:57:18,976] [INFO] [timer.py:199:stop] epoch=0/micro_step=3520/global_step=3520, RunningAvgSamplesPerSec=105.7193975041583, CurrSamplesPerSec=99.69532912568643, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:57:19,068] [INFO] [logging.py:96:log_dist] [Rank 0] step=3520, skipped=55, lr=[3.1795274353460633e-06, 3.1795274353460633e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3519|ppo_ep: 1|act_loss: -0.018646240234375|cri_loss: -0.00894927978515625|unsuper_loss: 0.0 +average reward score: 4.77734375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.07%) |Training time=0.48s (22.39%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3520|ppo_ep: 1|act_loss: -0.04638671875|cri_loss: -0.0218658447265625|unsuper_loss: 0.0 +average reward score: 4.53125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.39%) |Training time=0.47s (22.05%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3521|ppo_ep: 1|act_loss: -0.018463134765625|cri_loss: -0.00884246826171875|unsuper_loss: 0.0 +average reward score: 5.203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.34%) |Training time=0.48s (22.10%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3522|ppo_ep: 1|act_loss: -0.014129638671875|cri_loss: -0.0068817138671875|unsuper_loss: 0.0 +average reward score: 5.65625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.49%) |Training time=0.47s (21.95%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3523|ppo_ep: 1|act_loss: -0.01030731201171875|cri_loss: -0.00466156005859375|unsuper_loss: 0.0 +average reward score: 5.22265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.37%) |Training time=0.48s (22.09%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3524|ppo_ep: 1|act_loss: 0.0089874267578125|cri_loss: 0.005382537841796875|unsuper_loss: 0.0 +average reward score: 5.046875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.99%) |Training time=0.49s (22.46%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3525|ppo_ep: 1|act_loss: 0.0082855224609375|cri_loss: 0.004657745361328125|unsuper_loss: 0.0 +average reward score: 5.46875 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.74s (75.04%) |Training time=0.48s (20.71%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3526|ppo_ep: 1|act_loss: 0.04345703125|cri_loss: 0.02294921875|unsuper_loss: 0.0 +average reward score: 4.01953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.48s (22.01%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3527|ppo_ep: 1|act_loss: 0.0106353759765625|cri_loss: 0.00634002685546875|unsuper_loss: 0.0 +average reward score: 5.14453125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.01%) |Training time=0.48s (22.30%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3528|ppo_ep: 1|act_loss: 0.0018854141235351562|cri_loss: 0.0011796951293945312|unsuper_loss: 0.0 +average reward score: 5.015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.48s (22.10%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +[2023-04-14 10:57:40,713] [INFO] [logging.py:96:log_dist] [Rank 0] step=3530, skipped=48, lr=[6.106069475380793e-06, 6.106069475380793e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:57:40,731] [INFO] [timer.py:199:stop] epoch=0/micro_step=3530/global_step=3530, RunningAvgSamplesPerSec=105.70634007207184, CurrSamplesPerSec=100.36808836011082, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:57:40,823] [INFO] [logging.py:96:log_dist] [Rank 0] step=3530, skipped=55, lr=[3.1702598186603152e-06, 3.1702598186603152e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3529|ppo_ep: 1|act_loss: 0.022003173828125|cri_loss: 0.011566162109375|unsuper_loss: 0.0 +average reward score: 6.10546875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.45%) |Training time=0.48s (22.08%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3530|ppo_ep: 1|act_loss: -0.027252197265625|cri_loss: -0.01322174072265625|unsuper_loss: 0.0 +average reward score: 5.08984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.25%) |Training time=0.48s (22.20%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3531|ppo_ep: 1|act_loss: -0.0112152099609375|cri_loss: -0.00482940673828125|unsuper_loss: 0.0 +average reward score: 5.36328125 +------------------------------------------------------------------------------------- +|E2E latency=2.54s |Gather latency=0.00s (0.00%) |Generate time=1.58s (62.12%) |Training time=0.49s (19.26%) |Others=0.47 (18.62%)|CurSamplesPerSec=12.59 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3532|ppo_ep: 1|act_loss: -0.002079010009765625|cri_loss: -0.0007619857788085938|unsuper_loss: 0.0 +average reward score: 5.5546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.80%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3533|ppo_ep: 1|act_loss: -0.0006546974182128906|cri_loss: -0.00023233890533447266|unsuper_loss: 0.0 +average reward score: 5.0 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.57%) |Training time=0.47s (21.87%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3534|ppo_ep: 1|act_loss: 0.02215576171875|cri_loss: 0.011871337890625|unsuper_loss: 0.0 +average reward score: 5.40625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.86%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3535|ppo_ep: 1|act_loss: -0.00395965576171875|cri_loss: -0.0013418197631835938|unsuper_loss: 0.0 +average reward score: 4.8125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.76%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3536|ppo_ep: 1|act_loss: -0.0135040283203125|cri_loss: -0.00617218017578125|unsuper_loss: 0.0 +average reward score: 5.51953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.73%) |Training time=0.47s (21.69%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3537|ppo_ep: 1|act_loss: 0.0075225830078125|cri_loss: 0.004032135009765625|unsuper_loss: 0.0 +average reward score: 6.2265625 +------------------------------------------------------------------------------------- +|E2E latency=2.81s |Gather latency=0.00s (0.00%) |Generate time=1.59s (56.84%) |Training time=0.47s (16.76%) |Others=0.74 (26.40%)|CurSamplesPerSec=11.41 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3538|ppo_ep: 1|act_loss: -0.02703857421875|cri_loss: -0.012420654296875|unsuper_loss: 0.0 +average reward score: 5.328125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.61%) |Training time=0.48s (21.90%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.46 +[2023-04-14 10:58:03,434] [INFO] [logging.py:96:log_dist] [Rank 0] step=3540, skipped=48, lr=[6.0881505367766705e-06, 6.0881505367766705e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:58:03,452] [INFO] [timer.py:199:stop] epoch=0/micro_step=3540/global_step=3540, RunningAvgSamplesPerSec=105.69823743299148, CurrSamplesPerSec=104.70218269755831, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:58:03,544] [INFO] [logging.py:96:log_dist] [Rank 0] step=3540, skipped=55, lr=[3.1609822671071404e-06, 3.1609822671071404e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3539|ppo_ep: 1|act_loss: -0.00218963623046875|cri_loss: -0.0005474090576171875|unsuper_loss: 0.0 +average reward score: 5.3125 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.91%) |Training time=0.47s (20.75%) |Others=0.10 (4.33%)|CurSamplesPerSec=14.19 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3540|ppo_ep: 1|act_loss: 0.0013484954833984375|cri_loss: 0.00098419189453125|unsuper_loss: 0.0 +average reward score: 5.22265625 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.53%) |Training time=0.49s (21.99%) |Others=0.14 (6.48%)|CurSamplesPerSec=14.44 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3541|ppo_ep: 1|act_loss: 0.004383087158203125|cri_loss: 0.00302886962890625|unsuper_loss: 0.0 +average reward score: 4.9453125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.92%) |Training time=0.46s (21.50%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3542|ppo_ep: 1|act_loss: 0.0372314453125|cri_loss: 0.0189666748046875|unsuper_loss: 0.0 +average reward score: 6.0546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.70%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3543|ppo_ep: 1|act_loss: -0.0019016265869140625|cri_loss: -0.0007863044738769531|unsuper_loss: 0.0 +average reward score: 4.609375 +------------------------------------------------------------------------------------- +|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.83%) |Training time=0.43s (20.51%) |Others=0.10 (4.65%)|CurSamplesPerSec=15.12 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3544|ppo_ep: 1|act_loss: 0.00246429443359375|cri_loss: 0.00146484375|unsuper_loss: 0.0 +average reward score: 5.890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.97%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3545|ppo_ep: 1|act_loss: 0.002471923828125|cri_loss: 0.0014495849609375|unsuper_loss: 0.0 +average reward score: 4.5234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.22%) |Training time=0.48s (22.23%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3546|ppo_ep: 1|act_loss: -1.728534698486328e-05|cri_loss: 0.00010848045349121094|unsuper_loss: 0.0 +average reward score: 5.078125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.18%) |Training time=0.48s (22.27%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3547|ppo_ep: 1|act_loss: -0.004550933837890625|cri_loss: -0.002094268798828125|unsuper_loss: 0.0 +average reward score: 4.3203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.00%) |Training time=0.49s (22.46%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3548|ppo_ep: 1|act_loss: -0.0041656494140625|cri_loss: -0.001705169677734375|unsuper_loss: 0.0 +average reward score: 5.09765625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.18%) |Training time=0.48s (22.28%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46 +[2023-04-14 10:58:25,043] [INFO] [logging.py:96:log_dist] [Rank 0] step=3550, skipped=48, lr=[6.070212875234407e-06, 6.070212875234407e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:58:25,628] [INFO] [timer.py:199:stop] epoch=0/micro_step=3550/global_step=3550, RunningAvgSamplesPerSec=105.63486237523144, CurrSamplesPerSec=36.294179826075954, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:58:25,721] [INFO] [logging.py:96:log_dist] [Rank 0] step=3550, skipped=55, lr=[3.151694918202235e-06, 3.151694918202235e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3549|ppo_ep: 1|act_loss: -0.04327392578125|cri_loss: -0.0212249755859375|unsuper_loss: 0.0 +average reward score: 5.03515625 +------------------------------------------------------------------------------------- +|E2E latency=2.72s |Gather latency=0.00s (0.00%) |Generate time=1.58s (58.02%) |Training time=1.04s (38.39%) |Others=0.10 (3.59%)|CurSamplesPerSec=11.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3550|ppo_ep: 1|act_loss: -0.011199951171875|cri_loss: -0.005176544189453125|unsuper_loss: 0.0 +average reward score: 4.8984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.21%) |Training time=0.48s (22.23%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3551|ppo_ep: 1|act_loss: -0.00017547607421875|cri_loss: 0.0007619857788085938|unsuper_loss: 0.0 +average reward score: 4.36328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.06%) |Training time=0.48s (22.37%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3552|ppo_ep: 1|act_loss: 0.0054168701171875|cri_loss: 0.00298309326171875|unsuper_loss: 0.0 +average reward score: 4.94140625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.11%) |Training time=0.46s (21.29%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3553|ppo_ep: 1|act_loss: -0.0256500244140625|cri_loss: -0.0125885009765625|unsuper_loss: 0.0 +average reward score: 4.7578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.74%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3554|ppo_ep: 1|act_loss: 0.0131378173828125|cri_loss: 0.007228851318359375|unsuper_loss: 0.0 +average reward score: 5.15625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.77%) |Training time=0.47s (21.68%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3555|ppo_ep: 1|act_loss: 0.0072784423828125|cri_loss: 0.0038852691650390625|unsuper_loss: 0.0 +average reward score: 4.77734375 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.74s (75.03%) |Training time=0.48s (20.73%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3556|ppo_ep: 1|act_loss: 0.004425048828125|cri_loss: 0.0025787353515625|unsuper_loss: 0.0 +average reward score: 5.47265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.67%) |Training time=0.47s (21.82%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3557|ppo_ep: 1|act_loss: 0.022064208984375|cri_loss: 0.0125885009765625|unsuper_loss: 0.0 +average reward score: 5.0546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.55%) |Training time=0.47s (21.90%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3558|ppo_ep: 1|act_loss: -0.018798828125|cri_loss: -0.0089874267578125|unsuper_loss: 0.0 +average reward score: 4.953125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.74%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +[2023-04-14 10:58:47,342] [INFO] [logging.py:96:log_dist] [Rank 0] step=3560, skipped=48, lr=[6.0522567566334204e-06, 6.0522567566334204e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:58:47,360] [INFO] [timer.py:199:stop] epoch=0/micro_step=3560/global_step=3560, RunningAvgSamplesPerSec=105.62931678444558, CurrSamplesPerSec=108.95321962132799, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:58:47,453] [INFO] [logging.py:96:log_dist] [Rank 0] step=3560, skipped=55, lr=[3.1423979096065134e-06, 3.1423979096065134e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3559|ppo_ep: 1|act_loss: -0.027008056640625|cri_loss: -0.0125732421875|unsuper_loss: 0.0 +average reward score: 4.98046875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.22%) |Training time=0.46s (21.23%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3560|ppo_ep: 1|act_loss: 0.0114288330078125|cri_loss: 0.006137847900390625|unsuper_loss: 0.0 +average reward score: 5.59765625 +------------------------------------------------------------------------------------- +|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.59s (67.66%) |Training time=0.46s (19.78%) |Others=0.30 (12.57%)|CurSamplesPerSec=13.61 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3561|ppo_ep: 1|act_loss: 0.0050811767578125|cri_loss: 0.002773284912109375|unsuper_loss: 0.0 +average reward score: 4.75 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.95%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3562|ppo_ep: 1|act_loss: -0.010101318359375|cri_loss: -0.00379180908203125|unsuper_loss: 0.0 +average reward score: 4.0390625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.76%) |Training time=0.47s (21.66%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3563|ppo_ep: 1|act_loss: 0.033172607421875|cri_loss: 0.017578125|unsuper_loss: 0.0 +average reward score: 5.46875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.11%) |Training time=0.46s (21.32%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3564|ppo_ep: 1|act_loss: 0.005565643310546875|cri_loss: 0.003925323486328125|unsuper_loss: 0.0 +average reward score: 5.7421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.84%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3565|ppo_ep: 1|act_loss: 0.01776123046875|cri_loss: 0.0090789794921875|unsuper_loss: 0.0 +average reward score: 4.484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.68%) |Training time=0.47s (21.76%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3566|ppo_ep: 1|act_loss: 0.0094451904296875|cri_loss: 0.00511932373046875|unsuper_loss: 0.0 +average reward score: 5.0 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.33%) |Training time=0.45s (21.06%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3567|ppo_ep: 1|act_loss: -0.00640869140625|cri_loss: -0.00267791748046875|unsuper_loss: 0.0 +average reward score: 4.79296875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.07%) |Training time=0.46s (21.07%) |Others=0.11 (4.85%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3568|ppo_ep: 1|act_loss: -0.02154541015625|cri_loss: -0.01007843017578125|unsuper_loss: 0.0 +average reward score: 4.8984375 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.62s (72.70%) |Training time=0.51s (22.87%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.40 |AvgSamplesPerSec=14.46 +[2023-04-14 10:59:09,144] [INFO] [logging.py:96:log_dist] [Rank 0] step=3570, skipped=48, lr=[6.0342824471267055e-06, 6.0342824471267055e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:59:09,162] [INFO] [timer.py:199:stop] epoch=0/micro_step=3570/global_step=3570, RunningAvgSamplesPerSec=105.63078928049823, CurrSamplesPerSec=105.9774002645137, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:59:09,255] [INFO] [logging.py:96:log_dist] [Rank 0] step=3570, skipped=55, lr=[3.1330913791240707e-06, 3.1330913791240707e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3569|ppo_ep: 1|act_loss: -0.0247802734375|cri_loss: -0.01129150390625|unsuper_loss: 0.0 +average reward score: 5.0546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.46s (21.58%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3570|ppo_ep: 1|act_loss: -0.0223846435546875|cri_loss: -0.01056671142578125|unsuper_loss: 0.0 +average reward score: 5.640625 +------------------------------------------------------------------------------------- +|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.66%) |Training time=0.46s (19.63%) |Others=0.13 (5.72%)|CurSamplesPerSec=13.60 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3571|ppo_ep: 1|act_loss: 0.016448974609375|cri_loss: 0.00888824462890625|unsuper_loss: 0.0 +average reward score: 4.62890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.52%) |Training time=0.47s (21.94%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3572|ppo_ep: 1|act_loss: -0.0006151199340820312|cri_loss: 0.00021076202392578125|unsuper_loss: 0.0 +average reward score: 5.0234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.66%) |Training time=0.47s (21.78%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3573|ppo_ep: 1|act_loss: -0.006267547607421875|cri_loss: -0.0024242401123046875|unsuper_loss: 0.0 +average reward score: 5.28515625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.29%) |Training time=0.45s (21.14%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3574|ppo_ep: 1|act_loss: -0.027252197265625|cri_loss: -0.01296234130859375|unsuper_loss: 0.0 +average reward score: 5.60546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.82%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3575|ppo_ep: 1|act_loss: -0.00567626953125|cri_loss: -0.002521514892578125|unsuper_loss: 0.0 +average reward score: 5.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.68%) |Training time=0.47s (21.72%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3576|ppo_ep: 1|act_loss: -0.017608642578125|cri_loss: -0.0083770751953125|unsuper_loss: 0.0 +average reward score: 4.99609375 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.59s (68.85%) |Training time=0.46s (20.14%) |Others=0.25 (11.01%)|CurSamplesPerSec=13.86 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3577|ppo_ep: 1|act_loss: 0.00687408447265625|cri_loss: 0.0038509368896484375|unsuper_loss: 0.0 +average reward score: 4.546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.77%) |Training time=0.47s (21.59%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3578|ppo_ep: 1|act_loss: -0.0240020751953125|cri_loss: -0.01154327392578125|unsuper_loss: 0.0 +average reward score: 5.15234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.70%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46 +[2023-04-14 10:59:31,042] [INFO] [logging.py:96:log_dist] [Rank 0] step=3580, skipped=48, lr=[6.016290213136888e-06, 6.016290213136888e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:59:31,060] [INFO] [timer.py:199:stop] epoch=0/micro_step=3580/global_step=3580, RunningAvgSamplesPerSec=105.62996466221267, CurrSamplesPerSec=105.79012340813594, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:59:31,152] [INFO] [logging.py:96:log_dist] [Rank 0] step=3580, skipped=55, lr=[3.1237754647001396e-06, 3.1237754647001396e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3579|ppo_ep: 1|act_loss: 0.00429534912109375|cri_loss: 0.00231170654296875|unsuper_loss: 0.0 +average reward score: 5.15234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.46s (21.60%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3580|ppo_ep: 1|act_loss: 0.0301055908203125|cri_loss: 0.0154571533203125|unsuper_loss: 0.0 +average reward score: 4.7109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.65%) |Training time=0.47s (21.80%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3581|ppo_ep: 1|act_loss: -0.0161895751953125|cri_loss: -0.00774383544921875|unsuper_loss: 0.0 +average reward score: 5.6484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.51%) |Training time=0.45s (20.94%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3582|ppo_ep: 1|act_loss: -0.019378662109375|cri_loss: -0.0086212158203125|unsuper_loss: 0.0 +average reward score: 5.0703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.01%) |Training time=0.44s (20.43%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3583|ppo_ep: 1|act_loss: 0.026580810546875|cri_loss: 0.0140228271484375|unsuper_loss: 0.0 +average reward score: 5.296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.77%) |Training time=0.47s (21.67%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3584|ppo_ep: 1|act_loss: 0.031585693359375|cri_loss: 0.01611328125|unsuper_loss: 0.0 +average reward score: 5.546875 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.62s (71.67%) |Training time=0.47s (20.92%) |Others=0.17 (7.41%)|CurSamplesPerSec=14.19 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3585|ppo_ep: 1|act_loss: 0.004833221435546875|cri_loss: 0.0026397705078125|unsuper_loss: 0.0 +average reward score: 5.5546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.97%) |Training time=0.46s (21.47%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3586|ppo_ep: 1|act_loss: 0.031219482421875|cri_loss: 0.016754150390625|unsuper_loss: 0.0 +average reward score: 5.2421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.59%) |Training time=0.47s (21.86%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3587|ppo_ep: 1|act_loss: 0.01354217529296875|cri_loss: 0.006961822509765625|unsuper_loss: 0.0 +average reward score: 5.35546875 +------------------------------------------------------------------------------------- +|E2E latency=2.44s |Gather latency=0.00s (0.00%) |Generate time=1.59s (65.22%) |Training time=0.47s (19.17%) |Others=0.38 (15.61%)|CurSamplesPerSec=13.13 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3588|ppo_ep: 1|act_loss: 0.003223419189453125|cri_loss: 0.002239227294921875|unsuper_loss: 0.0 +average reward score: 5.09765625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.67%) |Training time=0.47s (21.78%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46 +[2023-04-14 10:59:52,984] [INFO] [logging.py:96:log_dist] [Rank 0] step=3590, skipped=48, lr=[5.998280321352281e-06, 5.998280321352281e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 10:59:53,003] [INFO] [timer.py:199:stop] epoch=0/micro_step=3590/global_step=3590, RunningAvgSamplesPerSec=105.63170114714389, CurrSamplesPerSec=103.337966759135, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 10:59:53,095] [INFO] [logging.py:96:log_dist] [Rank 0] step=3590, skipped=55, lr=[3.1144503044190456e-06, 3.1144503044190456e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3589|ppo_ep: 1|act_loss: -0.026947021484375|cri_loss: -0.0128936767578125|unsuper_loss: 0.0 +average reward score: 5.046875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.91%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3590|ppo_ep: 1|act_loss: -0.0281524658203125|cri_loss: -0.0134124755859375|unsuper_loss: 0.0 +average reward score: 5.1953125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.95%) |Training time=0.46s (21.49%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3591|ppo_ep: 1|act_loss: -0.03668212890625|cri_loss: -0.017578125|unsuper_loss: 0.0 +average reward score: 5.39453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.88%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3592|ppo_ep: 1|act_loss: -0.001079559326171875|cri_loss: 0.00086212158203125|unsuper_loss: 0.0 +average reward score: 4.828125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.72%) |Training time=0.47s (21.66%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3593|ppo_ep: 1|act_loss: 0.0159759521484375|cri_loss: 0.00830078125|unsuper_loss: 0.0 +average reward score: 5.3828125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.88%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3594|ppo_ep: 1|act_loss: 0.0513916015625|cri_loss: 0.027069091796875|unsuper_loss: 0.0 +average reward score: 4.35546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.81%) |Training time=0.47s (21.62%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3595|ppo_ep: 1|act_loss: 0.01678466796875|cri_loss: 0.00872039794921875|unsuper_loss: 0.0 +average reward score: 5.67578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.59%) |Training time=0.47s (21.86%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3596|ppo_ep: 1|act_loss: 0.02386474609375|cri_loss: 0.012664794921875|unsuper_loss: 0.0 +average reward score: 3.826171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.23%) |Training time=0.46s (21.20%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3597|ppo_ep: 1|act_loss: 0.0190582275390625|cri_loss: 0.00998687744140625|unsuper_loss: 0.0 +average reward score: 4.86328125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.16%) |Training time=0.45s (20.35%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3598|ppo_ep: 1|act_loss: -0.018890380859375|cri_loss: -0.0084686279296875|unsuper_loss: 0.0 +average reward score: 5.3125 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.69s (75.01%) |Training time=0.46s (20.62%) |Others=0.10 (4.36%)|CurSamplesPerSec=14.22 |AvgSamplesPerSec=14.46 +[2023-04-14 11:00:14,694] [INFO] [logging.py:96:log_dist] [Rank 0] step=3600, skipped=48, lr=[5.980253038722927e-06, 5.980253038722927e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:00:15,110] [INFO] [timer.py:199:stop] epoch=0/micro_step=3600/global_step=3600, RunningAvgSamplesPerSec=105.5920159904189, CurrSamplesPerSec=44.41792340872793, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:00:15,203] [INFO] [logging.py:96:log_dist] [Rank 0] step=3600, skipped=55, lr=[3.1051160365021587e-06, 3.1051160365021587e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3599|ppo_ep: 1|act_loss: -0.0035114288330078125|cri_loss: -0.00138092041015625|unsuper_loss: 0.0 +average reward score: 4.9375 +------------------------------------------------------------------------------------- +|E2E latency=2.56s |Gather latency=0.00s (0.00%) |Generate time=1.58s (61.70%) |Training time=0.88s (34.49%) |Others=0.10 (3.81%)|CurSamplesPerSec=12.49 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3600|ppo_ep: 1|act_loss: 0.02923583984375|cri_loss: 0.015167236328125|unsuper_loss: 0.0 +average reward score: 5.0390625 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.54%) |Training time=0.47s (20.24%) |Others=0.10 (4.22%)|CurSamplesPerSec=13.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3601|ppo_ep: 1|act_loss: -0.040283203125|cri_loss: -0.0187835693359375|unsuper_loss: 0.0 +average reward score: 4.88671875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.02%) |Training time=0.49s (22.49%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3602|ppo_ep: 1|act_loss: -0.0059967041015625|cri_loss: -0.0027408599853515625|unsuper_loss: 0.0 +average reward score: 5.328125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.73%) |Training time=0.47s (21.72%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3603|ppo_ep: 1|act_loss: -0.01409149169921875|cri_loss: -0.00624847412109375|unsuper_loss: 0.0 +average reward score: 4.75390625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.84%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3604|ppo_ep: 1|act_loss: -0.012481689453125|cri_loss: -0.00504302978515625|unsuper_loss: 0.0 +average reward score: 5.59375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.82%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3605|ppo_ep: 1|act_loss: 0.0126190185546875|cri_loss: 0.006801605224609375|unsuper_loss: 0.0 +average reward score: 4.953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.74%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3606|ppo_ep: 1|act_loss: -0.0205230712890625|cri_loss: -0.00936126708984375|unsuper_loss: 0.0 +average reward score: 5.51171875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.81%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3607|ppo_ep: 1|act_loss: 0.03607177734375|cri_loss: 0.0196533203125|unsuper_loss: 0.0 +average reward score: 4.8828125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.06%) |Training time=0.46s (21.39%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3608|ppo_ep: 1|act_loss: 0.00792694091796875|cri_loss: 0.004543304443359375|unsuper_loss: 0.0 +average reward score: 5.38671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.70%) |Training time=0.47s (21.75%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +[2023-04-14 11:00:36,841] [INFO] [logging.py:96:log_dist] [Rank 0] step=3610, skipped=48, lr=[5.962208632456643e-06, 5.962208632456643e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:00:37,031] [INFO] [timer.py:199:stop] epoch=0/micro_step=3610/global_step=3610, RunningAvgSamplesPerSec=105.57008798024897, CurrSamplesPerSec=66.34913324972403, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:00:37,124] [INFO] [logging.py:96:log_dist] [Rank 0] step=3610, skipped=55, lr=[3.095772799305849e-06, 3.095772799305849e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3609|ppo_ep: 1|act_loss: 0.00445556640625|cri_loss: 0.00432586669921875|unsuper_loss: 0.0 +average reward score: 4.5390625 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.59s (68.12%) |Training time=0.64s (27.68%) |Others=0.10 (4.20%)|CurSamplesPerSec=13.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3610|ppo_ep: 1|act_loss: 0.0025844573974609375|cri_loss: 0.0023670196533203125|unsuper_loss: 0.0 +average reward score: 5.9453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.84%) |Training time=0.47s (21.60%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3611|ppo_ep: 1|act_loss: 0.001483917236328125|cri_loss: 0.0009298324584960938|unsuper_loss: 0.0 +average reward score: 4.76953125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.36%) |Training time=0.46s (21.10%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3612|ppo_ep: 1|act_loss: -0.013458251953125|cri_loss: -0.00569915771484375|unsuper_loss: 0.0 +average reward score: 4.8828125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.22%) |Training time=0.46s (21.23%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3613|ppo_ep: 1|act_loss: 0.023895263671875|cri_loss: 0.0121612548828125|unsuper_loss: 0.0 +average reward score: 5.64453125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.02%) |Training time=0.49s (22.46%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3614|ppo_ep: 1|act_loss: -0.01114654541015625|cri_loss: -0.00380706787109375|unsuper_loss: 0.0 +average reward score: 5.38671875 +------------------------------------------------------------------------------------- +|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.81s (76.90%) |Training time=0.45s (18.92%) |Others=0.10 (4.18%)|CurSamplesPerSec=13.58 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3615|ppo_ep: 1|act_loss: -0.00399017333984375|cri_loss: -0.0018091201782226562|unsuper_loss: 0.0 +average reward score: 4.8828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.54%) |Training time=0.41s (18.88%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +[2023-04-14 11:00:52,492] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 3616|ppo_ep: 1|act_loss: 0.008026123046875|cri_loss: 0.00434112548828125|unsuper_loss: 0.0 +average reward score: 4.6796875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.71%) |Training time=0.46s (21.22%) |Others=0.09 (4.07%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46 +[2023-04-14 11:00:54,667] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 3617|ppo_ep: 1|act_loss: 0.002307891845703125|cri_loss: 0.00164794921875|unsuper_loss: 0.0 +average reward score: 5.015625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.91%) |Training time=0.46s (21.00%) |Others=0.09 (4.09%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3618|ppo_ep: 1|act_loss: 0.0006394386291503906|cri_loss: 0.0004863739013671875|unsuper_loss: 0.0 +average reward score: 4.8125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.28%) |Training time=0.46s (21.21%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.46 +[2023-04-14 11:00:58,913] [INFO] [logging.py:96:log_dist] [Rank 0] step=3620, skipped=48, lr=[5.944147370015059e-06, 5.944147370015059e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:00:58,931] [INFO] [timer.py:199:stop] epoch=0/micro_step=3620/global_step=3620, RunningAvgSamplesPerSec=105.58135629427218, CurrSamplesPerSec=118.61425940393161, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:00:59,024] [INFO] [logging.py:96:log_dist] [Rank 0] step=3620, skipped=57, lr=[3.0882918447280204e-06, 3.0882918447280204e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3619|ppo_ep: 1|act_loss: -0.03326416015625|cri_loss: -0.01611328125|unsuper_loss: 0.0 +average reward score: 5.1484375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.46%) |Training time=0.43s (19.96%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3620|ppo_ep: 1|act_loss: -0.02386474609375|cri_loss: -0.01171112060546875|unsuper_loss: 0.0 +average reward score: 5.25 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.70%) |Training time=0.43s (19.74%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3621|ppo_ep: 1|act_loss: 0.01934814453125|cri_loss: 0.01015472412109375|unsuper_loss: 0.0 +average reward score: 5.0 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.21%) |Training time=0.46s (21.23%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3622|ppo_ep: 1|act_loss: -0.01226806640625|cri_loss: -0.00591278076171875|unsuper_loss: 0.0 +average reward score: 4.8046875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.31%) |Training time=0.46s (21.14%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3623|ppo_ep: 1|act_loss: -0.01050567626953125|cri_loss: -0.0050811767578125|unsuper_loss: 0.0 +average reward score: 5.2265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.17%) |Training time=0.46s (21.29%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3624|ppo_ep: 1|act_loss: -0.0005555152893066406|cri_loss: 0.0001010894775390625|unsuper_loss: 0.0 +average reward score: 5.59375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.24%) |Training time=0.46s (21.21%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3625|ppo_ep: 1|act_loss: 0.0088043212890625|cri_loss: 0.0050048828125|unsuper_loss: 0.0 +average reward score: 4.6640625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.30%) |Training time=0.46s (21.14%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3626|ppo_ep: 1|act_loss: -0.041839599609375|cri_loss: -0.020050048828125|unsuper_loss: 0.0 +average reward score: 5.16015625 +------------------------------------------------------------------------------------- +|E2E latency=3.37s |Gather latency=0.00s (0.00%) |Generate time=1.62s (48.24%) |Training time=0.47s (13.97%) |Others=1.27 (37.79%)|CurSamplesPerSec=9.50 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3627|ppo_ep: 1|act_loss: -0.0139312744140625|cri_loss: -0.0064697265625|unsuper_loss: 0.0 +average reward score: 4.82421875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.17%) |Training time=0.46s (21.16%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3628|ppo_ep: 1|act_loss: -0.03765869140625|cri_loss: -0.0177001953125|unsuper_loss: 0.0 +average reward score: 5.5546875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.62%) |Training time=0.44s (20.77%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.46 +[2023-04-14 11:01:21,955] [INFO] [logging.py:96:log_dist] [Rank 0] step=3630, skipped=48, lr=[5.926069519109657e-06, 5.926069519109657e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:01:21,973] [INFO] [timer.py:199:stop] epoch=0/micro_step=3630/global_step=3630, RunningAvgSamplesPerSec=105.5895945710597, CurrSamplesPerSec=105.85695582357191, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:01:22,071] [INFO] [logging.py:96:log_dist] [Rank 0] step=3630, skipped=57, lr=[3.078932811909849e-06, 3.078932811909849e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3629|ppo_ep: 1|act_loss: -0.034912109375|cri_loss: -0.0164337158203125|unsuper_loss: 0.0 +average reward score: 5.29296875 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.60%) |Training time=0.47s (19.87%) |Others=0.11 (4.53%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3630|ppo_ep: 1|act_loss: -0.0015287399291992188|cri_loss: 3.528594970703125e-05|unsuper_loss: 0.0 +average reward score: 4.5 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.70%) |Training time=0.44s (20.62%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3631|ppo_ep: 1|act_loss: 0.0217132568359375|cri_loss: 0.01214599609375|unsuper_loss: 0.0 +average reward score: 4.7421875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.87%) |Training time=0.44s (20.54%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3632|ppo_ep: 1|act_loss: -0.015869140625|cri_loss: -0.00717926025390625|unsuper_loss: 0.0 +average reward score: 4.9453125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.23%) |Training time=0.45s (21.11%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3633|ppo_ep: 1|act_loss: 0.01430511474609375|cri_loss: 0.0074462890625|unsuper_loss: 0.0 +average reward score: 5.79296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.95%) |Training time=0.46s (21.47%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3634|ppo_ep: 1|act_loss: -0.0142669677734375|cri_loss: -0.006793975830078125|unsuper_loss: 0.0 +average reward score: 3.82421875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.95%) |Training time=0.46s (21.47%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3635|ppo_ep: 1|act_loss: 0.01023101806640625|cri_loss: 0.00545501708984375|unsuper_loss: 0.0 +average reward score: 5.4296875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.44%) |Training time=0.45s (20.97%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3636|ppo_ep: 1|act_loss: 0.01654052734375|cri_loss: 0.0084381103515625|unsuper_loss: 0.0 +average reward score: 4.94921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.12%) |Training time=0.46s (21.31%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3637|ppo_ep: 1|act_loss: -0.031585693359375|cri_loss: -0.01507568359375|unsuper_loss: 0.0 +average reward score: 6.13671875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.91%) |Training time=0.46s (21.51%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3638|ppo_ep: 1|act_loss: 0.0236053466796875|cri_loss: 0.01245880126953125|unsuper_loss: 0.0 +average reward score: 5.23828125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.46s (21.46%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +[2023-04-14 11:01:43,424] [INFO] [logging.py:96:log_dist] [Rank 0] step=3640, skipped=48, lr=[5.907975347697794e-06, 5.907975347697794e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:01:43,443] [INFO] [timer.py:199:stop] epoch=0/micro_step=3640/global_step=3640, RunningAvgSamplesPerSec=105.59923223574494, CurrSamplesPerSec=105.7564471787954, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:01:43,535] [INFO] [logging.py:96:log_dist] [Rank 0] step=3640, skipped=57, lr=[3.069565197910806e-06, 3.069565197910806e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3639|ppo_ep: 1|act_loss: 0.0116119384765625|cri_loss: 0.00627899169921875|unsuper_loss: 0.0 +average reward score: 5.40625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.77%) |Training time=0.47s (21.68%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3640|ppo_ep: 1|act_loss: -0.004119873046875|cri_loss: -0.0014057159423828125|unsuper_loss: 0.0 +average reward score: 4.40625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.88%) |Training time=0.46s (21.53%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3641|ppo_ep: 1|act_loss: 0.00606536865234375|cri_loss: 0.004093170166015625|unsuper_loss: 0.0 +average reward score: 4.7890625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.77%) |Training time=0.46s (21.60%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3642|ppo_ep: 1|act_loss: 0.00603485107421875|cri_loss: 0.00371551513671875|unsuper_loss: 0.0 +average reward score: 4.46875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.57%) |Training time=0.45s (20.88%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3643|ppo_ep: 1|act_loss: 0.00029468536376953125|cri_loss: 0.0005202293395996094|unsuper_loss: 0.0 +average reward score: 4.921875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.92%) |Training time=0.46s (21.50%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3644|ppo_ep: 1|act_loss: -0.00589752197265625|cri_loss: -0.00200653076171875|unsuper_loss: 0.0 +average reward score: 4.6640625 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.89%) |Training time=0.46s (19.85%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.86 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3645|ppo_ep: 1|act_loss: 0.0184326171875|cri_loss: 0.009765625|unsuper_loss: 0.0 +average reward score: 5.7265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.72%) |Training time=0.44s (20.70%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3646|ppo_ep: 1|act_loss: 0.01568603515625|cri_loss: 0.00806427001953125|unsuper_loss: 0.0 +average reward score: 4.5234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.30%) |Training time=0.45s (21.13%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3647|ppo_ep: 1|act_loss: -0.00588226318359375|cri_loss: -0.0027637481689453125|unsuper_loss: 0.0 +average reward score: 5.4609375 +------------------------------------------------------------------------------------- +|E2E latency=2.49s |Gather latency=0.00s (0.00%) |Generate time=1.59s (63.95%) |Training time=0.46s (18.28%) |Others=0.44 (17.77%)|CurSamplesPerSec=12.85 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3648|ppo_ep: 1|act_loss: -0.0154571533203125|cri_loss: -0.00659942626953125|unsuper_loss: 0.0 +average reward score: 5.08203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.12%) |Training time=0.46s (21.30%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46 +[2023-04-14 11:02:05,410] [INFO] [logging.py:96:log_dist] [Rank 0] step=3650, skipped=48, lr=[5.889865123978745e-06, 5.889865123978745e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:02:05,429] [INFO] [timer.py:199:stop] epoch=0/micro_step=3650/global_step=3650, RunningAvgSamplesPerSec=105.60741290301202, CurrSamplesPerSec=106.42917645640826, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:02:05,521] [INFO] [logging.py:96:log_dist] [Rank 0] step=3650, skipped=57, lr=[3.0601891415815286e-06, 3.0601891415815286e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3649|ppo_ep: 1|act_loss: -0.001522064208984375|cri_loss: -0.0004944801330566406|unsuper_loss: 0.0 +average reward score: 5.62890625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.90%) |Training time=0.46s (21.54%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46 +[2023-04-14 11:02:07,543] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 3650|ppo_ep: 1|act_loss: -0.002902984619140625|cri_loss: -0.0012159347534179688|unsuper_loss: 0.0 +average reward score: 5.1328125 +------------------------------------------------------------------------------------- +|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.59s (75.42%) |Training time=0.42s (19.95%) |Others=0.10 (4.63%)|CurSamplesPerSec=15.14 |AvgSamplesPerSec=14.46 +[2023-04-14 11:02:09,663] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 3651|ppo_ep: 1|act_loss: -0.030426025390625|cri_loss: -0.01433563232421875|unsuper_loss: 0.0 +average reward score: 4.93359375 +------------------------------------------------------------------------------------- +|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.59s (75.21%) |Training time=0.43s (20.16%) |Others=0.10 (4.62%)|CurSamplesPerSec=15.10 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3652|ppo_ep: 1|act_loss: -0.01470947265625|cri_loss: -0.00661468505859375|unsuper_loss: 0.0 +average reward score: 4.55859375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.48%) |Training time=0.45s (20.93%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3653|ppo_ep: 1|act_loss: 0.00142669677734375|cri_loss: 0.0014524459838867188|unsuper_loss: 0.0 +average reward score: 5.515625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.37%) |Training time=0.45s (21.03%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3654|ppo_ep: 1|act_loss: 0.007843017578125|cri_loss: 0.0045013427734375|unsuper_loss: 0.0 +average reward score: 5.23828125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.11%) |Training time=0.46s (21.30%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3655|ppo_ep: 1|act_loss: 0.038421630859375|cri_loss: 0.020263671875|unsuper_loss: 0.0 +average reward score: 5.578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.15%) |Training time=0.45s (20.67%) |Others=0.11 (5.18%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3656|ppo_ep: 1|act_loss: 0.031494140625|cri_loss: 0.016754150390625|unsuper_loss: 0.0 +average reward score: 4.28125 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.32%) |Training time=0.46s (20.29%) |Others=0.10 (4.39%)|CurSamplesPerSec=14.19 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3657|ppo_ep: 1|act_loss: 0.0087890625|cri_loss: 0.004703521728515625|unsuper_loss: 0.0 +average reward score: 4.9453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.00%) |Training time=0.46s (21.46%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3658|ppo_ep: 1|act_loss: 0.00679779052734375|cri_loss: 0.003719329833984375|unsuper_loss: 0.0 +average reward score: 4.96484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.57%) |Training time=0.40s (18.52%) |Others=0.17 (7.92%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +[2023-04-14 11:02:27,160] [INFO] [logging.py:96:log_dist] [Rank 0] step=3660, skipped=50, lr=[5.875365567724234e-06, 5.875365567724234e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:02:27,178] [INFO] [timer.py:199:stop] epoch=0/micro_step=3660/global_step=3660, RunningAvgSamplesPerSec=105.62991461506235, CurrSamplesPerSec=107.63474458970275, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:02:27,271] [INFO] [logging.py:96:log_dist] [Rank 0] step=3660, skipped=57, lr=[3.050804781897791e-06, 3.050804781897791e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3659|ppo_ep: 1|act_loss: -0.00601959228515625|cri_loss: -0.002696990966796875|unsuper_loss: 0.0 +average reward score: 5.46875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.01%) |Training time=0.46s (21.42%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3660|ppo_ep: 1|act_loss: -0.065185546875|cri_loss: -0.03179931640625|unsuper_loss: 0.0 +average reward score: 5.4609375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.09%) |Training time=0.46s (21.33%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3661|ppo_ep: 1|act_loss: -0.0310211181640625|cri_loss: -0.013946533203125|unsuper_loss: 0.0 +average reward score: 5.1953125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.61%) |Training time=0.47s (21.80%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3662|ppo_ep: 1|act_loss: -0.03411865234375|cri_loss: -0.01593017578125|unsuper_loss: 0.0 +average reward score: 5.3671875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.65%) |Training time=0.47s (21.76%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3663|ppo_ep: 1|act_loss: 0.043853759765625|cri_loss: 0.0233917236328125|unsuper_loss: 0.0 +average reward score: 5.7109375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.73%) |Training time=0.47s (21.70%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3664|ppo_ep: 1|act_loss: 0.02154541015625|cri_loss: 0.0111846923828125|unsuper_loss: 0.0 +average reward score: 4.7734375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.57%) |Training time=0.47s (21.85%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3665|ppo_ep: 1|act_loss: 0.023284912109375|cri_loss: 0.0124053955078125|unsuper_loss: 0.0 +average reward score: 5.203125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.56%) |Training time=0.47s (21.87%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3666|ppo_ep: 1|act_loss: -0.0031890869140625|cri_loss: -0.0012874603271484375|unsuper_loss: 0.0 +average reward score: 4.8203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.99%) |Training time=0.46s (21.46%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3667|ppo_ep: 1|act_loss: 0.03533935546875|cri_loss: 0.0186767578125|unsuper_loss: 0.0 +average reward score: 4.8671875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.62%) |Training time=0.47s (21.77%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3668|ppo_ep: 1|act_loss: -0.030364990234375|cri_loss: -0.01441192626953125|unsuper_loss: 0.0 +average reward score: 5.51953125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.64%) |Training time=0.47s (21.79%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46 +[2023-04-14 11:02:48,613] [INFO] [logging.py:96:log_dist] [Rank 0] step=3670, skipped=50, lr=[5.85722712647135e-06, 5.85722712647135e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:02:48,631] [INFO] [timer.py:199:stop] epoch=0/micro_step=3670/global_step=3670, RunningAvgSamplesPerSec=105.6294239805947, CurrSamplesPerSec=105.60508599933907, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:02:48,724] [INFO] [logging.py:96:log_dist] [Rank 0] step=3670, skipped=57, lr=[3.0414122579584403e-06, 3.0414122579584403e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3669|ppo_ep: 1|act_loss: -0.0185546875|cri_loss: -0.0088653564453125|unsuper_loss: 0.0 +average reward score: 5.953125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.69%) |Training time=0.47s (21.72%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3670|ppo_ep: 1|act_loss: -0.027069091796875|cri_loss: -0.01312255859375|unsuper_loss: 0.0 +average reward score: 5.16015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.44%) |Training time=0.47s (21.98%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3671|ppo_ep: 1|act_loss: -0.01959228515625|cri_loss: -0.00933074951171875|unsuper_loss: 0.0 +average reward score: 5.1796875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.79%) |Training time=0.46s (21.45%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3672|ppo_ep: 1|act_loss: -0.012786865234375|cri_loss: -0.005970001220703125|unsuper_loss: 0.0 +average reward score: 4.66015625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.10%) |Training time=0.49s (22.29%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3673|ppo_ep: 1|act_loss: 0.0139007568359375|cri_loss: 0.00730133056640625|unsuper_loss: 0.0 +average reward score: 5.46875 +------------------------------------------------------------------------------------- +|E2E latency=2.53s |Gather latency=0.00s (0.00%) |Generate time=1.59s (62.96%) |Training time=0.48s (19.10%) |Others=0.45 (17.94%)|CurSamplesPerSec=12.66 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3674|ppo_ep: 1|act_loss: 0.0052490234375|cri_loss: 0.0029621124267578125|unsuper_loss: 0.0 +average reward score: 5.703125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.11%) |Training time=0.48s (22.29%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3675|ppo_ep: 1|act_loss: 0.02960205078125|cri_loss: 0.01507568359375|unsuper_loss: 0.0 +average reward score: 4.94140625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.28%) |Training time=0.48s (22.13%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3676|ppo_ep: 1|act_loss: 0.0081329345703125|cri_loss: 0.00453948974609375|unsuper_loss: 0.0 +average reward score: 5.44921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.90%) |Training time=0.49s (22.50%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3677|ppo_ep: 1|act_loss: 0.030242919921875|cri_loss: 0.01534271240234375|unsuper_loss: 0.0 +average reward score: 4.87890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.10%) |Training time=0.48s (22.34%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3678|ppo_ep: 1|act_loss: 0.009796142578125|cri_loss: 0.005096435546875|unsuper_loss: 0.0 +average reward score: 4.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.40%) |Training time=0.48s (22.02%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +[2023-04-14 11:03:10,632] [INFO] [logging.py:96:log_dist] [Rank 0] step=3680, skipped=50, lr=[5.8390733851223396e-06, 5.8390733851223396e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:03:10,651] [INFO] [timer.py:199:stop] epoch=0/micro_step=3680/global_step=3680, RunningAvgSamplesPerSec=105.61558640234166, CurrSamplesPerSec=100.31370274593043, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:03:10,744] [INFO] [logging.py:96:log_dist] [Rank 0] step=3680, skipped=57, lr=[3.0320117089833414e-06, 3.0320117089833414e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3679|ppo_ep: 1|act_loss: -0.0201873779296875|cri_loss: -0.0089111328125|unsuper_loss: 0.0 +average reward score: 5.4609375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.23%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3680|ppo_ep: 1|act_loss: -0.060821533203125|cri_loss: -0.028778076171875|unsuper_loss: 0.0 +average reward score: 5.39453125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.02%) |Training time=0.49s (22.39%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3681|ppo_ep: 1|act_loss: 0.003955841064453125|cri_loss: 0.004291534423828125|unsuper_loss: 0.0 +average reward score: 4.5 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.61%) |Training time=0.47s (21.84%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3682|ppo_ep: 1|act_loss: -0.01025390625|cri_loss: -0.0044708251953125|unsuper_loss: 0.0 +average reward score: 4.9609375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.97%) |Training time=0.49s (22.48%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3683|ppo_ep: 1|act_loss: -0.026702880859375|cri_loss: -0.0126190185546875|unsuper_loss: 0.0 +average reward score: 5.83984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.48s (21.94%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3684|ppo_ep: 1|act_loss: -0.0031147003173828125|cri_loss: -0.0009250640869140625|unsuper_loss: 0.0 +average reward score: 5.26953125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.30%) |Training time=0.48s (22.19%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3685|ppo_ep: 1|act_loss: 0.017364501953125|cri_loss: 0.00927734375|unsuper_loss: 0.0 +average reward score: 5.3671875 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.26%) |Training time=0.49s (22.07%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3686|ppo_ep: 1|act_loss: 0.017181396484375|cri_loss: 0.0089111328125|unsuper_loss: 0.0 +average reward score: 5.0390625 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.67s (74.08%) |Training time=0.49s (21.48%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.16 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3687|ppo_ep: 1|act_loss: 0.025054931640625|cri_loss: 0.01331329345703125|unsuper_loss: 0.0 +average reward score: 5.5 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.61%) |Training time=0.47s (21.82%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3688|ppo_ep: 1|act_loss: 0.032684326171875|cri_loss: 0.0173187255859375|unsuper_loss: 0.0 +average reward score: 5.28515625 +------------------------------------------------------------------------------------- +|E2E latency=3.45s |Gather latency=0.00s (0.00%) |Generate time=1.59s (45.98%) |Training time=0.49s (14.10%) |Others=1.38 (39.92%)|CurSamplesPerSec=9.28 |AvgSamplesPerSec=14.46 +[2023-04-14 11:03:33,753] [INFO] [logging.py:96:log_dist] [Rank 0] step=3690, skipped=50, lr=[5.820904612759442e-06, 5.820904612759442e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:03:33,772] [INFO] [timer.py:199:stop] epoch=0/micro_step=3690/global_step=3690, RunningAvgSamplesPerSec=105.59878176187217, CurrSamplesPerSec=96.78853632804528, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:03:33,865] [INFO] [logging.py:96:log_dist] [Rank 0] step=3690, skipped=57, lr=[3.022603274311307e-06, 3.022603274311307e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3689|ppo_ep: 1|act_loss: -0.01277923583984375|cri_loss: -0.0057220458984375|unsuper_loss: 0.0 +average reward score: 5.0390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.77%) |Training time=0.49s (22.68%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3690|ppo_ep: 1|act_loss: 0.0052642822265625|cri_loss: 0.00312042236328125|unsuper_loss: 0.0 +average reward score: 4.484375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.29%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3691|ppo_ep: 1|act_loss: -0.028533935546875|cri_loss: -0.01375579833984375|unsuper_loss: 0.0 +average reward score: 5.3125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.87%) |Training time=0.49s (22.59%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3692|ppo_ep: 1|act_loss: -0.03851318359375|cri_loss: -0.0184783935546875|unsuper_loss: 0.0 +average reward score: 5.12109375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.64%) |Training time=0.49s (22.79%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3693|ppo_ep: 1|act_loss: 0.05462646484375|cri_loss: 0.031951904296875|unsuper_loss: 0.0 +average reward score: 5.15625 +------------------------------------------------------------------------------------- +|E2E latency=3.03s |Gather latency=0.00s (0.00%) |Generate time=1.59s (52.57%) |Training time=0.49s (16.28%) |Others=0.94 (31.15%)|CurSamplesPerSec=10.55 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3694|ppo_ep: 1|act_loss: -0.03289794921875|cri_loss: -0.015838623046875|unsuper_loss: 0.0 +average reward score: 4.7890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.56%) |Training time=0.50s (22.90%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3695|ppo_ep: 1|act_loss: 0.0087890625|cri_loss: 0.004550933837890625|unsuper_loss: 0.0 +average reward score: 5.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.63%) |Training time=0.49s (22.80%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +[2023-04-14 11:03:49,795] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384 +epoch: 0|step: 3696|ppo_ep: 1|act_loss: -0.009063720703125|cri_loss: -0.0033855438232421875|unsuper_loss: 0.0 +average reward score: 4.74609375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.64%) |Training time=0.47s (21.74%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46 +[2023-04-14 11:03:51,942] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192 +epoch: 0|step: 3697|ppo_ep: 1|act_loss: -0.06842041015625|cri_loss: -0.03167724609375|unsuper_loss: 0.0 +average reward score: 5.03125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.41%) |Training time=0.47s (21.99%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3698|ppo_ep: 1|act_loss: 0.05010986328125|cri_loss: 0.0263214111328125|unsuper_loss: 0.0 +average reward score: 5.234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.61%) |Training time=0.50s (22.86%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +[2023-04-14 11:03:56,267] [INFO] [logging.py:96:log_dist] [Rank 0] step=3700, skipped=52, lr=[5.806358953504726e-06, 5.806358953504726e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:03:56,286] [INFO] [timer.py:199:stop] epoch=0/micro_step=3700/global_step=3700, RunningAvgSamplesPerSec=105.5771961459714, CurrSamplesPerSec=97.05807384695485, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:03:56,380] [INFO] [logging.py:96:log_dist] [Rank 0] step=3700, skipped=57, lr=[3.013187093398035e-06, 3.013187093398035e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3699|ppo_ep: 1|act_loss: -0.003143310546875|cri_loss: -0.001316070556640625|unsuper_loss: 0.0 +average reward score: 4.859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.75%) |Training time=0.49s (22.67%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3700|ppo_ep: 1|act_loss: -0.018280029296875|cri_loss: -0.008575439453125|unsuper_loss: 0.0 +average reward score: 4.87890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.11%) |Training time=0.48s (22.36%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3701|ppo_ep: 1|act_loss: -0.018524169921875|cri_loss: -0.00849151611328125|unsuper_loss: 0.0 +average reward score: 4.8125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.24%) |Training time=0.48s (22.16%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3702|ppo_ep: 1|act_loss: 0.01904296875|cri_loss: 0.00995635986328125|unsuper_loss: 0.0 +average reward score: 5.17578125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.31%) |Training time=0.48s (22.14%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3703|ppo_ep: 1|act_loss: 0.00037097930908203125|cri_loss: 0.0003833770751953125|unsuper_loss: 0.0 +average reward score: 5.90234375 +------------------------------------------------------------------------------------- +|E2E latency=3.27s |Gather latency=0.00s (0.00%) |Generate time=1.75s (53.58%) |Training time=0.49s (14.82%) |Others=1.03 (31.60%)|CurSamplesPerSec=9.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3704|ppo_ep: 1|act_loss: -0.04229736328125|cri_loss: -0.01947021484375|unsuper_loss: 0.0 +average reward score: 5.7265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.03%) |Training time=0.49s (22.37%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3705|ppo_ep: 1|act_loss: -0.007030487060546875|cri_loss: -0.0033054351806640625|unsuper_loss: 0.0 +average reward score: 4.59375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.01%) |Training time=0.49s (22.40%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3706|ppo_ep: 1|act_loss: -0.02105712890625|cri_loss: -0.0096282958984375|unsuper_loss: 0.0 +average reward score: 5.09375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.17%) |Training time=0.48s (22.25%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3707|ppo_ep: 1|act_loss: -0.01012420654296875|cri_loss: -0.00421905517578125|unsuper_loss: 0.0 +average reward score: 5.02734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.91%) |Training time=0.49s (22.56%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3708|ppo_ep: 1|act_loss: 0.0220947265625|cri_loss: 0.01151275634765625|unsuper_loss: 0.0 +average reward score: 5.20703125 +------------------------------------------------------------------------------------- +|E2E latency=2.70s |Gather latency=0.00s (0.00%) |Generate time=1.59s (58.93%) |Training time=0.48s (17.65%) |Others=0.63 (23.42%)|CurSamplesPerSec=11.85 |AvgSamplesPerSec=14.46 +[2023-04-14 11:04:19,624] [INFO] [logging.py:96:log_dist] [Rank 0] step=3710, skipped=52, lr=[5.788163804112695e-06, 5.788163804112695e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:04:19,642] [INFO] [timer.py:199:stop] epoch=0/micro_step=3710/global_step=3710, RunningAvgSamplesPerSec=105.55912844115356, CurrSamplesPerSec=96.80682664850997, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:04:19,736] [INFO] [logging.py:96:log_dist] [Rank 0] step=3710, skipped=57, lr=[3.0037633058140433e-06, 3.0037633058140433e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3709|ppo_ep: 1|act_loss: 0.0076446533203125|cri_loss: 0.00484466552734375|unsuper_loss: 0.0 +average reward score: 5.703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.63%) |Training time=0.49s (22.78%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3710|ppo_ep: 1|act_loss: 0.03350830078125|cri_loss: 0.0171356201171875|unsuper_loss: 0.0 +average reward score: 4.22265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.71%) |Training time=0.49s (22.71%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3711|ppo_ep: 1|act_loss: -0.0281982421875|cri_loss: -0.01369476318359375|unsuper_loss: 0.0 +average reward score: 5.3359375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.98%) |Training time=0.49s (22.45%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3712|ppo_ep: 1|act_loss: 0.04644775390625|cri_loss: 0.02423095703125|unsuper_loss: 0.0 +average reward score: 5.6015625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.90%) |Training time=0.49s (22.59%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3713|ppo_ep: 1|act_loss: 0.0093536376953125|cri_loss: 0.0048980712890625|unsuper_loss: 0.0 +average reward score: 4.2109375 +------------------------------------------------------------------------------------- +|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.61s (72.46%) |Training time=0.51s (22.91%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.38 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3714|ppo_ep: 1|act_loss: -0.018890380859375|cri_loss: -0.0089263916015625|unsuper_loss: 0.0 +average reward score: 5.390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.02%) |Training time=0.49s (22.41%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3715|ppo_ep: 1|act_loss: -0.0121917724609375|cri_loss: -0.00533294677734375|unsuper_loss: 0.0 +average reward score: 5.9140625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.09%) |Training time=0.49s (22.28%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3716|ppo_ep: 1|act_loss: -0.04791259765625|cri_loss: -0.0214996337890625|unsuper_loss: 0.0 +average reward score: 5.19140625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.10%) |Training time=0.46s (21.30%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3717|ppo_ep: 1|act_loss: -0.0043792724609375|cri_loss: -0.00199127197265625|unsuper_loss: 0.0 +average reward score: 5.00390625 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.02%) |Training time=0.48s (20.75%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.46 +[2023-04-14 11:04:39,479] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 3718|ppo_ep: 1|act_loss: -0.0160369873046875|cri_loss: -0.007083892822265625|unsuper_loss: 0.0 +average reward score: 5.7890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.37%) |Training time=0.49s (22.54%) |Others=0.09 (4.09%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +[2023-04-14 11:04:41,549] [INFO] [logging.py:96:log_dist] [Rank 0] step=3720, skipped=52, lr=[5.769954378309581e-06, 5.769954378309581e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:04:41,567] [INFO] [timer.py:199:stop] epoch=0/micro_step=3720/global_step=3720, RunningAvgSamplesPerSec=105.53938682509363, CurrSamplesPerSec=100.27136032034784, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:04:42,121] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +[2023-04-14 11:04:42,122] [INFO] [logging.py:96:log_dist] [Rank 0] step=3720, skipped=59, lr=[2.99621889280735e-06, 2.99621889280735e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3719|ppo_ep: 1|act_loss: 0.00487518310546875|cri_loss: 0.0027008056640625|unsuper_loss: 0.0 +average reward score: 5.5390625 +------------------------------------------------------------------------------------- +|E2E latency=2.64s |Gather latency=0.00s (0.00%) |Generate time=1.60s (60.67%) |Training time=0.95s (36.04%) |Others=0.09 (3.29%)|CurSamplesPerSec=12.11 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3720|ppo_ep: 1|act_loss: 0.0572509765625|cri_loss: 0.030029296875|unsuper_loss: 0.0 +average reward score: 5.40234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.60%) |Training time=0.50s (22.83%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3721|ppo_ep: 1|act_loss: 0.03192138671875|cri_loss: 0.0172576904296875|unsuper_loss: 0.0 +average reward score: 4.87109375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.63%) |Training time=0.50s (22.84%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3722|ppo_ep: 1|act_loss: 0.0190887451171875|cri_loss: 0.01143646240234375|unsuper_loss: 0.0 +average reward score: 5.48828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.74%) |Training time=0.49s (22.65%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3723|ppo_ep: 1|act_loss: 0.0341796875|cri_loss: 0.0178985595703125|unsuper_loss: 0.0 +average reward score: 5.59375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.79%) |Training time=0.49s (22.67%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3724|ppo_ep: 1|act_loss: 0.007442474365234375|cri_loss: 0.005657196044921875|unsuper_loss: 0.0 +average reward score: 4.8671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.63%) |Training time=0.49s (22.77%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3725|ppo_ep: 1|act_loss: 0.047576904296875|cri_loss: 0.026275634765625|unsuper_loss: 0.0 +average reward score: 4.34375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.29%) |Training time=0.50s (23.21%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3726|ppo_ep: 1|act_loss: -0.020843505859375|cri_loss: -0.009979248046875|unsuper_loss: 0.0 +average reward score: 5.66015625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.68%) |Training time=0.49s (22.72%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3727|ppo_ep: 1|act_loss: -0.0172119140625|cri_loss: -0.0084991455078125|unsuper_loss: 0.0 +average reward score: 5.3671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.55%) |Training time=0.50s (22.93%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3728|ppo_ep: 1|act_loss: -0.02239990234375|cri_loss: -0.010528564453125|unsuper_loss: 0.0 +average reward score: 5.5390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.51%) |Training time=0.50s (22.95%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46 +[2023-04-14 11:05:03,709] [INFO] [logging.py:96:log_dist] [Rank 0] step=3730, skipped=52, lr=[5.7517309460030025e-06, 5.7517309460030025e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:05:03,727] [INFO] [timer.py:199:stop] epoch=0/micro_step=3730/global_step=3730, RunningAvgSamplesPerSec=105.51180342958422, CurrSamplesPerSec=98.24020143168742, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:05:03,821] [INFO] [logging.py:96:log_dist] [Rank 0] step=3730, skipped=59, lr=[2.9867817652923354e-06, 2.9867817652923354e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3729|ppo_ep: 1|act_loss: -0.0194091796875|cri_loss: -0.009429931640625|unsuper_loss: 0.0 +average reward score: 4.7265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.96%) |Training time=0.49s (22.49%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3730|ppo_ep: 1|act_loss: -0.02294921875|cri_loss: -0.01116180419921875|unsuper_loss: 0.0 +average reward score: 5.140625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.87%) |Training time=0.49s (22.58%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3731|ppo_ep: 1|act_loss: -0.0149078369140625|cri_loss: -0.006526947021484375|unsuper_loss: 0.0 +average reward score: 4.7734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.15%) |Training time=0.48s (22.30%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3732|ppo_ep: 1|act_loss: 0.040771484375|cri_loss: 0.021240234375|unsuper_loss: 0.0 +average reward score: 5.578125 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.33%) |Training time=0.48s (20.48%) |Others=0.10 (4.20%)|CurSamplesPerSec=13.70 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3733|ppo_ep: 1|act_loss: 0.03448486328125|cri_loss: 0.0180511474609375|unsuper_loss: 0.0 +average reward score: 5.48828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.12%) |Training time=0.49s (22.37%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3734|ppo_ep: 1|act_loss: 0.04742431640625|cri_loss: 0.02508544921875|unsuper_loss: 0.0 +average reward score: 5.1015625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.22%) |Training time=0.48s (22.26%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3735|ppo_ep: 1|act_loss: 0.0178375244140625|cri_loss: 0.00946807861328125|unsuper_loss: 0.0 +average reward score: 4.83203125 +------------------------------------------------------------------------------------- +|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.59s (67.02%) |Training time=0.48s (20.40%) |Others=0.30 (12.59%)|CurSamplesPerSec=13.50 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3736|ppo_ep: 1|act_loss: 0.0396728515625|cri_loss: 0.0210418701171875|unsuper_loss: 0.0 +average reward score: 6.1640625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.61%) |Training time=0.48s (22.08%) |Others=0.12 (5.31%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3737|ppo_ep: 1|act_loss: 0.00572967529296875|cri_loss: 0.0031280517578125|unsuper_loss: 0.0 +average reward score: 4.50390625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.91%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3738|ppo_ep: 1|act_loss: 0.01273345947265625|cri_loss: 0.00933074951171875|unsuper_loss: 0.0 +average reward score: 4.9140625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.80%) |Training time=0.49s (22.68%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46 +[2023-04-14 11:05:25,794] [INFO] [logging.py:96:log_dist] [Rank 0] step=3740, skipped=52, lr=[5.733493777308187e-06, 5.733493777308187e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:05:25,813] [INFO] [timer.py:199:stop] epoch=0/micro_step=3740/global_step=3740, RunningAvgSamplesPerSec=105.49538247453388, CurrSamplesPerSec=100.76580001006023, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:05:25,905] [INFO] [logging.py:96:log_dist] [Rank 0] step=3740, skipped=59, lr=[2.977337422497263e-06, 2.977337422497263e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3739|ppo_ep: 1|act_loss: -0.03662109375|cri_loss: -0.0174713134765625|unsuper_loss: 0.0 +average reward score: 5.36328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.33%) |Training time=0.48s (22.16%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3740|ppo_ep: 1|act_loss: -0.02685546875|cri_loss: -0.01302337646484375|unsuper_loss: 0.0 +average reward score: 5.0390625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.61%) |Training time=0.50s (22.89%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3741|ppo_ep: 1|act_loss: -0.000637054443359375|cri_loss: 0.001438140869140625|unsuper_loss: 0.0 +average reward score: 5.6796875 +------------------------------------------------------------------------------------- +|E2E latency=2.42s |Gather latency=0.00s (0.00%) |Generate time=1.61s (66.66%) |Training time=0.48s (19.73%) |Others=0.33 (13.61%)|CurSamplesPerSec=13.23 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3742|ppo_ep: 1|act_loss: -0.0020751953125|cri_loss: -0.0006694793701171875|unsuper_loss: 0.0 +average reward score: 5.17578125 +------------------------------------------------------------------------------------- +|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.66s (72.44%) |Training time=0.47s (20.73%) |Others=0.16 (6.83%)|CurSamplesPerSec=13.99 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3743|ppo_ep: 1|act_loss: -0.0175628662109375|cri_loss: -0.0082855224609375|unsuper_loss: 0.0 +average reward score: 3.8359375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.62%) |Training time=0.45s (20.86%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3744|ppo_ep: 1|act_loss: 0.035675048828125|cri_loss: 0.0195159912109375|unsuper_loss: 0.0 +average reward score: 5.43359375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.40%) |Training time=0.44s (20.06%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3745|ppo_ep: 1|act_loss: -0.037872314453125|cri_loss: -0.0181121826171875|unsuper_loss: 0.0 +average reward score: 4.8359375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.82%) |Training time=0.48s (21.66%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3746|ppo_ep: 1|act_loss: -0.006008148193359375|cri_loss: -0.002582550048828125|unsuper_loss: 0.0 +average reward score: 5.44140625 +------------------------------------------------------------------------------------- +|E2E latency=2.39s |Gather latency=0.00s (0.00%) |Generate time=1.80s (75.25%) |Training time=0.49s (20.63%) |Others=0.10 (4.13%)|CurSamplesPerSec=13.39 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3747|ppo_ep: 1|act_loss: -0.001678466796875|cri_loss: -0.0007061958312988281|unsuper_loss: 0.0 +average reward score: 5.640625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.73%) |Training time=0.44s (20.68%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3748|ppo_ep: 1|act_loss: 0.01319122314453125|cri_loss: 0.00751495361328125|unsuper_loss: 0.0 +average reward score: 5.69140625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.06%) |Training time=0.44s (20.31%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46 +[2023-04-14 11:05:48,055] [INFO] [logging.py:96:log_dist] [Rank 0] step=3750, skipped=52, lr=[5.7152431425439704e-06, 5.7152431425439704e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:05:48,073] [INFO] [timer.py:199:stop] epoch=0/micro_step=3750/global_step=3750, RunningAvgSamplesPerSec=105.49968423227209, CurrSamplesPerSec=121.1488010831547, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:05:48,166] [INFO] [logging.py:96:log_dist] [Rank 0] step=3750, skipped=59, lr=[2.9678860044100745e-06, 2.9678860044100745e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3749|ppo_ep: 1|act_loss: -0.014923095703125|cri_loss: -0.0067596435546875|unsuper_loss: 0.0 +average reward score: 4.671875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.50%) |Training time=0.43s (19.92%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3750|ppo_ep: 1|act_loss: 0.04046630859375|cri_loss: 0.0205535888671875|unsuper_loss: 0.0 +average reward score: 5.59375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.05%) |Training time=0.44s (20.32%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3751|ppo_ep: 1|act_loss: 0.0002613067626953125|cri_loss: 0.0005140304565429688|unsuper_loss: 0.0 +average reward score: 5.6484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.54%) |Training time=0.45s (20.87%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3752|ppo_ep: 1|act_loss: 0.02374267578125|cri_loss: 0.01258087158203125|unsuper_loss: 0.0 +average reward score: 6.01953125 +------------------------------------------------------------------------------------- +|E2E latency=2.63s |Gather latency=0.00s (0.00%) |Generate time=1.60s (60.84%) |Training time=0.45s (17.18%) |Others=0.58 (21.99%)|CurSamplesPerSec=12.18 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3753|ppo_ep: 1|act_loss: -0.004230499267578125|cri_loss: -0.001766204833984375|unsuper_loss: 0.0 +average reward score: 4.390625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.26%) |Training time=0.45s (21.07%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3754|ppo_ep: 1|act_loss: -0.0200347900390625|cri_loss: -0.0087890625|unsuper_loss: 0.0 +average reward score: 5.26953125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.67%) |Training time=0.44s (20.74%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3755|ppo_ep: 1|act_loss: -0.004261016845703125|cri_loss: -0.0016603469848632812|unsuper_loss: 0.0 +average reward score: 5.82421875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.24%) |Training time=0.45s (21.10%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3756|ppo_ep: 1|act_loss: -0.032989501953125|cri_loss: -0.0160675048828125|unsuper_loss: 0.0 +average reward score: 5.93359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.39%) |Training time=0.45s (21.03%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3757|ppo_ep: 1|act_loss: 0.015899658203125|cri_loss: 0.0082550048828125|unsuper_loss: 0.0 +average reward score: 5.26171875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.80%) |Training time=0.44s (20.51%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3758|ppo_ep: 1|act_loss: 0.00212860107421875|cri_loss: 0.002384185791015625|unsuper_loss: 0.0 +average reward score: 5.05859375 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.60s (69.51%) |Training time=0.44s (19.25%) |Others=0.26 (11.24%)|CurSamplesPerSec=13.87 |AvgSamplesPerSec=14.46 +[2023-04-14 11:06:10,155] [INFO] [logging.py:96:log_dist] [Rank 0] step=3760, skipped=52, lr=[5.6969793122287855e-06, 5.6969793122287855e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:06:10,173] [INFO] [timer.py:199:stop] epoch=0/micro_step=3760/global_step=3760, RunningAvgSamplesPerSec=105.51794677764592, CurrSamplesPerSec=115.36689304357319, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:06:10,265] [INFO] [logging.py:96:log_dist] [Rank 0] step=3760, skipped=59, lr=[2.9584276511235884e-06, 2.9584276511235884e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3759|ppo_ep: 1|act_loss: -0.00699615478515625|cri_loss: -0.0029449462890625|unsuper_loss: 0.0 +average reward score: 4.09375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.77%) |Training time=0.44s (20.55%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3760|ppo_ep: 1|act_loss: -0.0217742919921875|cri_loss: -0.00971221923828125|unsuper_loss: 0.0 +average reward score: 5.3359375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.03%) |Training time=0.44s (20.35%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3761|ppo_ep: 1|act_loss: -0.029144287109375|cri_loss: -0.0140380859375|unsuper_loss: 0.0 +average reward score: 5.48828125 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.45%) |Training time=0.51s (22.99%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.46 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3762|ppo_ep: 1|act_loss: -0.08831787109375|cri_loss: -0.04180908203125|unsuper_loss: 0.0 +average reward score: 5.45703125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.11%) |Training time=0.46s (21.30%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3763|ppo_ep: 1|act_loss: 0.020843505859375|cri_loss: 0.01064300537109375|unsuper_loss: 0.0 +average reward score: 5.05859375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.69%) |Training time=0.46s (21.57%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3764|ppo_ep: 1|act_loss: 0.033203125|cri_loss: 0.0171661376953125|unsuper_loss: 0.0 +average reward score: 4.83203125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.16%) |Training time=0.46s (21.26%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3765|ppo_ep: 1|act_loss: -0.01349639892578125|cri_loss: -0.00501251220703125|unsuper_loss: 0.0 +average reward score: 5.8984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.57%) |Training time=0.47s (21.77%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3766|ppo_ep: 1|act_loss: 0.0176239013671875|cri_loss: 0.00907135009765625|unsuper_loss: 0.0 +average reward score: 5.2734375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.91%) |Training time=0.46s (21.50%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3767|ppo_ep: 1|act_loss: -0.004486083984375|cri_loss: -0.0013856887817382812|unsuper_loss: 0.0 +average reward score: 4.35546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.64%) |Training time=0.47s (21.71%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3768|ppo_ep: 1|act_loss: -0.0341796875|cri_loss: -0.016265869140625|unsuper_loss: 0.0 +average reward score: 5.515625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.91%) |Training time=0.46s (21.42%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46 +[2023-04-14 11:06:31,673] [INFO] [logging.py:96:log_dist] [Rank 0] step=3770, skipped=52, lr=[5.678702557076659e-06, 5.678702557076659e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:06:31,691] [INFO] [timer.py:199:stop] epoch=0/micro_step=3770/global_step=3770, RunningAvgSamplesPerSec=105.52253519060744, CurrSamplesPerSec=106.89682394300014, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:06:31,784] [INFO] [logging.py:96:log_dist] [Rank 0] step=3770, skipped=59, lr=[2.9489625028334145e-06, 2.9489625028334145e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3769|ppo_ep: 1|act_loss: 0.001468658447265625|cri_loss: 0.0014200210571289062|unsuper_loss: 0.0 +average reward score: 5.06640625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.82%) |Training time=0.46s (21.53%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3770|ppo_ep: 1|act_loss: 0.00737762451171875|cri_loss: 0.0038051605224609375|unsuper_loss: 0.0 +average reward score: 4.51171875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.47s (21.65%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3771|ppo_ep: 1|act_loss: 0.0081634521484375|cri_loss: 0.00421142578125|unsuper_loss: 0.0 +average reward score: 5.03125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.53%) |Training time=0.48s (21.96%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3772|ppo_ep: 1|act_loss: -0.021820068359375|cri_loss: -0.00951385498046875|unsuper_loss: 0.0 +average reward score: 4.21875 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.50%) |Training time=0.48s (21.18%) |Others=0.10 (4.32%)|CurSamplesPerSec=14.11 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3773|ppo_ep: 1|act_loss: -0.02081298828125|cri_loss: -0.0070648193359375|unsuper_loss: 0.0 +average reward score: 4.50390625 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.59s (70.19%) |Training time=0.46s (20.09%) |Others=0.22 (9.72%)|CurSamplesPerSec=14.12 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3774|ppo_ep: 1|act_loss: -0.003711700439453125|cri_loss: -0.0015134811401367188|unsuper_loss: 0.0 +average reward score: 4.95703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.18%) |Training time=0.46s (21.15%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3775|ppo_ep: 1|act_loss: -0.018524169921875|cri_loss: -0.0089569091796875|unsuper_loss: 0.0 +average reward score: 4.953125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.38%) |Training time=0.45s (21.02%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3776|ppo_ep: 1|act_loss: 0.0028858184814453125|cri_loss: 0.0022220611572265625|unsuper_loss: 0.0 +average reward score: 5.6640625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.50%) |Training time=0.43s (19.88%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3777|ppo_ep: 1|act_loss: 0.037353515625|cri_loss: 0.01947021484375|unsuper_loss: 0.0 +average reward score: 5.20703125 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.44%) |Training time=0.47s (20.25%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.70 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3778|ppo_ep: 1|act_loss: 0.0109710693359375|cri_loss: 0.005802154541015625|unsuper_loss: 0.0 +average reward score: 4.734375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.87%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46 +[2023-04-14 11:06:53,644] [INFO] [logging.py:96:log_dist] [Rank 0] step=3780, skipped=52, lr=[5.6604131479931914e-06, 5.6604131479931914e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:06:53,662] [INFO] [timer.py:199:stop] epoch=0/micro_step=3780/global_step=3780, RunningAvgSamplesPerSec=105.52500263158235, CurrSamplesPerSec=103.33717113747495, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:06:53,754] [INFO] [logging.py:96:log_dist] [Rank 0] step=3780, skipped=59, lr=[2.939490699835887e-06, 2.939490699835887e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3779|ppo_ep: 1|act_loss: 0.00405120849609375|cri_loss: 0.002605438232421875|unsuper_loss: 0.0 +average reward score: 6.30859375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.46%) |Training time=0.47s (21.93%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3780|ppo_ep: 1|act_loss: -0.017364501953125|cri_loss: -0.0079803466796875|unsuper_loss: 0.0 +average reward score: 5.4453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.82%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3781|ppo_ep: 1|act_loss: 0.0308074951171875|cri_loss: 0.0172119140625|unsuper_loss: 0.0 +average reward score: 4.98828125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.48%) |Training time=0.47s (21.86%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3782|ppo_ep: 1|act_loss: -0.003948211669921875|cri_loss: -0.0017042160034179688|unsuper_loss: 0.0 +average reward score: 5.53125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.98%) |Training time=0.48s (22.45%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3783|ppo_ep: 1|act_loss: -0.0244140625|cri_loss: -0.0100250244140625|unsuper_loss: 0.0 +average reward score: 5.87109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.52%) |Training time=0.47s (21.82%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3784|ppo_ep: 1|act_loss: -0.016387939453125|cri_loss: -0.007472991943359375|unsuper_loss: 0.0 +average reward score: 5.04296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.61%) |Training time=0.47s (21.83%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3785|ppo_ep: 1|act_loss: -0.031341552734375|cri_loss: -0.0128021240234375|unsuper_loss: 0.0 +average reward score: 5.25390625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.36%) |Training time=0.48s (22.10%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3786|ppo_ep: 1|act_loss: 0.003833770751953125|cri_loss: 0.002223968505859375|unsuper_loss: 0.0 +average reward score: 5.15234375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.87%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3787|ppo_ep: 1|act_loss: 0.01019287109375|cri_loss: 0.0052642822265625|unsuper_loss: 0.0 +average reward score: 6.0546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.33%) |Training time=0.48s (22.05%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3788|ppo_ep: 1|act_loss: 0.01226806640625|cri_loss: 0.0078887939453125|unsuper_loss: 0.0 +average reward score: 5.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.44%) |Training time=0.47s (21.92%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +[2023-04-14 11:07:15,210] [INFO] [logging.py:96:log_dist] [Rank 0] step=3790, skipped=52, lr=[5.642111356071544e-06, 5.642111356071544e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:07:15,508] [INFO] [timer.py:199:stop] epoch=0/micro_step=3790/global_step=3790, RunningAvgSamplesPerSec=105.49148721233601, CurrSamplesPerSec=53.862661932836275, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:07:15,602] [INFO] [logging.py:96:log_dist] [Rank 0] step=3790, skipped=59, lr=[2.9300123825259737e-06, 2.9300123825259737e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3789|ppo_ep: 1|act_loss: 0.031036376953125|cri_loss: 0.0164794921875|unsuper_loss: 0.0 +average reward score: 5.59375 +------------------------------------------------------------------------------------- +|E2E latency=2.44s |Gather latency=0.00s (0.00%) |Generate time=1.58s (64.83%) |Training time=0.76s (31.04%) |Others=0.10 (4.12%)|CurSamplesPerSec=13.12 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3790|ppo_ep: 1|act_loss: 0.0275115966796875|cri_loss: 0.0144805908203125|unsuper_loss: 0.0 +average reward score: 6.15625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.48s (21.99%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3791|ppo_ep: 1|act_loss: 0.03350830078125|cri_loss: 0.0183563232421875|unsuper_loss: 0.0 +average reward score: 5.69921875 +------------------------------------------------------------------------------------- +|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.68%) |Training time=0.50s (21.06%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.61 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3792|ppo_ep: 1|act_loss: 0.039764404296875|cri_loss: 0.0226287841796875|unsuper_loss: 0.0 +average reward score: 5.9375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.53%) |Training time=0.47s (21.92%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3793|ppo_ep: 1|act_loss: -0.01373291015625|cri_loss: -0.006465911865234375|unsuper_loss: 0.0 +average reward score: 5.77734375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.47s (21.95%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3794|ppo_ep: 1|act_loss: -0.0089263916015625|cri_loss: -0.003650665283203125|unsuper_loss: 0.0 +average reward score: 5.875 +------------------------------------------------------------------------------------- +|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.16%) |Training time=0.47s (21.27%) |Others=0.17 (7.57%)|CurSamplesPerSec=14.38 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3795|ppo_ep: 1|act_loss: -0.058349609375|cri_loss: -0.0270233154296875|unsuper_loss: 0.0 +average reward score: 4.83203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.48s (22.06%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3796|ppo_ep: 1|act_loss: -0.0509033203125|cri_loss: -0.024871826171875|unsuper_loss: 0.0 +average reward score: 4.83984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.62%) |Training time=0.47s (21.83%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3797|ppo_ep: 1|act_loss: -0.01873779296875|cri_loss: -0.0084228515625|unsuper_loss: 0.0 +average reward score: 5.01171875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.22%) |Training time=0.48s (22.15%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3798|ppo_ep: 1|act_loss: -0.00775909423828125|cri_loss: -0.0035953521728515625|unsuper_loss: 0.0 +average reward score: 5.2578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.34%) |Training time=0.48s (22.12%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +[2023-04-14 11:07:37,337] [INFO] [logging.py:96:log_dist] [Rank 0] step=3800, skipped=52, lr=[5.623797452588428e-06, 5.623797452588428e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:07:37,355] [INFO] [timer.py:199:stop] epoch=0/micro_step=3800/global_step=3800, RunningAvgSamplesPerSec=105.48203854925829, CurrSamplesPerSec=105.53218149137692, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:07:37,448] [INFO] [logging.py:96:log_dist] [Rank 0] step=3800, skipped=59, lr=[2.9205276913952023e-06, 2.9205276913952023e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3799|ppo_ep: 1|act_loss: 0.007511138916015625|cri_loss: 0.00420379638671875|unsuper_loss: 0.0 +average reward score: 4.9296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.74%) |Training time=0.47s (21.63%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3800|ppo_ep: 1|act_loss: 0.02117919921875|cri_loss: 0.01085662841796875|unsuper_loss: 0.0 +average reward score: 6.04296875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.68%) |Training time=0.47s (21.53%) |Others=0.10 (4.79%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3801|ppo_ep: 1|act_loss: 0.028350830078125|cri_loss: 0.01506805419921875|unsuper_loss: 0.0 +average reward score: 4.0625 +------------------------------------------------------------------------------------- +|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.64s (71.46%) |Training time=0.56s (24.16%) |Others=0.10 (4.39%)|CurSamplesPerSec=13.92 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3802|ppo_ep: 1|act_loss: 0.019287109375|cri_loss: 0.010101318359375|unsuper_loss: 0.0 +average reward score: 5.16015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.47%) |Training time=0.47s (21.98%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3803|ppo_ep: 1|act_loss: 0.031219482421875|cri_loss: 0.016265869140625|unsuper_loss: 0.0 +average reward score: 4.6953125 +------------------------------------------------------------------------------------- +|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.59s (67.29%) |Training time=0.47s (19.70%) |Others=0.31 (13.01%)|CurSamplesPerSec=13.52 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3804|ppo_ep: 1|act_loss: -0.0006227493286132812|cri_loss: 0.0002913475036621094|unsuper_loss: 0.0 +average reward score: 6.16796875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.68%) |Training time=0.47s (21.76%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3805|ppo_ep: 1|act_loss: 0.031890869140625|cri_loss: 0.0168304443359375|unsuper_loss: 0.0 +average reward score: 5.765625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.16%) |Training time=0.48s (22.19%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3806|ppo_ep: 1|act_loss: -0.005077362060546875|cri_loss: -0.0024261474609375|unsuper_loss: 0.0 +average reward score: 5.09765625 +------------------------------------------------------------------------------------- +|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.60%) |Training time=0.50s (21.22%) |Others=0.10 (4.18%)|CurSamplesPerSec=13.54 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3807|ppo_ep: 1|act_loss: 0.024078369140625|cri_loss: 0.0128631591796875|unsuper_loss: 0.0 +average reward score: 5.3828125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.78%) |Training time=0.46s (21.57%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3808|ppo_ep: 1|act_loss: -0.0394287109375|cri_loss: -0.01849365234375|unsuper_loss: 0.0 +average reward score: 5.20703125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.98%) |Training time=0.46s (21.45%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46 +[2023-04-14 11:07:59,978] [INFO] [logging.py:96:log_dist] [Rank 0] step=3810, skipped=52, lr=[5.605471709000069e-06, 5.605471709000069e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:07:59,996] [INFO] [timer.py:199:stop] epoch=0/micro_step=3810/global_step=3810, RunningAvgSamplesPerSec=105.47664181442124, CurrSamplesPerSec=143.0142483899703, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:08:00,088] [INFO] [logging.py:96:log_dist] [Rank 0] step=3810, skipped=59, lr=[2.911036767029578e-06, 2.911036767029578e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3809|ppo_ep: 1|act_loss: -0.0309906005859375|cri_loss: -0.0133514404296875|unsuper_loss: 0.0 +average reward score: 5.75 +------------------------------------------------------------------------------------- +|E2E latency=2.07s |Gather latency=0.00s (0.00%) |Generate time=1.59s (76.55%) |Training time=0.39s (18.63%) |Others=0.10 (4.82%)|CurSamplesPerSec=15.43 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3810|ppo_ep: 1|act_loss: 0.0113983154296875|cri_loss: 0.006107330322265625|unsuper_loss: 0.0 +average reward score: 6.3203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.46s (21.37%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3811|ppo_ep: 1|act_loss: 0.0013666152954101562|cri_loss: 0.00107574462890625|unsuper_loss: 0.0 +average reward score: 6.32421875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.84%) |Training time=0.46s (21.51%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3812|ppo_ep: 1|act_loss: 2.09808349609375e-05|cri_loss: 0.001407623291015625|unsuper_loss: 0.0 +average reward score: 4.8203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.17%) |Training time=0.46s (21.25%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3813|ppo_ep: 1|act_loss: 0.0174560546875|cri_loss: 0.00921630859375|unsuper_loss: 0.0 +average reward score: 5.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.46s (21.56%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3814|ppo_ep: 1|act_loss: 0.01617431640625|cri_loss: 0.0084228515625|unsuper_loss: 0.0 +average reward score: 5.453125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.83%) |Training time=0.46s (21.60%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3815|ppo_ep: 1|act_loss: -0.023834228515625|cri_loss: -0.01117706298828125|unsuper_loss: 0.0 +average reward score: 4.58984375 +------------------------------------------------------------------------------------- +|E2E latency=2.94s |Gather latency=0.00s (0.00%) |Generate time=1.59s (54.07%) |Training time=0.46s (15.80%) |Others=0.89 (30.13%)|CurSamplesPerSec=10.89 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3816|ppo_ep: 1|act_loss: 0.005046844482421875|cri_loss: 0.003204345703125|unsuper_loss: 0.0 +average reward score: 4.73046875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.69%) |Training time=0.45s (20.73%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3817|ppo_ep: 1|act_loss: 0.00411224365234375|cri_loss: 0.005214691162109375|unsuper_loss: 0.0 +average reward score: 5.6796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.47s (21.89%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3818|ppo_ep: 1|act_loss: 0.0379638671875|cri_loss: 0.0204315185546875|unsuper_loss: 0.0 +average reward score: 4.8203125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.64%) |Training time=0.45s (20.85%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.46 +[2023-04-14 11:08:22,322] [INFO] [logging.py:96:log_dist] [Rank 0] step=3820, skipped=52, lr=[5.587134396938199e-06, 5.587134396938199e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:08:22,340] [INFO] [timer.py:199:stop] epoch=0/micro_step=3820/global_step=3820, RunningAvgSamplesPerSec=105.48597808754263, CurrSamplesPerSec=126.9696646911081, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:08:22,433] [INFO] [logging.py:96:log_dist] [Rank 0] step=3820, skipped=59, lr=[2.9015397501074932e-06, 2.9015397501074932e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3819|ppo_ep: 1|act_loss: -0.00598907470703125|cri_loss: -0.00261688232421875|unsuper_loss: 0.0 +average reward score: 5.70703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.24%) |Training time=0.42s (19.22%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46 +[2023-04-14 11:08:24,724] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 3820|ppo_ep: 1|act_loss: -0.0023822784423828125|cri_loss: -0.0002593994140625|unsuper_loss: 0.0 +average reward score: 5.48828125 +------------------------------------------------------------------------------------- +|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.61s (70.45%) |Training time=0.59s (25.67%) |Others=0.09 (3.87%)|CurSamplesPerSec=13.97 |AvgSamplesPerSec=14.46 +[2023-04-14 11:08:26,907] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 3821|ppo_ep: 1|act_loss: -0.03167724609375|cri_loss: -0.015045166015625|unsuper_loss: 0.0 +average reward score: 4.75 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.45%) |Training time=0.47s (21.52%) |Others=0.09 (4.03%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3822|ppo_ep: 1|act_loss: -0.001605987548828125|cri_loss: -0.0004248619079589844|unsuper_loss: 0.0 +average reward score: 4.37890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.81%) |Training time=0.47s (21.65%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3823|ppo_ep: 1|act_loss: -0.0477294921875|cri_loss: -0.022308349609375|unsuper_loss: 0.0 +average reward score: 4.9765625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.28%) |Training time=0.48s (22.10%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3824|ppo_ep: 1|act_loss: 0.05181884765625|cri_loss: 0.028594970703125|unsuper_loss: 0.0 +average reward score: 5.33984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.62%) |Training time=0.45s (20.83%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3825|ppo_ep: 1|act_loss: -0.0362548828125|cri_loss: -0.0172576904296875|unsuper_loss: 0.0 +average reward score: 5.578125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.52%) |Training time=0.48s (21.83%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3826|ppo_ep: 1|act_loss: -0.0181884765625|cri_loss: -0.00885009765625|unsuper_loss: 0.0 +average reward score: 6.09375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.04%) |Training time=0.47s (21.43%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3827|ppo_ep: 1|act_loss: 0.01497650146484375|cri_loss: 0.00775909423828125|unsuper_loss: 0.0 +average reward score: 6.15234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.56%) |Training time=0.47s (21.80%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3828|ppo_ep: 1|act_loss: -0.00310516357421875|cri_loss: -0.001155853271484375|unsuper_loss: 0.0 +average reward score: 5.66015625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.95%) |Training time=0.49s (22.46%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.46 +[2023-04-14 11:08:44,218] [INFO] [logging.py:96:log_dist] [Rank 0] step=3830, skipped=52, lr=[5.568785788206016e-06, 5.568785788206016e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:08:45,110] [INFO] [timer.py:199:stop] epoch=0/micro_step=3830/global_step=3830, RunningAvgSamplesPerSec=105.39178371825996, CurrSamplesPerSec=26.9550695045682, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:08:45,204] [INFO] [logging.py:96:log_dist] [Rank 0] step=3830, skipped=61, lr=[2.8939378445227608e-06, 2.8939378445227608e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3829|ppo_ep: 1|act_loss: 0.023345947265625|cri_loss: 0.01226043701171875|unsuper_loss: 0.0 +average reward score: 4.90234375 +------------------------------------------------------------------------------------- +|E2E latency=3.07s |Gather latency=0.00s (0.00%) |Generate time=1.61s (52.64%) |Training time=1.35s (44.08%) |Others=0.10 (3.28%)|CurSamplesPerSec=10.43 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3830|ppo_ep: 1|act_loss: 0.0284423828125|cri_loss: 0.0149993896484375|unsuper_loss: 0.0 +average reward score: 6.21875 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.77%) |Training time=0.48s (20.91%) |Others=0.10 (4.32%)|CurSamplesPerSec=14.05 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3831|ppo_ep: 1|act_loss: 0.025787353515625|cri_loss: 0.01403045654296875|unsuper_loss: 0.0 +average reward score: 5.296875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.22%) |Training time=0.48s (22.17%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3832|ppo_ep: 1|act_loss: 0.03790283203125|cri_loss: 0.01959228515625|unsuper_loss: 0.0 +average reward score: 5.53125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.76%) |Training time=0.47s (21.72%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3833|ppo_ep: 1|act_loss: 0.0158233642578125|cri_loss: 0.00843048095703125|unsuper_loss: 0.0 +average reward score: 5.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.90%) |Training time=0.46s (21.39%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3834|ppo_ep: 1|act_loss: -0.0014019012451171875|cri_loss: 6.198883056640625e-05|unsuper_loss: 0.0 +average reward score: 5.3984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.95%) |Training time=0.47s (21.50%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3835|ppo_ep: 1|act_loss: -0.026611328125|cri_loss: -0.0128631591796875|unsuper_loss: 0.0 +average reward score: 6.39453125 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.04%) |Training time=0.46s (19.65%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.65 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3836|ppo_ep: 1|act_loss: -0.03594970703125|cri_loss: -0.0166778564453125|unsuper_loss: 0.0 +average reward score: 5.04296875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.72%) |Training time=0.47s (21.74%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3837|ppo_ep: 1|act_loss: 0.0117034912109375|cri_loss: 0.008209228515625|unsuper_loss: 0.0 +average reward score: 5.5546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.28%) |Training time=0.48s (22.18%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3838|ppo_ep: 1|act_loss: -0.00482940673828125|cri_loss: -0.002147674560546875|unsuper_loss: 0.0 +average reward score: 5.25390625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.04%) |Training time=0.49s (22.35%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46 +[2023-04-14 11:09:07,082] [INFO] [logging.py:96:log_dist] [Rank 0] step=3840, skipped=52, lr=[5.550426154774167e-06, 5.550426154774167e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:09:07,100] [INFO] [timer.py:199:stop] epoch=0/micro_step=3840/global_step=3840, RunningAvgSamplesPerSec=105.38504342863314, CurrSamplesPerSec=101.54163110909366, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:09:07,193] [INFO] [logging.py:96:log_dist] [Rank 0] step=3840, skipped=61, lr=[2.8844302157955294e-06, 2.8844302157955294e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3839|ppo_ep: 1|act_loss: 0.00982666015625|cri_loss: 0.005313873291015625|unsuper_loss: 0.0 +average reward score: 5.8515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.39%) |Training time=0.48s (22.10%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3840|ppo_ep: 1|act_loss: -0.0025463104248046875|cri_loss: -0.000339508056640625|unsuper_loss: 0.0 +average reward score: 5.328125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.37%) |Training time=0.48s (22.08%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3841|ppo_ep: 1|act_loss: 0.02935791015625|cri_loss: 0.01509857177734375|unsuper_loss: 0.0 +average reward score: 4.37890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.30%) |Training time=0.48s (22.09%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3842|ppo_ep: 1|act_loss: 0.0176239013671875|cri_loss: 0.00960540771484375|unsuper_loss: 0.0 +average reward score: 5.0625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.29%) |Training time=0.48s (22.17%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3843|ppo_ep: 1|act_loss: 0.036529541015625|cri_loss: 0.018768310546875|unsuper_loss: 0.0 +average reward score: 5.59765625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.28%) |Training time=0.48s (22.09%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3844|ppo_ep: 1|act_loss: 0.022552490234375|cri_loss: 0.01169586181640625|unsuper_loss: 0.0 +average reward score: 5.55859375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.28%) |Training time=0.48s (22.18%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3845|ppo_ep: 1|act_loss: 0.029296875|cri_loss: 0.01503753662109375|unsuper_loss: 0.0 +average reward score: 4.78515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.20%) |Training time=0.48s (22.27%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3846|ppo_ep: 1|act_loss: -0.00392913818359375|cri_loss: -0.001155853271484375|unsuper_loss: 0.0 +average reward score: 5.19140625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.99%) |Training time=0.49s (22.48%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3847|ppo_ep: 1|act_loss: 0.02215576171875|cri_loss: 0.011810302734375|unsuper_loss: 0.0 +average reward score: 5.90234375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.41%) |Training time=0.47s (21.94%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3848|ppo_ep: 1|act_loss: -0.0194549560546875|cri_loss: -0.00933074951171875|unsuper_loss: 0.0 +average reward score: 5.67578125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.14%) |Training time=0.48s (22.34%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +[2023-04-14 11:09:28,729] [INFO] [logging.py:96:log_dist] [Rank 0] step=3850, skipped=52, lr=[5.5320557687767085e-06, 5.5320557687767085e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:09:28,747] [INFO] [timer.py:199:stop] epoch=0/micro_step=3850/global_step=3850, RunningAvgSamplesPerSec=105.37252725245156, CurrSamplesPerSec=102.7891293792571, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:09:28,840] [INFO] [logging.py:96:log_dist] [Rank 0] step=3850, skipped=61, lr=[2.8749168888851126e-06, 2.8749168888851126e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3849|ppo_ep: 1|act_loss: 0.01404571533203125|cri_loss: 0.007328033447265625|unsuper_loss: 0.0 +average reward score: 4.97265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.40%) |Training time=0.47s (21.93%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3850|ppo_ep: 1|act_loss: 0.01168060302734375|cri_loss: 0.00693511962890625|unsuper_loss: 0.0 +average reward score: 5.19140625 +------------------------------------------------------------------------------------- +|E2E latency=3.28s |Gather latency=0.00s (0.00%) |Generate time=1.76s (53.80%) |Training time=0.48s (14.71%) |Others=1.03 (31.49%)|CurSamplesPerSec=9.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3851|ppo_ep: 1|act_loss: 0.00482177734375|cri_loss: 0.0029544830322265625|unsuper_loss: 0.0 +average reward score: 5.19140625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.51%) |Training time=0.47s (21.84%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3852|ppo_ep: 1|act_loss: -0.00400543212890625|cri_loss: -0.0009441375732421875|unsuper_loss: 0.0 +average reward score: 5.421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.48s (22.09%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3853|ppo_ep: 1|act_loss: 0.00644683837890625|cri_loss: 0.003875732421875|unsuper_loss: 0.0 +average reward score: 6.10546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.19%) |Training time=0.48s (22.18%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3854|ppo_ep: 1|act_loss: 0.0124053955078125|cri_loss: 0.00745391845703125|unsuper_loss: 0.0 +average reward score: 5.50390625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.16%) |Training time=0.48s (22.30%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3855|ppo_ep: 1|act_loss: 0.0143280029296875|cri_loss: 0.007793426513671875|unsuper_loss: 0.0 +average reward score: 6.1484375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.47%) |Training time=0.48s (21.93%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3856|ppo_ep: 1|act_loss: 0.018798828125|cri_loss: 0.00968170166015625|unsuper_loss: 0.0 +average reward score: 5.70703125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.47s (21.98%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3857|ppo_ep: 1|act_loss: 0.09454345703125|cri_loss: 0.0516357421875|unsuper_loss: 0.0 +average reward score: 5.06640625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.30%) |Training time=0.48s (22.06%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3858|ppo_ep: 1|act_loss: 0.0253448486328125|cri_loss: 0.01302337646484375|unsuper_loss: 0.0 +average reward score: 5.0 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.37%) |Training time=0.49s (22.16%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.46 +[2023-04-14 11:09:51,625] [INFO] [logging.py:96:log_dist] [Rank 0] step=3860, skipped=52, lr=[5.513674902507077e-06, 5.513674902507077e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:09:51,643] [INFO] [timer.py:199:stop] epoch=0/micro_step=3860/global_step=3860, RunningAvgSamplesPerSec=105.36149393062692, CurrSamplesPerSec=103.64847575007799, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:09:51,736] [INFO] [logging.py:96:log_dist] [Rank 0] step=3860, skipped=61, lr=[2.8653980048019654e-06, 2.8653980048019654e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3859|ppo_ep: 1|act_loss: -0.00516510009765625|cri_loss: -0.002277374267578125|unsuper_loss: 0.0 +average reward score: 5.0390625 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.76%) |Training time=0.47s (20.83%) |Others=0.10 (4.41%)|CurSamplesPerSec=14.14 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3860|ppo_ep: 1|act_loss: -0.0166778564453125|cri_loss: -0.007465362548828125|unsuper_loss: 0.0 +average reward score: 5.73046875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.38%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3861|ppo_ep: 1|act_loss: -0.0172576904296875|cri_loss: -0.0084228515625|unsuper_loss: 0.0 +average reward score: 3.95703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.26%) |Training time=0.48s (22.13%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3862|ppo_ep: 1|act_loss: 0.013824462890625|cri_loss: 0.008209228515625|unsuper_loss: 0.0 +average reward score: 4.9921875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.37%) |Training time=0.48s (22.09%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3863|ppo_ep: 1|act_loss: -0.019683837890625|cri_loss: -0.00853729248046875|unsuper_loss: 0.0 +average reward score: 5.33984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.42%) |Training time=0.48s (21.95%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3864|ppo_ep: 1|act_loss: 0.01343536376953125|cri_loss: 0.006969451904296875|unsuper_loss: 0.0 +average reward score: 4.765625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.65%) |Training time=0.47s (21.80%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3865|ppo_ep: 1|act_loss: -0.0426025390625|cri_loss: -0.0206146240234375|unsuper_loss: 0.0 +average reward score: 6.3671875 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.00%) |Training time=0.46s (19.77%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3866|ppo_ep: 1|act_loss: 0.01137542724609375|cri_loss: 0.0087432861328125|unsuper_loss: 0.0 +average reward score: 4.78515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.08%) |Training time=0.49s (22.41%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3867|ppo_ep: 1|act_loss: 0.05126953125|cri_loss: 0.027069091796875|unsuper_loss: 0.0 +average reward score: 4.73046875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.85%) |Training time=0.49s (22.61%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3868|ppo_ep: 1|act_loss: 0.019317626953125|cri_loss: 0.01029205322265625|unsuper_loss: 0.0 +average reward score: 5.265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.73%) |Training time=0.49s (22.73%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +[2023-04-14 11:10:13,439] [INFO] [logging.py:96:log_dist] [Rank 0] step=3870, skipped=52, lr=[5.495283828414054e-06, 5.495283828414054e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:10:13,457] [INFO] [timer.py:199:stop] epoch=0/micro_step=3870/global_step=3870, RunningAvgSamplesPerSec=105.35058199086996, CurrSamplesPerSec=98.91628601455396, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:10:13,549] [INFO] [logging.py:96:log_dist] [Rank 0] step=3870, skipped=61, lr=[2.855873704638912e-06, 2.855873704638912e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3869|ppo_ep: 1|act_loss: 0.003063201904296875|cri_loss: 0.00177764892578125|unsuper_loss: 0.0 +average reward score: 5.06640625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.89%) |Training time=0.49s (22.49%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3870|ppo_ep: 1|act_loss: 0.069091796875|cri_loss: 0.035552978515625|unsuper_loss: 0.0 +average reward score: 6.2109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.81%) |Training time=0.49s (22.66%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3871|ppo_ep: 1|act_loss: 0.02947998046875|cri_loss: 0.01541900634765625|unsuper_loss: 0.0 +average reward score: 5.1953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.79%) |Training time=0.49s (22.59%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3872|ppo_ep: 1|act_loss: -0.00611114501953125|cri_loss: -0.002147674560546875|unsuper_loss: 0.0 +average reward score: 6.3359375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.30%) |Training time=0.48s (22.20%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3873|ppo_ep: 1|act_loss: -0.0058135986328125|cri_loss: -0.00225830078125|unsuper_loss: 0.0 +average reward score: 4.65625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.90%) |Training time=0.49s (22.46%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3874|ppo_ep: 1|act_loss: -0.0141143798828125|cri_loss: -0.00676727294921875|unsuper_loss: 0.0 +average reward score: 5.3828125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.05%) |Training time=0.48s (22.40%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3875|ppo_ep: 1|act_loss: 0.0024662017822265625|cri_loss: 0.0014801025390625|unsuper_loss: 0.0 +average reward score: 4.8828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.18%) |Training time=0.48s (22.29%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3876|ppo_ep: 1|act_loss: -0.0263214111328125|cri_loss: -0.01288604736328125|unsuper_loss: 0.0 +average reward score: 6.01953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.48s (22.27%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3877|ppo_ep: 1|act_loss: 0.031494140625|cri_loss: 0.017913818359375|unsuper_loss: 0.0 +average reward score: 5.48828125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.31%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3878|ppo_ep: 1|act_loss: 0.002197265625|cri_loss: 0.0018177032470703125|unsuper_loss: 0.0 +average reward score: 4.921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.11%) |Training time=0.49s (22.36%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +[2023-04-14 11:10:35,093] [INFO] [logging.py:96:log_dist] [Rank 0] step=3880, skipped=52, lr=[5.476882819097722e-06, 5.476882819097722e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:10:35,112] [INFO] [timer.py:199:stop] epoch=0/micro_step=3880/global_step=3880, RunningAvgSamplesPerSec=105.33392161732702, CurrSamplesPerSec=100.01730920417752, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:10:35,204] [INFO] [logging.py:96:log_dist] [Rank 0] step=3880, skipped=61, lr=[2.846344129569057e-06, 2.846344129569057e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3879|ppo_ep: 1|act_loss: -0.0154266357421875|cri_loss: -0.007171630859375|unsuper_loss: 0.0 +average reward score: 5.5 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.07%) |Training time=0.49s (22.42%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3880|ppo_ep: 1|act_loss: 0.00566864013671875|cri_loss: 0.00415802001953125|unsuper_loss: 0.0 +average reward score: 4.89453125 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.17%) |Training time=0.48s (20.61%) |Others=0.10 (4.22%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3881|ppo_ep: 1|act_loss: 0.030059814453125|cri_loss: 0.015655517578125|unsuper_loss: 0.0 +average reward score: 5.875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.21%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3882|ppo_ep: 1|act_loss: -0.04449462890625|cri_loss: -0.0208892822265625|unsuper_loss: 0.0 +average reward score: 5.44140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.48s (22.26%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3883|ppo_ep: 1|act_loss: -0.00847625732421875|cri_loss: -0.00353240966796875|unsuper_loss: 0.0 +average reward score: 5.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.99s |Gather latency=0.00s (0.00%) |Generate time=1.61s (53.79%) |Training time=0.47s (15.83%) |Others=0.91 (30.38%)|CurSamplesPerSec=10.71 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3884|ppo_ep: 1|act_loss: -0.033355712890625|cri_loss: -0.01551055908203125|unsuper_loss: 0.0 +average reward score: 5.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.20%) |Training time=0.48s (22.18%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3885|ppo_ep: 1|act_loss: 0.0021610260009765625|cri_loss: 0.0013217926025390625|unsuper_loss: 0.0 +average reward score: 5.03125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.67%) |Training time=0.47s (21.66%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3886|ppo_ep: 1|act_loss: 0.01102447509765625|cri_loss: 0.005680084228515625|unsuper_loss: 0.0 +average reward score: 5.15234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.66%) |Training time=0.47s (21.74%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3887|ppo_ep: 1|act_loss: -0.0107879638671875|cri_loss: -0.004840850830078125|unsuper_loss: 0.0 +average reward score: 4.83203125 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.11%) |Training time=0.47s (21.33%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3888|ppo_ep: 1|act_loss: 0.03143310546875|cri_loss: 0.0162811279296875|unsuper_loss: 0.0 +average reward score: 5.421875 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.67%) |Training time=0.47s (20.90%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.12 |AvgSamplesPerSec=14.46 +[2023-04-14 11:10:57,877] [INFO] [logging.py:96:log_dist] [Rank 0] step=3890, skipped=52, lr=[5.4584721473054284e-06, 5.4584721473054284e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:10:57,896] [INFO] [timer.py:199:stop] epoch=0/micro_step=3890/global_step=3890, RunningAvgSamplesPerSec=105.32536333025105, CurrSamplesPerSec=101.78936631216996, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:10:57,988] [INFO] [logging.py:96:log_dist] [Rank 0] step=3890, skipped=61, lr=[2.836809420843692e-06, 2.836809420843692e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3889|ppo_ep: 1|act_loss: 0.041259765625|cri_loss: 0.0216522216796875|unsuper_loss: 0.0 +average reward score: 5.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.33%) |Training time=0.48s (22.06%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3890|ppo_ep: 1|act_loss: -0.0309295654296875|cri_loss: -0.014801025390625|unsuper_loss: 0.0 +average reward score: 5.2265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.29%) |Training time=0.48s (22.07%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3891|ppo_ep: 1|act_loss: 0.016632080078125|cri_loss: 0.00905609130859375|unsuper_loss: 0.0 +average reward score: 5.7109375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.46%) |Training time=0.47s (21.91%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3892|ppo_ep: 1|act_loss: 0.07305908203125|cri_loss: 0.03955078125|unsuper_loss: 0.0 +average reward score: 5.05859375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.21%) |Training time=0.48s (22.18%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3893|ppo_ep: 1|act_loss: -0.0031833648681640625|cri_loss: -0.00118255615234375|unsuper_loss: 0.0 +average reward score: 4.765625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.39%) |Training time=0.48s (21.98%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3894|ppo_ep: 1|act_loss: -0.0030231475830078125|cri_loss: -0.0012531280517578125|unsuper_loss: 0.0 +average reward score: 5.78125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.20%) |Training time=0.48s (22.18%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3895|ppo_ep: 1|act_loss: -0.056243896484375|cri_loss: -0.0274505615234375|unsuper_loss: 0.0 +average reward score: 4.5625 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.51%) |Training time=0.47s (20.18%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3896|ppo_ep: 1|act_loss: -0.024444580078125|cri_loss: -0.0117950439453125|unsuper_loss: 0.0 +average reward score: 5.8984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.77%) |Training time=0.47s (21.59%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3897|ppo_ep: 1|act_loss: 0.00799560546875|cri_loss: 0.004730224609375|unsuper_loss: 0.0 +average reward score: 5.734375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.86%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3898|ppo_ep: 1|act_loss: 0.0122528076171875|cri_loss: 0.006862640380859375|unsuper_loss: 0.0 +average reward score: 5.33984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.29%) |Training time=0.48s (22.09%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +[2023-04-14 11:11:19,675] [INFO] [logging.py:96:log_dist] [Rank 0] step=3900, skipped=52, lr=[5.440052085927744e-06, 5.440052085927744e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:11:19,693] [INFO] [timer.py:199:stop] epoch=0/micro_step=3900/global_step=3900, RunningAvgSamplesPerSec=105.3175314438355, CurrSamplesPerSec=102.41601813027702, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:11:19,786] [INFO] [logging.py:96:log_dist] [Rank 0] step=3900, skipped=61, lr=[2.827269719790202e-06, 2.827269719790202e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3899|ppo_ep: 1|act_loss: -0.003170013427734375|cri_loss: -0.0006551742553710938|unsuper_loss: 0.0 +average reward score: 5.0546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.47s (21.98%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3900|ppo_ep: 1|act_loss: 0.026153564453125|cri_loss: 0.0141448974609375|unsuper_loss: 0.0 +average reward score: 5.56640625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.47%) |Training time=0.47s (21.91%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3901|ppo_ep: 1|act_loss: 0.0148773193359375|cri_loss: 0.0092315673828125|unsuper_loss: 0.0 +average reward score: 5.57421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.26%) |Training time=0.48s (22.13%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3902|ppo_ep: 1|act_loss: 0.0027313232421875|cri_loss: 0.00159454345703125|unsuper_loss: 0.0 +average reward score: 6.1484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.48s (21.97%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3903|ppo_ep: 1|act_loss: -0.05126953125|cri_loss: -0.024444580078125|unsuper_loss: 0.0 +average reward score: 4.95703125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.37%) |Training time=0.48s (22.00%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3904|ppo_ep: 1|act_loss: 0.018585205078125|cri_loss: 0.0104217529296875|unsuper_loss: 0.0 +average reward score: 5.40625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.47%) |Training time=0.47s (21.91%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3905|ppo_ep: 1|act_loss: -0.03497314453125|cri_loss: -0.0162200927734375|unsuper_loss: 0.0 +average reward score: 5.390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.47s (21.93%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3906|ppo_ep: 1|act_loss: -0.01453399658203125|cri_loss: -0.00646209716796875|unsuper_loss: 0.0 +average reward score: 6.3203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.53%) |Training time=0.47s (21.85%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3907|ppo_ep: 1|act_loss: -0.013519287109375|cri_loss: -0.00604248046875|unsuper_loss: 0.0 +average reward score: 5.9375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.64%) |Others=0.10 (4.80%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3908|ppo_ep: 1|act_loss: 0.0009617805480957031|cri_loss: 0.0007853507995605469|unsuper_loss: 0.0 +average reward score: 5.421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.84%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +[2023-04-14 11:11:41,331] [INFO] [logging.py:96:log_dist] [Rank 0] step=3910, skipped=52, lr=[5.421622907994414e-06, 5.421622907994414e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:11:41,349] [INFO] [timer.py:199:stop] epoch=0/micro_step=3910/global_step=3910, RunningAvgSamplesPerSec=105.31036604765642, CurrSamplesPerSec=103.50970950158406, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:11:41,442] [INFO] [logging.py:96:log_dist] [Rank 0] step=3910, skipped=61, lr=[2.8177251678099694e-06, 2.8177251678099694e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3909|ppo_ep: 1|act_loss: 0.005268096923828125|cri_loss: 0.003101348876953125|unsuper_loss: 0.0 +average reward score: 5.37109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.81%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3910|ppo_ep: 1|act_loss: 0.0406494140625|cri_loss: 0.021026611328125|unsuper_loss: 0.0 +average reward score: 5.24609375 +------------------------------------------------------------------------------------- +|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.79s (76.03%) |Training time=0.46s (19.70%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.59 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3911|ppo_ep: 1|act_loss: 0.0535888671875|cri_loss: 0.0287322998046875|unsuper_loss: 0.0 +average reward score: 5.9296875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.18%) |Training time=0.46s (21.21%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3912|ppo_ep: 1|act_loss: -0.007843017578125|cri_loss: -0.0031261444091796875|unsuper_loss: 0.0 +average reward score: 5.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.84%) |Training time=0.47s (21.53%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3913|ppo_ep: 1|act_loss: 0.02130126953125|cri_loss: 0.01087188720703125|unsuper_loss: 0.0 +average reward score: 5.41796875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.11%) |Training time=0.46s (21.23%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3914|ppo_ep: 1|act_loss: -0.03924560546875|cri_loss: -0.017730712890625|unsuper_loss: 0.0 +average reward score: 6.35546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.70%) |Training time=0.47s (21.69%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3915|ppo_ep: 1|act_loss: -0.000682830810546875|cri_loss: 0.001865386962890625|unsuper_loss: 0.0 +average reward score: 4.71875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.56%) |Training time=0.45s (20.80%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3916|ppo_ep: 1|act_loss: -0.022735595703125|cri_loss: -0.00836181640625|unsuper_loss: 0.0 +average reward score: 5.5390625 +------------------------------------------------------------------------------------- +|E2E latency=3.21s |Gather latency=0.00s (0.00%) |Generate time=1.61s (50.36%) |Training time=0.47s (14.53%) |Others=1.13 (35.11%)|CurSamplesPerSec=9.98 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3917|ppo_ep: 1|act_loss: 0.05841064453125|cri_loss: 0.03216552734375|unsuper_loss: 0.0 +average reward score: 4.9921875 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.23%) |Training time=0.46s (20.33%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.13 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3918|ppo_ep: 1|act_loss: 0.02142333984375|cri_loss: 0.0140228271484375|unsuper_loss: 0.0 +average reward score: 5.390625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.00%) |Training time=0.46s (21.38%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +[2023-04-14 11:12:04,287] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, but hysteresis is 2. Reducing hysteresis to 1 +[2023-04-14 11:12:04,287] [INFO] [logging.py:96:log_dist] [Rank 0] step=3920, skipped=53, lr=[5.405029078967381e-06, 5.405029078967381e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:12:04,288] [INFO] [timer.py:199:stop] epoch=0/micro_step=3920/global_step=3920, RunningAvgSamplesPerSec=105.31687299274512, CurrSamplesPerSec=117.73918688232374, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:12:04,379] [INFO] [logging.py:96:log_dist] [Rank 0] step=3920, skipped=61, lr=[2.8081759063762797e-06, 2.8081759063762797e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3919|ppo_ep: 1|act_loss: 0.022247314453125|cri_loss: 0.0158843994140625|unsuper_loss: 0.0 +average reward score: 4.6015625 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.96%) |Training time=0.43s (20.35%) |Others=0.10 (4.68%)|CurSamplesPerSec=15.02 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3920|ppo_ep: 1|act_loss: 0.0309906005859375|cri_loss: 0.016754150390625|unsuper_loss: 0.0 +average reward score: 6.3125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.30%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3921|ppo_ep: 1|act_loss: 0.02581787109375|cri_loss: 0.01375579833984375|unsuper_loss: 0.0 +average reward score: 4.296875 +------------------------------------------------------------------------------------- +|E2E latency=2.85s |Gather latency=0.00s (0.00%) |Generate time=1.60s (56.01%) |Training time=0.46s (16.14%) |Others=0.79 (27.85%)|CurSamplesPerSec=11.22 |AvgSamplesPerSec=14.46 +[2023-04-14 11:12:11,547] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 3922|ppo_ep: 1|act_loss: -0.020843505859375|cri_loss: -0.00952911376953125|unsuper_loss: 0.0 +average reward score: 5.50390625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.40%) |Training time=0.46s (21.43%) |Others=0.09 (4.17%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46 +[2023-04-14 11:12:13,588] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384 +[2023-04-14 11:12:13,672] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 3923|ppo_ep: 1|act_loss: -0.0687255859375|cri_loss: -0.0307464599609375|unsuper_loss: 0.0 +average reward score: 5.453125 +------------------------------------------------------------------------------------- +|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.32%) |Training time=0.43s (20.47%) |Others=0.09 (4.21%)|CurSamplesPerSec=15.06 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3924|ppo_ep: 1|act_loss: 0.0050811767578125|cri_loss: 0.0030078887939453125|unsuper_loss: 0.0 +average reward score: 5.58984375 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.60s (70.56%) |Training time=0.57s (25.04%) |Others=0.10 (4.40%)|CurSamplesPerSec=14.11 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3925|ppo_ep: 1|act_loss: -0.13818359375|cri_loss: -0.047271728515625|unsuper_loss: 0.0 +average reward score: 6.3828125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.01%) |Training time=0.44s (20.35%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3926|ppo_ep: 1|act_loss: -0.06524658203125|cri_loss: -0.027069091796875|unsuper_loss: 0.0 +average reward score: 5.17578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.21%) |Training time=0.46s (21.15%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3927|ppo_ep: 1|act_loss: -0.07269287109375|cri_loss: -0.0286712646484375|unsuper_loss: 0.0 +average reward score: 4.8046875 +------------------------------------------------------------------------------------- +|E2E latency=2.82s |Gather latency=0.00s (0.00%) |Generate time=1.60s (56.85%) |Training time=0.46s (16.34%) |Others=0.76 (26.80%)|CurSamplesPerSec=11.36 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3928|ppo_ep: 1|act_loss: 0.0650634765625|cri_loss: 0.034942626953125|unsuper_loss: 0.0 +average reward score: 4.80859375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.30%) |Training time=0.45s (21.06%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46 +[2023-04-14 11:12:27,276] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192 +[2023-04-14 11:12:27,277] [INFO] [logging.py:96:log_dist] [Rank 0] step=3930, skipped=55, lr=[5.390273156270772e-06, 5.390273156270772e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:12:27,277] [INFO] [timer.py:199:stop] epoch=0/micro_step=3930/global_step=3930, RunningAvgSamplesPerSec=105.3224964159603, CurrSamplesPerSec=117.4507008482986, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:12:27,362] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384 +[2023-04-14 11:12:27,362] [INFO] [logging.py:96:log_dist] [Rank 0] step=3930, skipped=64, lr=[2.8014886970414734e-06, 2.8014886970414734e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3929|ppo_ep: 1|act_loss: 0.0504150390625|cri_loss: 0.028167724609375|unsuper_loss: 0.0 +average reward score: 4.67578125 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.29%) |Training time=0.43s (20.42%) |Others=0.09 (4.29%)|CurSamplesPerSec=15.05 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3930|ppo_ep: 1|act_loss: 0.089111328125|cri_loss: 0.047210693359375|unsuper_loss: 0.0 +average reward score: 5.7421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.84%) |Training time=0.47s (21.55%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3931|ppo_ep: 1|act_loss: -0.0012054443359375|cri_loss: 0.0017375946044921875|unsuper_loss: 0.0 +average reward score: 4.84375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.02%) |Training time=0.46s (21.35%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3932|ppo_ep: 1|act_loss: 0.01161956787109375|cri_loss: 0.006717681884765625|unsuper_loss: 0.0 +average reward score: 4.55859375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.15%) |Training time=0.46s (21.21%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3933|ppo_ep: 1|act_loss: 0.048187255859375|cri_loss: 0.025787353515625|unsuper_loss: 0.0 +average reward score: 5.55078125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.99%) |Training time=0.46s (21.39%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3934|ppo_ep: 1|act_loss: 0.11151123046875|cri_loss: 0.0634765625|unsuper_loss: 0.0 +average reward score: 4.71484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.41%) |Training time=0.45s (20.95%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3935|ppo_ep: 1|act_loss: -0.0048980712890625|cri_loss: 0.000118255615234375|unsuper_loss: 0.0 +average reward score: 4.828125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.25%) |Training time=0.46s (21.11%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3936|ppo_ep: 1|act_loss: 0.01910400390625|cri_loss: 0.026641845703125|unsuper_loss: 0.0 +average reward score: 5.4921875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.46%) |Training time=0.45s (20.88%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3937|ppo_ep: 1|act_loss: -0.007503509521484375|cri_loss: -0.001201629638671875|unsuper_loss: 0.0 +average reward score: 3.75 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.38%) |Training time=0.46s (21.04%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.46 +[2023-04-14 11:12:46,720] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, reducing to 4096 +epoch: 0|step: 3938|ppo_ep: 1|act_loss: -0.015869140625|cri_loss: -0.006702423095703125|unsuper_loss: 0.0 +average reward score: 5.1328125 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.17%) |Training time=0.43s (20.13%) |Others=0.10 (4.70%)|CurSamplesPerSec=15.03 |AvgSamplesPerSec=14.46 +[2023-04-14 11:12:49,033] [INFO] [logging.py:96:log_dist] [Rank 0] step=3940, skipped=56, lr=[5.373666340493332e-06, 5.373666340493332e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:12:49,083] [INFO] [timer.py:199:stop] epoch=0/micro_step=3940/global_step=3940, RunningAvgSamplesPerSec=105.32905620645766, CurrSamplesPerSec=98.3321840781513, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:12:49,175] [INFO] [logging.py:96:log_dist] [Rank 0] step=3940, skipped=64, lr=[2.7919317544146405e-06, 2.7919317544146405e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3939|ppo_ep: 1|act_loss: 0.0667724609375|cri_loss: 0.049713134765625|unsuper_loss: 0.0 +average reward score: 4.6953125 +------------------------------------------------------------------------------------- +|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.08%) |Training time=0.49s (20.67%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.54 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3940|ppo_ep: 1|act_loss: -0.027984619140625|cri_loss: -0.01288604736328125|unsuper_loss: 0.0 +average reward score: 4.8671875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.83%) |Training time=0.47s (21.55%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3941|ppo_ep: 1|act_loss: -0.0123443603515625|cri_loss: -0.005859375|unsuper_loss: 0.0 +average reward score: 4.54296875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.66%) |Training time=0.47s (21.74%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3942|ppo_ep: 1|act_loss: -0.01959228515625|cri_loss: -0.00902557373046875|unsuper_loss: 0.0 +average reward score: 4.09765625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.84%) |Training time=0.47s (21.54%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3943|ppo_ep: 1|act_loss: -0.01531982421875|cri_loss: -0.005657196044921875|unsuper_loss: 0.0 +average reward score: 4.59765625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.75%) |Training time=0.47s (21.63%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3944|ppo_ep: 1|act_loss: 0.011962890625|cri_loss: 0.018524169921875|unsuper_loss: 0.0 +average reward score: 4.62109375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.12%) |Training time=0.46s (21.29%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3945|ppo_ep: 1|act_loss: 0.092529296875|cri_loss: 0.0631103515625|unsuper_loss: 0.0 +average reward score: 4.703125 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.28%) |Training time=0.47s (21.18%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3946|ppo_ep: 1|act_loss: -0.0811767578125|cri_loss: -0.03668212890625|unsuper_loss: 0.0 +average reward score: 5.59765625 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.83%) |Training time=0.47s (20.57%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.14 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3947|ppo_ep: 1|act_loss: -0.0601806640625|cri_loss: -0.0255279541015625|unsuper_loss: 0.0 +average reward score: 4.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.46%) |Training time=0.48s (21.93%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3948|ppo_ep: 1|act_loss: 0.005146026611328125|cri_loss: 0.005069732666015625|unsuper_loss: 0.0 +average reward score: 5.078125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.57%) |Training time=0.48s (21.82%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.46 +[2023-04-14 11:13:10,960] [INFO] [logging.py:96:log_dist] [Rank 0] step=3950, skipped=56, lr=[5.355206605693846e-06, 5.355206605693846e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:13:10,979] [INFO] [timer.py:199:stop] epoch=0/micro_step=3950/global_step=3950, RunningAvgSamplesPerSec=105.32556865211501, CurrSamplesPerSec=104.41765903501886, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:13:11,071] [INFO] [logging.py:96:log_dist] [Rank 0] step=3950, skipped=64, lr=[2.7823704846549614e-06, 2.7823704846549614e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3949|ppo_ep: 1|act_loss: -0.02288818359375|cri_loss: -0.0092010498046875|unsuper_loss: 0.0 +average reward score: 5.62109375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.86%) |Training time=0.47s (21.55%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3950|ppo_ep: 1|act_loss: 0.00533294677734375|cri_loss: 0.0029754638671875|unsuper_loss: 0.0 +average reward score: 4.5625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.56%) |Training time=0.48s (21.85%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3951|ppo_ep: 1|act_loss: -0.02294921875|cri_loss: -0.0074462890625|unsuper_loss: 0.0 +average reward score: 5.00390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.66%) |Training time=0.47s (21.73%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3952|ppo_ep: 1|act_loss: -0.01934814453125|cri_loss: -0.00791168212890625|unsuper_loss: 0.0 +average reward score: 5.703125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.62%) |Training time=0.48s (21.79%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3953|ppo_ep: 1|act_loss: -0.001678466796875|cri_loss: 0.00296783447265625|unsuper_loss: 0.0 +average reward score: 4.6640625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.53%) |Training time=0.48s (21.89%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3954|ppo_ep: 1|act_loss: 0.05401611328125|cri_loss: 0.03033447265625|unsuper_loss: 0.0 +average reward score: 5.73828125 +------------------------------------------------------------------------------------- +|E2E latency=2.88s |Gather latency=0.00s (0.00%) |Generate time=1.77s (61.51%) |Training time=0.47s (16.29%) |Others=0.64 (22.20%)|CurSamplesPerSec=11.12 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3955|ppo_ep: 1|act_loss: 0.0494384765625|cri_loss: 0.028076171875|unsuper_loss: 0.0 +average reward score: 4.640625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.24%) |Training time=0.46s (21.09%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3956|ppo_ep: 1|act_loss: 0.08917236328125|cri_loss: 0.04974365234375|unsuper_loss: 0.0 +average reward score: 4.3984375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.84%) |Training time=0.47s (21.55%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3957|ppo_ep: 1|act_loss: 0.04241943359375|cri_loss: 0.024505615234375|unsuper_loss: 0.0 +average reward score: 4.734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.91%) |Training time=0.47s (21.49%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3958|ppo_ep: 1|act_loss: 0.0198974609375|cri_loss: 0.01181793212890625|unsuper_loss: 0.0 +average reward score: 4.52734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.76%) |Training time=0.47s (21.63%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +[2023-04-14 11:13:33,404] [INFO] [logging.py:96:log_dist] [Rank 0] step=3960, skipped=56, lr=[5.3367390119534095e-06, 5.3367390119534095e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:13:33,423] [INFO] [timer.py:199:stop] epoch=0/micro_step=3960/global_step=3960, RunningAvgSamplesPerSec=105.32290912174726, CurrSamplesPerSec=106.21121211185998, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:13:33,516] [INFO] [logging.py:96:log_dist] [Rank 0] step=3960, skipped=64, lr=[2.772805029483517e-06, 2.772805029483517e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3959|ppo_ep: 1|act_loss: -0.020965576171875|cri_loss: -0.0093231201171875|unsuper_loss: 0.0 +average reward score: 5.3671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.01%) |Training time=0.46s (21.39%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3960|ppo_ep: 1|act_loss: 0.0175323486328125|cri_loss: 0.0121612548828125|unsuper_loss: 0.0 +average reward score: 5.56640625 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.61s (69.47%) |Training time=0.47s (20.32%) |Others=0.24 (10.21%)|CurSamplesPerSec=13.85 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3961|ppo_ep: 1|act_loss: -0.018310546875|cri_loss: -0.00521087646484375|unsuper_loss: 0.0 +average reward score: 5.359375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.82%) |Training time=0.47s (21.58%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3962|ppo_ep: 1|act_loss: 0.04412841796875|cri_loss: 0.03289794921875|unsuper_loss: 0.0 +average reward score: 5.0859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.29%) |Training time=0.46s (21.09%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3963|ppo_ep: 1|act_loss: 0.0105743408203125|cri_loss: 0.006397247314453125|unsuper_loss: 0.0 +average reward score: 5.08203125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.66%) |Training time=0.47s (21.74%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3964|ppo_ep: 1|act_loss: 0.006893157958984375|cri_loss: 0.008575439453125|unsuper_loss: 0.0 +average reward score: 4.8203125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.35%) |Training time=0.46s (21.06%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3965|ppo_ep: 1|act_loss: 0.0237274169921875|cri_loss: 0.01345062255859375|unsuper_loss: 0.0 +average reward score: 5.015625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.07%) |Training time=0.46s (21.18%) |Others=0.10 (4.76%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3966|ppo_ep: 1|act_loss: 0.03277587890625|cri_loss: 0.0184478759765625|unsuper_loss: 0.0 +average reward score: 5.08203125 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (72.99%) |Training time=0.45s (20.54%) |Others=0.14 (6.47%)|CurSamplesPerSec=14.46 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3967|ppo_ep: 1|act_loss: 0.0333251953125|cri_loss: 0.0197906494140625|unsuper_loss: 0.0 +average reward score: 4.6328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.10%) |Training time=0.46s (21.29%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3968|ppo_ep: 1|act_loss: -0.0061798095703125|cri_loss: -0.0029392242431640625|unsuper_loss: 0.0 +average reward score: 5.046875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.95%) |Training time=0.47s (21.43%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46 +[2023-04-14 11:13:55,489] [INFO] [logging.py:96:log_dist] [Rank 0] step=3970, skipped=56, lr=[5.318263833006314e-06, 5.318263833006314e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:13:55,507] [INFO] [timer.py:199:stop] epoch=0/micro_step=3970/global_step=3970, RunningAvgSamplesPerSec=105.32585319821645, CurrSamplesPerSec=109.12267687612147, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:13:55,603] [INFO] [logging.py:96:log_dist] [Rank 0] step=3970, skipped=64, lr=[2.7632355306834306e-06, 2.7632355306834306e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3969|ppo_ep: 1|act_loss: 0.009552001953125|cri_loss: 0.0053863525390625|unsuper_loss: 0.0 +average reward score: 4.78515625 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.08%) |Training time=0.46s (19.51%) |Others=0.10 (4.41%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3970|ppo_ep: 1|act_loss: 0.018157958984375|cri_loss: 0.0101318359375|unsuper_loss: 0.0 +average reward score: 4.83203125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.63%) |Training time=0.48s (21.76%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3971|ppo_ep: 1|act_loss: 0.004344940185546875|cri_loss: 0.00278472900390625|unsuper_loss: 0.0 +average reward score: 5.15625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.04%) |Training time=0.46s (21.28%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3972|ppo_ep: 1|act_loss: -0.0034942626953125|cri_loss: -0.001590728759765625|unsuper_loss: 0.0 +average reward score: 5.06640625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.27%) |Training time=0.46s (21.08%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3973|ppo_ep: 1|act_loss: -0.0035877227783203125|cri_loss: -0.0016889572143554688|unsuper_loss: 0.0 +average reward score: 4.3984375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.66%) |Training time=0.48s (21.76%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3974|ppo_ep: 1|act_loss: 0.02191162109375|cri_loss: 0.016937255859375|unsuper_loss: 0.0 +average reward score: 4.9609375 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.64s (73.75%) |Training time=0.48s (21.73%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.40 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3975|ppo_ep: 1|act_loss: -0.0308837890625|cri_loss: -0.01500701904296875|unsuper_loss: 0.0 +average reward score: 5.4921875 +------------------------------------------------------------------------------------- +|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.38%) |Training time=0.49s (21.23%) |Others=0.10 (4.39%)|CurSamplesPerSec=13.99 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3976|ppo_ep: 1|act_loss: 0.0119476318359375|cri_loss: 0.008209228515625|unsuper_loss: 0.0 +average reward score: 5.3515625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.28%) |Training time=0.48s (22.08%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3977|ppo_ep: 1|act_loss: -0.05389404296875|cri_loss: -0.0259246826171875|unsuper_loss: 0.0 +average reward score: 4.953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.31%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3978|ppo_ep: 1|act_loss: -0.01357269287109375|cri_loss: -0.006313323974609375|unsuper_loss: 0.0 +average reward score: 5.46875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.33%) |Training time=0.48s (22.08%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.46 +[2023-04-14 11:14:17,440] [INFO] [logging.py:96:log_dist] [Rank 0] step=3980, skipped=56, lr=[5.2997813426992765e-06, 5.2997813426992765e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:14:17,459] [INFO] [timer.py:199:stop] epoch=0/micro_step=3980/global_step=3980, RunningAvgSamplesPerSec=105.31787614664505, CurrSamplesPerSec=97.28826867151642, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:14:17,551] [INFO] [logging.py:96:log_dist] [Rank 0] step=3980, skipped=64, lr=[2.753662130097758e-06, 2.753662130097758e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3979|ppo_ep: 1|act_loss: 0.04278564453125|cri_loss: 0.0223236083984375|unsuper_loss: 0.0 +average reward score: 5.42578125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.05%) |Training time=0.49s (22.40%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3980|ppo_ep: 1|act_loss: 0.0094451904296875|cri_loss: 0.0048828125|unsuper_loss: 0.0 +average reward score: 5.40625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.54%) |Training time=0.48s (21.88%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3981|ppo_ep: 1|act_loss: 0.016387939453125|cri_loss: 0.00946044921875|unsuper_loss: 0.0 +average reward score: 5.8203125 +------------------------------------------------------------------------------------- +|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.60s (69.80%) |Training time=0.47s (20.64%) |Others=0.22 (9.56%)|CurSamplesPerSec=13.98 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3982|ppo_ep: 1|act_loss: 0.06060791015625|cri_loss: 0.032379150390625|unsuper_loss: 0.0 +average reward score: 4.15234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.68%) |Training time=0.47s (21.71%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3983|ppo_ep: 1|act_loss: 0.026519775390625|cri_loss: 0.01392364501953125|unsuper_loss: 0.0 +average reward score: 5.0625 +------------------------------------------------------------------------------------- +|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.60s (71.41%) |Training time=0.54s (24.12%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.30 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3984|ppo_ep: 1|act_loss: -0.050628662109375|cri_loss: -0.02349853515625|unsuper_loss: 0.0 +average reward score: 4.87890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.92%) |Training time=0.46s (21.45%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3985|ppo_ep: 1|act_loss: -0.0207672119140625|cri_loss: -0.01001739501953125|unsuper_loss: 0.0 +average reward score: 3.953125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.48%) |Training time=0.48s (21.90%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3986|ppo_ep: 1|act_loss: 0.05877685546875|cri_loss: 0.03070068359375|unsuper_loss: 0.0 +average reward score: 4.40234375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.19%) |Training time=0.46s (21.15%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3987|ppo_ep: 1|act_loss: -0.003879547119140625|cri_loss: -0.0012159347534179688|unsuper_loss: 0.0 +average reward score: 6.18359375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.90%) |Training time=0.47s (21.50%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3988|ppo_ep: 1|act_loss: -0.00310516357421875|cri_loss: -0.0013723373413085938|unsuper_loss: 0.0 +average reward score: 5.609375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.72%) |Training time=0.47s (21.66%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +[2023-04-14 11:14:39,322] [INFO] [logging.py:96:log_dist] [Rank 0] step=3990, skipped=56, lr=[5.281291814987394e-06, 5.281291814987394e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:14:39,341] [INFO] [timer.py:199:stop] epoch=0/micro_step=3990/global_step=3990, RunningAvgSamplesPerSec=105.31172796465408, CurrSamplesPerSec=106.26090811783111, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:14:39,433] [INFO] [logging.py:96:log_dist] [Rank 0] step=3990, skipped=64, lr=[2.7440849696273907e-06, 2.7440849696273907e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3989|ppo_ep: 1|act_loss: -0.0141143798828125|cri_loss: -0.0065765380859375|unsuper_loss: 0.0 +average reward score: 5.0 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.97%) |Training time=0.46s (21.42%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3990|ppo_ep: 1|act_loss: -0.00800323486328125|cri_loss: -0.003459930419921875|unsuper_loss: 0.0 +average reward score: 5.8984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.78%) |Training time=0.47s (21.60%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3991|ppo_ep: 1|act_loss: -0.013031005859375|cri_loss: -0.00531005859375|unsuper_loss: 0.0 +average reward score: 5.6953125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.02%) |Training time=0.47s (21.40%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3992|ppo_ep: 1|act_loss: 0.0122833251953125|cri_loss: 0.0067596435546875|unsuper_loss: 0.0 +average reward score: 5.18359375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.97%) |Training time=0.46s (21.39%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3993|ppo_ep: 1|act_loss: 0.0008268356323242188|cri_loss: 0.0008487701416015625|unsuper_loss: 0.0 +average reward score: 5.4765625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.04%) |Training time=0.46s (21.35%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3994|ppo_ep: 1|act_loss: 0.050140380859375|cri_loss: 0.02655029296875|unsuper_loss: 0.0 +average reward score: 5.16796875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.30%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3995|ppo_ep: 1|act_loss: 0.032928466796875|cri_loss: 0.0180206298828125|unsuper_loss: 0.0 +average reward score: 4.8046875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.13%) |Training time=0.46s (21.25%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3996|ppo_ep: 1|act_loss: -0.0299072265625|cri_loss: -0.01325225830078125|unsuper_loss: 0.0 +average reward score: 5.78125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.32%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3997|ppo_ep: 1|act_loss: -0.0191192626953125|cri_loss: -0.00928497314453125|unsuper_loss: 0.0 +average reward score: 5.83203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.02%) |Training time=0.46s (21.36%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 3998|ppo_ep: 1|act_loss: 0.00289154052734375|cri_loss: 0.00174713134765625|unsuper_loss: 0.0 +average reward score: 5.140625 +------------------------------------------------------------------------------------- +|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.30%) |Training time=0.48s (20.45%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.58 |AvgSamplesPerSec=14.46 +[2023-04-14 11:15:01,190] [INFO] [logging.py:96:log_dist] [Rank 0] step=4000, skipped=56, lr=[5.262795523930069e-06, 5.262795523930069e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:15:01,208] [INFO] [timer.py:199:stop] epoch=0/micro_step=4000/global_step=4000, RunningAvgSamplesPerSec=105.31314272642126, CurrSamplesPerSec=108.65087362544847, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:15:01,301] [INFO] [logging.py:96:log_dist] [Rank 0] step=4000, skipped=64, lr=[2.73450419122895e-06, 2.73450419122895e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 3999|ppo_ep: 1|act_loss: 0.01080322265625|cri_loss: 0.00595855712890625|unsuper_loss: 0.0 +average reward score: 4.98046875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.21%) |Training time=0.46s (21.16%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4000|ppo_ep: 1|act_loss: 0.00574493408203125|cri_loss: 0.0033092498779296875|unsuper_loss: 0.0 +average reward score: 6.46875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.83%) |Training time=0.47s (21.55%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4001|ppo_ep: 1|act_loss: 0.040130615234375|cri_loss: 0.021820068359375|unsuper_loss: 0.0 +average reward score: 4.5703125 +------------------------------------------------------------------------------------- +|E2E latency=2.87s |Gather latency=0.00s (0.00%) |Generate time=1.61s (55.99%) |Training time=0.46s (16.10%) |Others=0.80 (27.91%)|CurSamplesPerSec=11.16 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4002|ppo_ep: 1|act_loss: -0.0129547119140625|cri_loss: -0.005870819091796875|unsuper_loss: 0.0 +average reward score: 5.46875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.33%) |Training time=0.46s (21.05%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4003|ppo_ep: 1|act_loss: 0.00415802001953125|cri_loss: 0.0026340484619140625|unsuper_loss: 0.0 +average reward score: 4.9140625 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.92%) |Training time=0.47s (21.43%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4004|ppo_ep: 1|act_loss: 0.003192901611328125|cri_loss: 0.0031108856201171875|unsuper_loss: 0.0 +average reward score: 5.65234375 +------------------------------------------------------------------------------------- +|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.17%) |Training time=0.49s (21.44%) |Others=0.10 (4.38%)|CurSamplesPerSec=13.98 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4005|ppo_ep: 1|act_loss: 0.03448486328125|cri_loss: 0.017974853515625|unsuper_loss: 0.0 +average reward score: 5.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.87%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4006|ppo_ep: 1|act_loss: 0.00943756103515625|cri_loss: 0.005268096923828125|unsuper_loss: 0.0 +average reward score: 5.8828125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.24%) |Training time=0.46s (21.13%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4007|ppo_ep: 1|act_loss: 0.0208740234375|cri_loss: 0.0106964111328125|unsuper_loss: 0.0 +average reward score: 4.76953125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.48s (21.96%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4008|ppo_ep: 1|act_loss: 0.01227569580078125|cri_loss: 0.006473541259765625|unsuper_loss: 0.0 +average reward score: 4.453125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.56%) |Training time=0.48s (21.84%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46 +[2023-04-14 11:15:23,749] [INFO] [logging.py:96:log_dist] [Rank 0] step=4010, skipped=56, lr=[5.244292743686956e-06, 5.244292743686956e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:15:23,767] [INFO] [timer.py:199:stop] epoch=0/micro_step=4010/global_step=4010, RunningAvgSamplesPerSec=105.30904361496287, CurrSamplesPerSec=102.18848468098487, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:15:23,859] [INFO] [logging.py:96:log_dist] [Rank 0] step=4010, skipped=64, lr=[2.7249199369126855e-06, 2.7249199369126855e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4009|ppo_ep: 1|act_loss: -0.009063720703125|cri_loss: -0.0037689208984375|unsuper_loss: 0.0 +average reward score: 5.3984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.48s (22.00%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4010|ppo_ep: 1|act_loss: -0.0294189453125|cri_loss: -0.0133209228515625|unsuper_loss: 0.0 +average reward score: 5.39453125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.29%) |Training time=0.48s (22.09%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4011|ppo_ep: 1|act_loss: 0.0192413330078125|cri_loss: 0.010101318359375|unsuper_loss: 0.0 +average reward score: 4.64453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.76%) |Training time=0.47s (21.60%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4012|ppo_ep: 1|act_loss: -0.01611328125|cri_loss: -0.00748443603515625|unsuper_loss: 0.0 +average reward score: 5.3515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.47s (21.90%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4013|ppo_ep: 1|act_loss: -0.01181793212890625|cri_loss: -0.0032501220703125|unsuper_loss: 0.0 +average reward score: 4.7890625 +------------------------------------------------------------------------------------- +|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.58%) |Training time=0.50s (21.18%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.56 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4014|ppo_ep: 1|act_loss: -0.007022857666015625|cri_loss: -0.002872467041015625|unsuper_loss: 0.0 +average reward score: 6.125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.46s (21.38%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4015|ppo_ep: 1|act_loss: 0.009246826171875|cri_loss: 0.0051422119140625|unsuper_loss: 0.0 +average reward score: 5.23828125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.24%) |Training time=0.46s (21.13%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4016|ppo_ep: 1|act_loss: 0.0111846923828125|cri_loss: 0.00629425048828125|unsuper_loss: 0.0 +average reward score: 5.8828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.26%) |Training time=0.48s (22.14%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4017|ppo_ep: 1|act_loss: 0.0282135009765625|cri_loss: 0.015533447265625|unsuper_loss: 0.0 +average reward score: 4.8671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.46%) |Training time=0.47s (21.92%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4018|ppo_ep: 1|act_loss: 0.022796630859375|cri_loss: 0.01273345947265625|unsuper_loss: 0.0 +average reward score: 5.46484375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.27%) |Training time=0.48s (22.12%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46 +[2023-04-14 11:15:45,614] [INFO] [logging.py:96:log_dist] [Rank 0] step=4020, skipped=56, lr=[5.2257837485138914e-06, 5.2257837485138914e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:15:45,632] [INFO] [timer.py:199:stop] epoch=0/micro_step=4020/global_step=4020, RunningAvgSamplesPerSec=105.30119557757467, CurrSamplesPerSec=102.19229714995767, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:15:45,725] [INFO] [logging.py:96:log_dist] [Rank 0] step=4020, skipped=64, lr=[2.7153323487403653e-06, 2.7153323487403653e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4019|ppo_ep: 1|act_loss: 0.0105133056640625|cri_loss: 0.006256103515625|unsuper_loss: 0.0 +average reward score: 6.2265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.46%) |Training time=0.48s (21.95%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4020|ppo_ep: 1|act_loss: -0.015472412109375|cri_loss: -0.007476806640625|unsuper_loss: 0.0 +average reward score: 5.1796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.54%) |Training time=0.47s (21.84%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4021|ppo_ep: 1|act_loss: -0.0288848876953125|cri_loss: -0.0139923095703125|unsuper_loss: 0.0 +average reward score: 5.26171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.69%) |Training time=0.47s (21.69%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4022|ppo_ep: 1|act_loss: -0.01552581787109375|cri_loss: -0.0073699951171875|unsuper_loss: 0.0 +average reward score: 5.39453125 +------------------------------------------------------------------------------------- +|E2E latency=2.78s |Gather latency=0.00s (0.00%) |Generate time=1.59s (57.30%) |Training time=0.47s (16.97%) |Others=0.71 (25.73%)|CurSamplesPerSec=11.52 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4023|ppo_ep: 1|act_loss: -0.01531982421875|cri_loss: -0.00725555419921875|unsuper_loss: 0.0 +average reward score: 5.74609375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.46%) |Training time=0.48s (21.92%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4024|ppo_ep: 1|act_loss: 0.029022216796875|cri_loss: 0.015380859375|unsuper_loss: 0.0 +average reward score: 5.20703125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.38%) |Training time=0.45s (20.97%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4025|ppo_ep: 1|act_loss: -0.012115478515625|cri_loss: -0.0057373046875|unsuper_loss: 0.0 +average reward score: 5.8125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.74%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4026|ppo_ep: 1|act_loss: -0.0033702850341796875|cri_loss: -0.0012416839599609375|unsuper_loss: 0.0 +average reward score: 4.88671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.48s (21.95%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4027|ppo_ep: 1|act_loss: 0.007076263427734375|cri_loss: 0.00379180908203125|unsuper_loss: 0.0 +average reward score: 4.6015625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.47s (21.58%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4028|ppo_ep: 1|act_loss: -0.0110015869140625|cri_loss: -0.0049896240234375|unsuper_loss: 0.0 +average reward score: 5.2734375 +------------------------------------------------------------------------------------- +|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.54%) |Training time=0.50s (21.21%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.55 |AvgSamplesPerSec=14.46 +[2023-04-14 11:16:08,106] [INFO] [logging.py:96:log_dist] [Rank 0] step=4030, skipped=56, lr=[5.207268812758837e-06, 5.207268812758837e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:16:08,125] [INFO] [timer.py:199:stop] epoch=0/micro_step=4030/global_step=4030, RunningAvgSamplesPerSec=105.29472441602836, CurrSamplesPerSec=101.67324172839862, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:16:08,218] [INFO] [logging.py:96:log_dist] [Rank 0] step=4030, skipped=64, lr=[2.7057415688231765e-06, 2.7057415688231765e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4029|ppo_ep: 1|act_loss: 0.039093017578125|cri_loss: 0.020843505859375|unsuper_loss: 0.0 +average reward score: 4.5703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.48s (21.98%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4030|ppo_ep: 1|act_loss: -0.017974853515625|cri_loss: -0.00848388671875|unsuper_loss: 0.0 +average reward score: 5.48828125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.23%) |Training time=0.48s (22.17%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4031|ppo_ep: 1|act_loss: -0.046630859375|cri_loss: -0.021697998046875|unsuper_loss: 0.0 +average reward score: 5.17578125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.95%) |Training time=0.47s (21.43%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4032|ppo_ep: 1|act_loss: -0.0113372802734375|cri_loss: -0.004917144775390625|unsuper_loss: 0.0 +average reward score: 4.6640625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.03%) |Training time=0.47s (21.40%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4033|ppo_ep: 1|act_loss: 0.027557373046875|cri_loss: 0.0144195556640625|unsuper_loss: 0.0 +average reward score: 5.30859375 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.60%) |Training time=0.47s (20.97%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.13 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4034|ppo_ep: 1|act_loss: -0.0010023117065429688|cri_loss: -0.00019073486328125|unsuper_loss: 0.0 +average reward score: 5.515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.51%) |Training time=0.47s (21.86%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4035|ppo_ep: 1|act_loss: -0.023345947265625|cri_loss: -0.0111083984375|unsuper_loss: 0.0 +average reward score: 5.25390625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.81%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4036|ppo_ep: 1|act_loss: 0.03778076171875|cri_loss: 0.0196075439453125|unsuper_loss: 0.0 +average reward score: 4.6953125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.35%) |Training time=0.48s (22.04%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4037|ppo_ep: 1|act_loss: 0.00446319580078125|cri_loss: 0.0024566650390625|unsuper_loss: 0.0 +average reward score: 5.546875 +------------------------------------------------------------------------------------- +|E2E latency=2.63s |Gather latency=0.00s (0.00%) |Generate time=1.59s (60.51%) |Training time=0.47s (17.72%) |Others=0.57 (21.77%)|CurSamplesPerSec=12.18 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4038|ppo_ep: 1|act_loss: -0.009307861328125|cri_loss: -0.0042724609375|unsuper_loss: 0.0 +average reward score: 4.84765625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.91%) |Training time=0.46s (21.45%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46 +[2023-04-14 11:16:30,361] [INFO] [logging.py:96:log_dist] [Rank 0] step=4040, skipped=56, lr=[5.188748210857804e-06, 5.188748210857804e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:16:30,379] [INFO] [timer.py:199:stop] epoch=0/micro_step=4040/global_step=4040, RunningAvgSamplesPerSec=105.28961572115509, CurrSamplesPerSec=100.88236204918982, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:16:30,472] [INFO] [logging.py:96:log_dist] [Rank 0] step=4040, skipped=64, lr=[2.696147739319613e-06, 2.696147739319613e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4039|ppo_ep: 1|act_loss: 0.0245361328125|cri_loss: 0.01309967041015625|unsuper_loss: 0.0 +average reward score: 5.0078125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.21%) |Training time=0.48s (22.19%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4040|ppo_ep: 1|act_loss: -0.0211181640625|cri_loss: -0.01006317138671875|unsuper_loss: 0.0 +average reward score: 5.4453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.73%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4041|ppo_ep: 1|act_loss: -0.00572967529296875|cri_loss: -0.0024394989013671875|unsuper_loss: 0.0 +average reward score: 6.11328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.39%) |Training time=0.48s (22.00%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4042|ppo_ep: 1|act_loss: 6.723403930664062e-05|cri_loss: 0.0003829002380371094|unsuper_loss: 0.0 +average reward score: 4.8515625 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.70%) |Training time=0.51s (23.09%) |Others=0.12 (5.22%)|CurSamplesPerSec=14.45 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4043|ppo_ep: 1|act_loss: 0.01611328125|cri_loss: 0.0083465576171875|unsuper_loss: 0.0 +average reward score: 5.78125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.65%) |Training time=0.47s (21.72%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4044|ppo_ep: 1|act_loss: 0.017364501953125|cri_loss: 0.009002685546875|unsuper_loss: 0.0 +average reward score: 5.78515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.86%) |Training time=0.47s (21.47%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4045|ppo_ep: 1|act_loss: 0.04150390625|cri_loss: 0.021636962890625|unsuper_loss: 0.0 +average reward score: 6.04296875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.49%) |Training time=0.44s (20.25%) |Others=0.11 (5.25%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4046|ppo_ep: 1|act_loss: 0.03741455078125|cri_loss: 0.0192413330078125|unsuper_loss: 0.0 +average reward score: 5.203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.79%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4047|ppo_ep: 1|act_loss: 0.017913818359375|cri_loss: 0.009490966796875|unsuper_loss: 0.0 +average reward score: 5.4765625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.71%) |Others=0.10 (4.76%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4048|ppo_ep: 1|act_loss: 0.023406982421875|cri_loss: 0.0120697021484375|unsuper_loss: 0.0 +average reward score: 7.00390625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.51%) |Training time=0.47s (21.87%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +[2023-04-14 11:16:52,062] [INFO] [logging.py:96:log_dist] [Rank 0] step=4050, skipped=56, lr=[5.170222217330791e-06, 5.170222217330791e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:16:52,081] [INFO] [timer.py:199:stop] epoch=0/micro_step=4050/global_step=4050, RunningAvgSamplesPerSec=105.28449685090004, CurrSamplesPerSec=100.30200816658396, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:16:52,173] [INFO] [logging.py:96:log_dist] [Rank 0] step=4050, skipped=64, lr=[2.686551002433372e-06, 2.686551002433372e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4049|ppo_ep: 1|act_loss: 0.0283660888671875|cri_loss: 0.01503753662109375|unsuper_loss: 0.0 +average reward score: 4.09375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.19%) |Training time=0.48s (22.21%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4050|ppo_ep: 1|act_loss: -0.0017862319946289062|cri_loss: -0.00014972686767578125|unsuper_loss: 0.0 +average reward score: 4.4296875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.76%) |Training time=0.47s (21.60%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4051|ppo_ep: 1|act_loss: -0.0023956298828125|cri_loss: -0.0001583099365234375|unsuper_loss: 0.0 +average reward score: 4.73828125 +------------------------------------------------------------------------------------- +|E2E latency=2.80s |Gather latency=0.00s (0.00%) |Generate time=1.59s (56.88%) |Training time=0.47s (16.86%) |Others=0.73 (26.26%)|CurSamplesPerSec=11.44 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4052|ppo_ep: 1|act_loss: -0.0013427734375|cri_loss: 0.0010967254638671875|unsuper_loss: 0.0 +average reward score: 4.5390625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.39%) |Training time=0.47s (21.94%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4053|ppo_ep: 1|act_loss: -0.03289794921875|cri_loss: -0.015869140625|unsuper_loss: 0.0 +average reward score: 5.0703125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.40%) |Training time=0.47s (21.95%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4054|ppo_ep: 1|act_loss: -0.0076904296875|cri_loss: -0.0036258697509765625|unsuper_loss: 0.0 +average reward score: 4.6484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.48s (22.05%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4055|ppo_ep: 1|act_loss: 0.014862060546875|cri_loss: 0.007785797119140625|unsuper_loss: 0.0 +average reward score: 5.0703125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.07%) |Training time=0.48s (22.29%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4056|ppo_ep: 1|act_loss: 0.003627777099609375|cri_loss: 0.00479888916015625|unsuper_loss: 0.0 +average reward score: 5.81640625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.47s (21.91%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4057|ppo_ep: 1|act_loss: -0.003406524658203125|cri_loss: -0.0008668899536132812|unsuper_loss: 0.0 +average reward score: 5.44921875 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.02%) |Training time=0.48s (20.69%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4058|ppo_ep: 1|act_loss: 0.0282440185546875|cri_loss: 0.0143585205078125|unsuper_loss: 0.0 +average reward score: 5.64453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.25%) |Training time=0.48s (22.11%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46 +[2023-04-14 11:17:14,465] [INFO] [logging.py:96:log_dist] [Rank 0] step=4060, skipped=56, lr=[5.151691106777714e-06, 5.151691106777714e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:17:14,483] [INFO] [timer.py:199:stop] epoch=0/micro_step=4060/global_step=4060, RunningAvgSamplesPerSec=105.27758348616538, CurrSamplesPerSec=104.85874689254413, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:17:14,576] [INFO] [logging.py:96:log_dist] [Rank 0] step=4060, skipped=64, lr=[2.6769515004112453e-06, 2.6769515004112453e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4059|ppo_ep: 1|act_loss: 0.018707275390625|cri_loss: 0.00998687744140625|unsuper_loss: 0.0 +average reward score: 5.734375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.66%) |Training time=0.47s (21.67%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4060|ppo_ep: 1|act_loss: 0.01556396484375|cri_loss: 0.0082855224609375|unsuper_loss: 0.0 +average reward score: 5.63671875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.35%) |Training time=0.48s (22.00%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4061|ppo_ep: 1|act_loss: -0.0537109375|cri_loss: -0.0259552001953125|unsuper_loss: 0.0 +average reward score: 5.68359375 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.27%) |Training time=0.49s (22.15%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.41 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4062|ppo_ep: 1|act_loss: -0.0125885009765625|cri_loss: -0.005950927734375|unsuper_loss: 0.0 +average reward score: 5.3359375 +------------------------------------------------------------------------------------- +|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.74s (75.58%) |Training time=0.46s (19.99%) |Others=0.10 (4.43%)|CurSamplesPerSec=13.93 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4063|ppo_ep: 1|act_loss: -0.029144287109375|cri_loss: -0.01389312744140625|unsuper_loss: 0.0 +average reward score: 4.890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.42%) |Training time=0.43s (19.88%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4064|ppo_ep: 1|act_loss: -0.018829345703125|cri_loss: -0.0090179443359375|unsuper_loss: 0.0 +average reward score: 4.671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.28%) |Training time=0.44s (20.06%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4065|ppo_ep: 1|act_loss: -0.03253173828125|cri_loss: -0.0152435302734375|unsuper_loss: 0.0 +average reward score: 5.1640625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.20%) |Training time=0.46s (21.06%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4066|ppo_ep: 1|act_loss: -0.001811981201171875|cri_loss: -0.0007963180541992188|unsuper_loss: 0.0 +average reward score: 4.7578125 +------------------------------------------------------------------------------------- +|E2E latency=2.40s |Gather latency=0.00s (0.00%) |Generate time=1.61s (67.20%) |Training time=0.46s (19.05%) |Others=0.33 (13.75%)|CurSamplesPerSec=13.34 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4067|ppo_ep: 1|act_loss: -0.01019287109375|cri_loss: -0.004711151123046875|unsuper_loss: 0.0 +average reward score: 4.640625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.31%) |Training time=0.46s (21.04%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4068|ppo_ep: 1|act_loss: -0.00948333740234375|cri_loss: -0.004062652587890625|unsuper_loss: 0.0 +average reward score: 5.5625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.09%) |Training time=0.44s (20.23%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +[2023-04-14 11:17:36,585] [INFO] [logging.py:96:log_dist] [Rank 0] step=4070, skipped=56, lr=[5.133155153874335e-06, 5.133155153874335e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:17:36,603] [INFO] [timer.py:199:stop] epoch=0/micro_step=4070/global_step=4070, RunningAvgSamplesPerSec=105.28821586996683, CurrSamplesPerSec=112.36743147915281, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:17:36,695] [INFO] [logging.py:96:log_dist] [Rank 0] step=4070, skipped=64, lr=[2.6673493755410096e-06, 2.6673493755410096e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4069|ppo_ep: 1|act_loss: -0.03155517578125|cri_loss: -0.015045166015625|unsuper_loss: 0.0 +average reward score: 4.50390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.79%) |Training time=0.45s (20.58%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4070|ppo_ep: 1|act_loss: 0.026214599609375|cri_loss: 0.01456451416015625|unsuper_loss: 0.0 +average reward score: 4.82421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.29%) |Training time=0.46s (21.07%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4071|ppo_ep: 1|act_loss: 0.01337432861328125|cri_loss: 0.00728607177734375|unsuper_loss: 0.0 +average reward score: 5.1171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.57%) |Training time=0.45s (20.78%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4072|ppo_ep: 1|act_loss: -0.022918701171875|cri_loss: -0.0107879638671875|unsuper_loss: 0.0 +average reward score: 4.2734375 +------------------------------------------------------------------------------------- +|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.78s (74.87%) |Training time=0.49s (20.81%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.49 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4073|ppo_ep: 1|act_loss: -0.0123291015625|cri_loss: -0.00556182861328125|unsuper_loss: 0.0 +average reward score: 4.3125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.18%) |Training time=0.46s (21.21%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4074|ppo_ep: 1|act_loss: 0.0379638671875|cri_loss: 0.021881103515625|unsuper_loss: 0.0 +average reward score: 4.9921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.81%) |Training time=0.47s (21.55%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4075|ppo_ep: 1|act_loss: -0.0139617919921875|cri_loss: -0.006725311279296875|unsuper_loss: 0.0 +average reward score: 5.1953125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.31%) |Training time=0.48s (22.07%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4076|ppo_ep: 1|act_loss: -0.0021991729736328125|cri_loss: -0.0007162094116210938|unsuper_loss: 0.0 +average reward score: 5.4921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.03%) |Training time=0.46s (21.33%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4077|ppo_ep: 1|act_loss: 0.04083251953125|cri_loss: 0.0215606689453125|unsuper_loss: 0.0 +average reward score: 4.984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.46%) |Training time=0.45s (20.91%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4078|ppo_ep: 1|act_loss: -0.03167724609375|cri_loss: -0.014312744140625|unsuper_loss: 0.0 +average reward score: 5.1171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.30%) |Training time=0.46s (21.06%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +[2023-04-14 11:17:58,512] [INFO] [logging.py:96:log_dist] [Rank 0] step=4080, skipped=56, lr=[5.114614633368193e-06, 5.114614633368193e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:17:58,530] [INFO] [timer.py:199:stop] epoch=0/micro_step=4080/global_step=4080, RunningAvgSamplesPerSec=105.29089003351561, CurrSamplesPerSec=111.21455442164469, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:17:58,622] [INFO] [logging.py:96:log_dist] [Rank 0] step=4080, skipped=64, lr=[2.657744770149321e-06, 2.657744770149321e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4079|ppo_ep: 1|act_loss: -0.0562744140625|cri_loss: -0.0266876220703125|unsuper_loss: 0.0 +average reward score: 5.36328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.57%) |Training time=0.45s (20.80%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4080|ppo_ep: 1|act_loss: -0.01227569580078125|cri_loss: -0.00440216064453125|unsuper_loss: 0.0 +average reward score: 5.19921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.23%) |Training time=0.46s (21.12%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4081|ppo_ep: 1|act_loss: 0.013671875|cri_loss: 0.00727081298828125|unsuper_loss: 0.0 +average reward score: 5.0625 +------------------------------------------------------------------------------------- +|E2E latency=3.27s |Gather latency=0.00s (0.00%) |Generate time=1.60s (48.92%) |Training time=0.47s (14.48%) |Others=1.20 (36.60%)|CurSamplesPerSec=9.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4082|ppo_ep: 1|act_loss: -0.0014829635620117188|cri_loss: 5.340576171875e-05|unsuper_loss: 0.0 +average reward score: 5.01171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.77%) |Training time=0.47s (21.59%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4083|ppo_ep: 1|act_loss: -0.007236480712890625|cri_loss: -0.002948760986328125|unsuper_loss: 0.0 +average reward score: 5.0703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.47s (21.58%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4084|ppo_ep: 1|act_loss: 0.002590179443359375|cri_loss: 0.001953125|unsuper_loss: 0.0 +average reward score: 5.2578125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.64%) |Training time=0.47s (21.73%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4085|ppo_ep: 1|act_loss: 0.0026874542236328125|cri_loss: 0.0015869140625|unsuper_loss: 0.0 +average reward score: 4.46875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.73%) |Training time=0.47s (21.65%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4086|ppo_ep: 1|act_loss: 0.030548095703125|cri_loss: 0.018157958984375|unsuper_loss: 0.0 +average reward score: 4.9609375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.81%) |Training time=0.47s (21.55%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4087|ppo_ep: 1|act_loss: -0.02142333984375|cri_loss: -0.01007080078125|unsuper_loss: 0.0 +average reward score: 5.02734375 +------------------------------------------------------------------------------------- +|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.77s (74.70%) |Training time=0.50s (21.09%) |Others=0.10 (4.21%)|CurSamplesPerSec=13.49 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4088|ppo_ep: 1|act_loss: 0.0472412109375|cri_loss: 0.025299072265625|unsuper_loss: 0.0 +average reward score: 4.94921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.81%) |Training time=0.47s (21.56%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46 +[2023-04-14 11:18:21,553] [INFO] [logging.py:96:log_dist] [Rank 0] step=4090, skipped=56, lr=[5.096069820074525e-06, 5.096069820074525e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:18:21,571] [INFO] [timer.py:199:stop] epoch=0/micro_step=4090/global_step=4090, RunningAvgSamplesPerSec=105.28498538237922, CurrSamplesPerSec=98.45610699342147, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:18:21,664] [INFO] [logging.py:96:log_dist] [Rank 0] step=4090, skipped=64, lr=[2.6481378265995993e-06, 2.6481378265995993e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4089|ppo_ep: 1|act_loss: -0.0207672119140625|cri_loss: -0.0098876953125|unsuper_loss: 0.0 +average reward score: 5.0625 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.24%) |Training time=0.49s (22.18%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4090|ppo_ep: 1|act_loss: -0.00774383544921875|cri_loss: -0.0029888153076171875|unsuper_loss: 0.0 +average reward score: 5.19921875 +------------------------------------------------------------------------------------- +|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.26%) |Training time=0.50s (22.23%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.34 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4091|ppo_ep: 1|act_loss: -0.01397705078125|cri_loss: -0.006793975830078125|unsuper_loss: 0.0 +average reward score: 5.12890625 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.63%) |Training time=0.48s (20.95%) |Others=0.10 (4.41%)|CurSamplesPerSec=14.10 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4092|ppo_ep: 1|act_loss: 0.02203369140625|cri_loss: 0.0111541748046875|unsuper_loss: 0.0 +average reward score: 4.75390625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.15%) |Training time=0.45s (21.18%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4093|ppo_ep: 1|act_loss: 0.019866943359375|cri_loss: 0.01026153564453125|unsuper_loss: 0.0 +average reward score: 5.515625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.45%) |Training time=0.45s (20.86%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4094|ppo_ep: 1|act_loss: 0.034454345703125|cri_loss: 0.018218994140625|unsuper_loss: 0.0 +average reward score: 5.3828125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.39%) |Training time=0.45s (20.93%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4095|ppo_ep: 1|act_loss: 0.031097412109375|cri_loss: 0.0160675048828125|unsuper_loss: 0.0 +average reward score: 4.54296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.29%) |Training time=0.45s (21.02%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4096|ppo_ep: 1|act_loss: 0.00621795654296875|cri_loss: 0.0036296844482421875|unsuper_loss: 0.0 +average reward score: 5.5 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.18%) |Training time=0.46s (21.17%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4097|ppo_ep: 1|act_loss: -0.00027561187744140625|cri_loss: 0.0006251335144042969|unsuper_loss: 0.0 +average reward score: 5.3828125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.20%) |Training time=0.46s (21.13%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4098|ppo_ep: 1|act_loss: 0.01508331298828125|cri_loss: 0.00829315185546875|unsuper_loss: 0.0 +average reward score: 5.44921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.48%) |Training time=0.45s (20.86%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46 +[2023-04-14 11:18:43,269] [INFO] [logging.py:96:log_dist] [Rank 0] step=4100, skipped=56, lr=[5.077520988872205e-06, 5.077520988872205e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:18:43,287] [INFO] [timer.py:199:stop] epoch=0/micro_step=4100/global_step=4100, RunningAvgSamplesPerSec=105.28956076846497, CurrSamplesPerSec=102.20793903058447, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:18:43,379] [INFO] [logging.py:96:log_dist] [Rank 0] step=4100, skipped=64, lr=[2.638528687289925e-06, 2.638528687289925e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4099|ppo_ep: 1|act_loss: -0.001491546630859375|cri_loss: -0.0004725456237792969|unsuper_loss: 0.0 +average reward score: 5.9921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.50%) |Training time=0.48s (21.91%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4100|ppo_ep: 1|act_loss: 0.002899169921875|cri_loss: 0.002117156982421875|unsuper_loss: 0.0 +average reward score: 5.15625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.23%) |Training time=0.46s (21.15%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4101|ppo_ep: 1|act_loss: 0.00829315185546875|cri_loss: 0.004711151123046875|unsuper_loss: 0.0 +average reward score: 5.30859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.13%) |Training time=0.44s (20.12%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4102|ppo_ep: 1|act_loss: -0.0052642822265625|cri_loss: -0.00102996826171875|unsuper_loss: 0.0 +average reward score: 4.90234375 +------------------------------------------------------------------------------------- +|E2E latency=2.61s |Gather latency=0.00s (0.00%) |Generate time=1.78s (68.45%) |Training time=0.46s (17.74%) |Others=0.36 (13.81%)|CurSamplesPerSec=12.27 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4103|ppo_ep: 1|act_loss: 0.014617919921875|cri_loss: 0.00801849365234375|unsuper_loss: 0.0 +average reward score: 5.3046875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.52%) |Training time=0.43s (19.75%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4104|ppo_ep: 1|act_loss: 0.0107421875|cri_loss: 0.005863189697265625|unsuper_loss: 0.0 +average reward score: 4.99609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.66%) |Training time=0.42s (19.66%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4105|ppo_ep: 1|act_loss: 0.0217437744140625|cri_loss: 0.01155853271484375|unsuper_loss: 0.0 +average reward score: 5.71875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.80%) |Training time=0.44s (20.54%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4106|ppo_ep: 1|act_loss: 0.020599365234375|cri_loss: 0.01052093505859375|unsuper_loss: 0.0 +average reward score: 5.765625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.45%) |Training time=0.45s (20.88%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4107|ppo_ep: 1|act_loss: 0.05413818359375|cri_loss: 0.0306396484375|unsuper_loss: 0.0 +average reward score: 4.96484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.07%) |Training time=0.46s (21.29%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4108|ppo_ep: 1|act_loss: 0.0389404296875|cri_loss: 0.0228271484375|unsuper_loss: 0.0 +average reward score: 4.6796875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.82%) |Training time=0.44s (20.49%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +[2023-04-14 11:19:05,339] [INFO] [logging.py:96:log_dist] [Rank 0] step=4110, skipped=56, lr=[5.058968414699655e-06, 5.058968414699655e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:19:05,358] [INFO] [timer.py:199:stop] epoch=0/micro_step=4110/global_step=4110, RunningAvgSamplesPerSec=105.3072645068091, CurrSamplesPerSec=110.64967844108436, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:19:05,450] [INFO] [logging.py:96:log_dist] [Rank 0] step=4110, skipped=64, lr=[2.628917494650923e-06, 2.628917494650923e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4109|ppo_ep: 1|act_loss: -0.0223541259765625|cri_loss: -0.01024627685546875|unsuper_loss: 0.0 +average reward score: 5.34765625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.41%) |Training time=0.45s (20.94%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4110|ppo_ep: 1|act_loss: -0.004241943359375|cri_loss: -0.0017843246459960938|unsuper_loss: 0.0 +average reward score: 5.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.65%) |Training time=0.47s (21.60%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4111|ppo_ep: 1|act_loss: -0.0271453857421875|cri_loss: -0.0132598876953125|unsuper_loss: 0.0 +average reward score: 6.109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.88%) |Training time=0.46s (21.46%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4112|ppo_ep: 1|act_loss: -0.0033969879150390625|cri_loss: -0.001392364501953125|unsuper_loss: 0.0 +average reward score: 4.546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.47s (21.89%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4113|ppo_ep: 1|act_loss: -0.0269622802734375|cri_loss: -0.01311492919921875|unsuper_loss: 0.0 +average reward score: 5.859375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.65%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4114|ppo_ep: 1|act_loss: 0.013763427734375|cri_loss: 0.007221221923828125|unsuper_loss: 0.0 +average reward score: 4.4296875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.76%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4115|ppo_ep: 1|act_loss: 0.00577545166015625|cri_loss: 0.0037689208984375|unsuper_loss: 0.0 +average reward score: 4.8515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.61%) |Training time=0.47s (21.72%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4116|ppo_ep: 1|act_loss: 0.0099029541015625|cri_loss: 0.0052490234375|unsuper_loss: 0.0 +average reward score: 4.92578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.77%) |Training time=0.46s (21.30%) |Others=0.11 (4.92%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4117|ppo_ep: 1|act_loss: 0.0099945068359375|cri_loss: 0.005489349365234375|unsuper_loss: 0.0 +average reward score: 5.71484375 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.74s (74.40%) |Training time=0.48s (20.70%) |Others=0.11 (4.90%)|CurSamplesPerSec=13.66 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4118|ppo_ep: 1|act_loss: 0.04541015625|cri_loss: 0.023223876953125|unsuper_loss: 0.0 +average reward score: 5.296875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.95%) |Training time=0.46s (21.08%) |Others=0.11 (4.98%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +[2023-04-14 11:19:27,136] [INFO] [logging.py:96:log_dist] [Rank 0] step=4120, skipped=56, lr=[5.040412372550783e-06, 5.040412372550783e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:19:27,154] [INFO] [timer.py:199:stop] epoch=0/micro_step=4120/global_step=4120, RunningAvgSamplesPerSec=105.30798613236172, CurrSamplesPerSec=111.58476661744949, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:19:27,247] [INFO] [logging.py:96:log_dist] [Rank 0] step=4120, skipped=64, lr=[2.6193043911436534e-06, 2.6193043911436534e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4119|ppo_ep: 1|act_loss: -0.01499176025390625|cri_loss: -0.007236480712890625|unsuper_loss: 0.0 +average reward score: 4.42578125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.81%) |Training time=0.45s (20.59%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4120|ppo_ep: 1|act_loss: -0.0572509765625|cri_loss: -0.027008056640625|unsuper_loss: 0.0 +average reward score: 4.7578125 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.84%) |Training time=0.47s (20.74%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.06 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4121|ppo_ep: 1|act_loss: -0.0028820037841796875|cri_loss: -0.0010728836059570312|unsuper_loss: 0.0 +average reward score: 5.9375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.32%) |Training time=0.45s (20.98%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4122|ppo_ep: 1|act_loss: 0.009918212890625|cri_loss: 0.005207061767578125|unsuper_loss: 0.0 +average reward score: 5.60546875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.30%) |Training time=0.45s (21.02%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4123|ppo_ep: 1|act_loss: 0.0007719993591308594|cri_loss: 0.0010890960693359375|unsuper_loss: 0.0 +average reward score: 5.359375 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.59s (67.95%) |Training time=0.45s (19.46%) |Others=0.29 (12.59%)|CurSamplesPerSec=13.70 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4124|ppo_ep: 1|act_loss: -0.02691650390625|cri_loss: -0.013092041015625|unsuper_loss: 0.0 +average reward score: 5.3984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.46s (21.29%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4125|ppo_ep: 1|act_loss: -0.0169830322265625|cri_loss: -0.00821685791015625|unsuper_loss: 0.0 +average reward score: 4.98828125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.77%) |Training time=0.46s (21.57%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4126|ppo_ep: 1|act_loss: 0.0174407958984375|cri_loss: 0.0089874267578125|unsuper_loss: 0.0 +average reward score: 6.0703125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.02%) |Training time=0.46s (21.29%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4127|ppo_ep: 1|act_loss: 0.0179443359375|cri_loss: 0.00942230224609375|unsuper_loss: 0.0 +average reward score: 5.984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.67%) |Training time=0.47s (21.70%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4128|ppo_ep: 1|act_loss: 0.0059051513671875|cri_loss: 0.0035343170166015625|unsuper_loss: 0.0 +average reward score: 4.87109375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.78%) |Training time=0.46s (21.55%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.46 +[2023-04-14 11:19:48,896] [INFO] [logging.py:96:log_dist] [Rank 0] step=4130, skipped=56, lr=[5.021853137470893e-06, 5.021853137470893e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:19:48,914] [INFO] [timer.py:199:stop] epoch=0/micro_step=4130/global_step=4130, RunningAvgSamplesPerSec=105.31352380297785, CurrSamplesPerSec=104.6573610334602, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:19:49,007] [INFO] [logging.py:96:log_dist] [Rank 0] step=4130, skipped=64, lr=[2.6096895192575005e-06, 2.6096895192575005e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4129|ppo_ep: 1|act_loss: 0.0205078125|cri_loss: 0.01119232177734375|unsuper_loss: 0.0 +average reward score: 5.18359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.53%) |Training time=0.47s (21.82%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46 +[2023-04-14 11:19:51,138] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 4130|ppo_ep: 1|act_loss: -0.0105438232421875|cri_loss: -0.00447845458984375|unsuper_loss: 0.0 +average reward score: 5.0234375 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.18%) |Training time=0.46s (21.63%) |Others=0.09 (4.19%)|CurSamplesPerSec=15.03 |AvgSamplesPerSec=14.46 +[2023-04-14 11:19:53,458] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 4131|ppo_ep: 1|act_loss: 0.0074615478515625|cri_loss: 0.0038928985595703125|unsuper_loss: 0.0 +average reward score: 5.30859375 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.67s (71.92%) |Training time=0.56s (24.20%) |Others=0.09 (3.87%)|CurSamplesPerSec=13.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4132|ppo_ep: 1|act_loss: -0.0083770751953125|cri_loss: -0.0036468505859375|unsuper_loss: 0.0 +average reward score: 4.953125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.01%) |Training time=0.46s (21.31%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4133|ppo_ep: 1|act_loss: -0.0003032684326171875|cri_loss: 0.001373291015625|unsuper_loss: 0.0 +average reward score: 4.80859375 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.54%) |Training time=0.44s (20.76%) |Others=0.10 (4.70%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4134|ppo_ep: 1|act_loss: -0.030120849609375|cri_loss: -0.013824462890625|unsuper_loss: 0.0 +average reward score: 5.36328125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.81%) |Training time=0.46s (21.50%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4135|ppo_ep: 1|act_loss: 0.003726959228515625|cri_loss: 0.002178192138671875|unsuper_loss: 0.0 +average reward score: 5.06640625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.19%) |Training time=0.46s (21.17%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4136|ppo_ep: 1|act_loss: -0.03802490234375|cri_loss: -0.0180511474609375|unsuper_loss: 0.0 +average reward score: 4.85546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.01%) |Training time=0.46s (21.32%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4137|ppo_ep: 1|act_loss: 0.008544921875|cri_loss: 0.00445556640625|unsuper_loss: 0.0 +average reward score: 5.66796875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.35%) |Training time=0.45s (20.98%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4138|ppo_ep: 1|act_loss: 0.0252685546875|cri_loss: 0.01383209228515625|unsuper_loss: 0.0 +average reward score: 5.7421875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.82%) |Training time=0.46s (21.51%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46 +[2023-04-14 11:20:10,518] [INFO] [logging.py:96:log_dist] [Rank 0] step=4140, skipped=56, lr=[5.003290984552626e-06, 5.003290984552626e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:20:10,536] [INFO] [timer.py:199:stop] epoch=0/micro_step=4140/global_step=4140, RunningAvgSamplesPerSec=105.31356977645355, CurrSamplesPerSec=108.1466958779957, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:20:10,629] [INFO] [logging.py:96:log_dist] [Rank 0] step=4140, skipped=66, lr=[2.6019964442854366e-06, 2.6019964442854366e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4139|ppo_ep: 1|act_loss: 0.03631591796875|cri_loss: 0.0190582275390625|unsuper_loss: 0.0 +average reward score: 6.0703125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.93%) |Training time=0.46s (21.40%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4140|ppo_ep: 1|act_loss: 0.0304107666015625|cri_loss: 0.0156097412109375|unsuper_loss: 0.0 +average reward score: 5.5703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.68%) |Training time=0.47s (21.65%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4141|ppo_ep: 1|act_loss: 0.02288818359375|cri_loss: 0.01279449462890625|unsuper_loss: 0.0 +average reward score: 4.734375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.59%) |Training time=0.47s (21.76%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4142|ppo_ep: 1|act_loss: -0.0200042724609375|cri_loss: -0.00955963134765625|unsuper_loss: 0.0 +average reward score: 5.38671875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.58%) |Training time=0.47s (21.75%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4143|ppo_ep: 1|act_loss: -0.025634765625|cri_loss: -0.0119476318359375|unsuper_loss: 0.0 +average reward score: 5.26171875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.52%) |Training time=0.47s (21.82%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4144|ppo_ep: 1|act_loss: -0.01433563232421875|cri_loss: -0.006839752197265625|unsuper_loss: 0.0 +average reward score: 5.3203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.59%) |Training time=0.47s (21.81%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4145|ppo_ep: 1|act_loss: -0.00942230224609375|cri_loss: -0.0035762786865234375|unsuper_loss: 0.0 +average reward score: 5.3984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.87%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4146|ppo_ep: 1|act_loss: 0.019073486328125|cri_loss: 0.01001739501953125|unsuper_loss: 0.0 +average reward score: 4.6171875 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.61s (69.23%) |Training time=0.61s (26.14%) |Others=0.11 (4.63%)|CurSamplesPerSec=13.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4147|ppo_ep: 1|act_loss: 0.0037994384765625|cri_loss: 0.002201080322265625|unsuper_loss: 0.0 +average reward score: 5.17578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.52%) |Training time=0.47s (21.82%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4148|ppo_ep: 1|act_loss: -0.037200927734375|cri_loss: -0.0171966552734375|unsuper_loss: 0.0 +average reward score: 5.0546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.39%) |Training time=0.48s (21.98%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +[2023-04-14 11:20:32,261] [INFO] [logging.py:96:log_dist] [Rank 0] step=4150, skipped=56, lr=[4.984726188931862e-06, 4.984726188931862e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:20:32,296] [INFO] [timer.py:199:stop] epoch=0/micro_step=4150/global_step=4150, RunningAvgSamplesPerSec=105.30342576879355, CurrSamplesPerSec=100.15022639020299, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:20:32,388] [INFO] [logging.py:96:log_dist] [Rank 0] step=4150, skipped=66, lr=[2.592378748472863e-06, 2.592378748472863e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4149|ppo_ep: 1|act_loss: -0.03155517578125|cri_loss: -0.0149993896484375|unsuper_loss: 0.0 +average reward score: 5.51171875 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.65%) |Training time=0.48s (21.83%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4150|ppo_ep: 1|act_loss: 0.03875732421875|cri_loss: 0.0203857421875|unsuper_loss: 0.0 +average reward score: 5.96484375 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.10%) |Training time=0.46s (20.47%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.09 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4151|ppo_ep: 1|act_loss: 0.056396484375|cri_loss: 0.029937744140625|unsuper_loss: 0.0 +average reward score: 6.0234375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.16%) |Training time=0.45s (21.19%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4152|ppo_ep: 1|act_loss: 0.032470703125|cri_loss: 0.0168304443359375|unsuper_loss: 0.0 +average reward score: 6.14453125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.99%) |Training time=0.46s (21.34%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4153|ppo_ep: 1|act_loss: 0.056732177734375|cri_loss: 0.0311279296875|unsuper_loss: 0.0 +average reward score: 4.90234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.96%) |Training time=0.46s (21.40%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4154|ppo_ep: 1|act_loss: -0.00653839111328125|cri_loss: -0.001399993896484375|unsuper_loss: 0.0 +average reward score: 5.4375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.98%) |Training time=0.46s (21.39%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4155|ppo_ep: 1|act_loss: 0.0716552734375|cri_loss: 0.03912353515625|unsuper_loss: 0.0 +average reward score: 5.58984375 +------------------------------------------------------------------------------------- +|E2E latency=2.38s |Gather latency=0.00s (0.00%) |Generate time=1.61s (67.83%) |Training time=0.45s (19.08%) |Others=0.31 (13.09%)|CurSamplesPerSec=13.47 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4156|ppo_ep: 1|act_loss: -0.0033702850341796875|cri_loss: -0.000652313232421875|unsuper_loss: 0.0 +average reward score: 6.1640625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.96%) |Training time=0.46s (21.38%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4157|ppo_ep: 1|act_loss: -0.047393798828125|cri_loss: -0.022369384765625|unsuper_loss: 0.0 +average reward score: 4.6484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.46s (21.34%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4158|ppo_ep: 1|act_loss: -0.033660888671875|cri_loss: -0.0152130126953125|unsuper_loss: 0.0 +average reward score: 5.66796875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.25%) |Training time=0.45s (21.10%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46 +[2023-04-14 11:20:54,125] [INFO] [logging.py:96:log_dist] [Rank 0] step=4160, skipped=56, lr=[4.966159025783661e-06, 4.966159025783661e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:20:54,143] [INFO] [timer.py:199:stop] epoch=0/micro_step=4160/global_step=4160, RunningAvgSamplesPerSec=105.31034590166539, CurrSamplesPerSec=109.2526283915123, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:20:54,235] [INFO] [logging.py:96:log_dist] [Rank 0] step=4160, skipped=66, lr=[2.5827596833843844e-06, 2.5827596833843844e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4159|ppo_ep: 1|act_loss: -0.035186767578125|cri_loss: -0.016571044921875|unsuper_loss: 0.0 +average reward score: 5.31640625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.20%) |Training time=0.46s (21.16%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4160|ppo_ep: 1|act_loss: 0.023956298828125|cri_loss: 0.0150146484375|unsuper_loss: 0.0 +average reward score: 4.5390625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.21%) |Training time=0.45s (21.13%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4161|ppo_ep: 1|act_loss: 0.032257080078125|cri_loss: 0.0177764892578125|unsuper_loss: 0.0 +average reward score: 5.26953125 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.55%) |Training time=0.47s (20.15%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4162|ppo_ep: 1|act_loss: 0.0159454345703125|cri_loss: 0.00836181640625|unsuper_loss: 0.0 +average reward score: 5.22265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.52%) |Training time=0.45s (20.79%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4163|ppo_ep: 1|act_loss: 0.042205810546875|cri_loss: 0.0225067138671875|unsuper_loss: 0.0 +average reward score: 6.37109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.00%) |Training time=0.46s (21.35%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4164|ppo_ep: 1|act_loss: 0.018768310546875|cri_loss: 0.0106201171875|unsuper_loss: 0.0 +average reward score: 5.2109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.17%) |Training time=0.45s (21.15%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4165|ppo_ep: 1|act_loss: 0.022857666015625|cri_loss: 0.01219940185546875|unsuper_loss: 0.0 +average reward score: 4.62109375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.74%) |Training time=0.47s (21.61%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4166|ppo_ep: 1|act_loss: -0.009429931640625|cri_loss: -0.003875732421875|unsuper_loss: 0.0 +average reward score: 5.7734375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.43%) |Training time=0.45s (20.90%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4167|ppo_ep: 1|act_loss: 0.03271484375|cri_loss: 0.018402099609375|unsuper_loss: 0.0 +average reward score: 5.53515625 +------------------------------------------------------------------------------------- +|E2E latency=2.67s |Gather latency=0.00s (0.00%) |Generate time=1.59s (59.66%) |Training time=0.44s (16.65%) |Others=0.63 (23.69%)|CurSamplesPerSec=11.98 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4168|ppo_ep: 1|act_loss: -0.02325439453125|cri_loss: -0.01129150390625|unsuper_loss: 0.0 +average reward score: 4.9296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.65%) |Training time=0.44s (20.65%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46 +[2023-04-14 11:21:16,351] [INFO] [logging.py:96:log_dist] [Rank 0] step=4170, skipped=56, lr=[4.94758977031817e-06, 4.94758977031817e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:21:16,369] [INFO] [timer.py:199:stop] epoch=0/micro_step=4170/global_step=4170, RunningAvgSamplesPerSec=105.3199038682373, CurrSamplesPerSec=108.86511188022202, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:21:16,461] [INFO] [logging.py:96:log_dist] [Rank 0] step=4170, skipped=66, lr=[2.5731393915977522e-06, 2.5731393915977522e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4169|ppo_ep: 1|act_loss: -0.0391845703125|cri_loss: -0.0189056396484375|unsuper_loss: 0.0 +average reward score: 5.53125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.16%) |Training time=0.46s (21.22%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4170|ppo_ep: 1|act_loss: -0.0283660888671875|cri_loss: -0.0117950439453125|unsuper_loss: 0.0 +average reward score: 6.078125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.33%) |Training time=0.45s (20.99%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4171|ppo_ep: 1|act_loss: 0.000972747802734375|cri_loss: 0.0014219284057617188|unsuper_loss: 0.0 +average reward score: 4.6171875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.62%) |Training time=0.44s (20.69%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4172|ppo_ep: 1|act_loss: -0.00641632080078125|cri_loss: -0.00286102294921875|unsuper_loss: 0.0 +average reward score: 4.84375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.36%) |Training time=0.45s (20.99%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4173|ppo_ep: 1|act_loss: 0.0224609375|cri_loss: 0.01314544677734375|unsuper_loss: 0.0 +average reward score: 5.78515625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.95%) |Training time=0.46s (21.40%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4174|ppo_ep: 1|act_loss: -0.035736083984375|cri_loss: -0.0157012939453125|unsuper_loss: 0.0 +average reward score: 5.8359375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.41%) |Training time=0.45s (20.92%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4175|ppo_ep: 1|act_loss: 0.0092315673828125|cri_loss: 0.005367279052734375|unsuper_loss: 0.0 +average reward score: 5.22265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.33%) |Training time=0.45s (21.03%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4176|ppo_ep: 1|act_loss: 0.028106689453125|cri_loss: 0.0146331787109375|unsuper_loss: 0.0 +average reward score: 4.40234375 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.41%) |Training time=0.48s (20.31%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.66 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4177|ppo_ep: 1|act_loss: 0.022796630859375|cri_loss: 0.0117950439453125|unsuper_loss: 0.0 +average reward score: 6.27734375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.42%) |Training time=0.45s (20.89%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4178|ppo_ep: 1|act_loss: 0.0141754150390625|cri_loss: 0.007419586181640625|unsuper_loss: 0.0 +average reward score: 6.25390625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.00%) |Training time=0.47s (21.39%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46 +[2023-04-14 11:21:38,140] [INFO] [logging.py:96:log_dist] [Rank 0] step=4180, skipped=56, lr=[4.92901869777655e-06, 4.92901869777655e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:21:39,028] [INFO] [timer.py:199:stop] epoch=0/micro_step=4180/global_step=4180, RunningAvgSamplesPerSec=105.2561419271204, CurrSamplesPerSec=27.181817887243163, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:21:39,122] [INFO] [logging.py:96:log_dist] [Rank 0] step=4180, skipped=66, lr=[2.563518015708896e-06, 2.563518015708896e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4179|ppo_ep: 1|act_loss: 0.0024871826171875|cri_loss: 0.0018558502197265625|unsuper_loss: 0.0 +average reward score: 5.4375 +------------------------------------------------------------------------------------- +|E2E latency=3.13s |Gather latency=0.00s (0.00%) |Generate time=1.69s (54.01%) |Training time=1.34s (42.79%) |Others=0.10 (3.20%)|CurSamplesPerSec=10.21 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4180|ppo_ep: 1|act_loss: 0.01177215576171875|cri_loss: 0.00606536865234375|unsuper_loss: 0.0 +average reward score: 5.9375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.97%) |Training time=0.44s (20.36%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4181|ppo_ep: 1|act_loss: -0.051971435546875|cri_loss: -0.025054931640625|unsuper_loss: 0.0 +average reward score: 4.88671875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.38%) |Training time=0.43s (19.87%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4182|ppo_ep: 1|act_loss: -0.03253173828125|cri_loss: -0.015716552734375|unsuper_loss: 0.0 +average reward score: 5.7109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.21%) |Training time=0.44s (20.16%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4183|ppo_ep: 1|act_loss: 0.00038909912109375|cri_loss: 0.0009660720825195312|unsuper_loss: 0.0 +average reward score: 6.40625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.74%) |Training time=0.44s (20.58%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4184|ppo_ep: 1|act_loss: -0.037841796875|cri_loss: -0.0181884765625|unsuper_loss: 0.0 +average reward score: 4.83984375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.50%) |Training time=0.45s (20.82%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4185|ppo_ep: 1|act_loss: -0.006397247314453125|cri_loss: -0.002197265625|unsuper_loss: 0.0 +average reward score: 5.51171875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.35%) |Training time=0.45s (20.99%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4186|ppo_ep: 1|act_loss: -0.02880859375|cri_loss: -0.01381683349609375|unsuper_loss: 0.0 +average reward score: 4.609375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.18%) |Training time=0.45s (21.15%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4187|ppo_ep: 1|act_loss: -0.026763916015625|cri_loss: -0.0127716064453125|unsuper_loss: 0.0 +average reward score: 5.296875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.53%) |Training time=0.45s (20.78%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4188|ppo_ep: 1|act_loss: -0.01885986328125|cri_loss: -0.00865936279296875|unsuper_loss: 0.0 +average reward score: 4.49609375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.45s (21.09%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46 +[2023-04-14 11:22:00,467] [INFO] [logging.py:96:log_dist] [Rank 0] step=4190, skipped=56, lr=[4.910446083426898e-06, 4.910446083426898e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:22:00,728] [INFO] [timer.py:199:stop] epoch=0/micro_step=4190/global_step=4190, RunningAvgSamplesPerSec=105.25487124469956, CurrSamplesPerSec=59.8330459467797, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:22:00,820] [INFO] [logging.py:96:log_dist] [Rank 0] step=4190, skipped=66, lr=[2.5538956983298186e-06, 2.5538956983298186e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4189|ppo_ep: 1|act_loss: -0.0031604766845703125|cri_loss: -0.000560760498046875|unsuper_loss: 0.0 +average reward score: 5.26171875 +------------------------------------------------------------------------------------- +|E2E latency=2.39s |Gather latency=0.00s (0.00%) |Generate time=1.59s (66.67%) |Training time=0.70s (29.16%) |Others=0.10 (4.17%)|CurSamplesPerSec=13.38 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4190|ppo_ep: 1|act_loss: 0.0625|cri_loss: 0.032318115234375|unsuper_loss: 0.0 +average reward score: 5.0625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.55%) |Training time=0.45s (20.77%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4191|ppo_ep: 1|act_loss: 0.029144287109375|cri_loss: 0.014923095703125|unsuper_loss: 0.0 +average reward score: 4.51171875 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.74%) |Training time=0.47s (19.96%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4192|ppo_ep: 1|act_loss: 0.008636474609375|cri_loss: 0.0048980712890625|unsuper_loss: 0.0 +average reward score: 5.68359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.19%) |Training time=0.45s (21.16%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4193|ppo_ep: 1|act_loss: -0.030487060546875|cri_loss: -0.01329803466796875|unsuper_loss: 0.0 +average reward score: 6.06640625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.44%) |Training time=0.45s (20.90%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4194|ppo_ep: 1|act_loss: 0.003936767578125|cri_loss: 0.0020847320556640625|unsuper_loss: 0.0 +average reward score: 5.24609375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.68%) |Training time=0.44s (20.63%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4195|ppo_ep: 1|act_loss: -0.036102294921875|cri_loss: -0.01715087890625|unsuper_loss: 0.0 +average reward score: 6.40625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.57%) |Training time=0.45s (20.77%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4196|ppo_ep: 1|act_loss: 6.031990051269531e-05|cri_loss: 0.00016546249389648438|unsuper_loss: 0.0 +average reward score: 5.21484375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.30%) |Training time=0.45s (21.03%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4197|ppo_ep: 1|act_loss: 0.0220947265625|cri_loss: 0.01141357421875|unsuper_loss: 0.0 +average reward score: 5.03125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.87%) |Training time=0.44s (20.42%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4198|ppo_ep: 1|act_loss: 0.03314208984375|cri_loss: 0.017913818359375|unsuper_loss: 0.0 +average reward score: 5.17578125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.64%) |Training time=0.44s (20.68%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.46 +[2023-04-14 11:22:22,354] [INFO] [logging.py:96:log_dist] [Rank 0] step=4200, skipped=56, lr=[4.8918722025601635e-06, 4.8918722025601635e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:22:22,373] [INFO] [timer.py:199:stop] epoch=0/micro_step=4200/global_step=4200, RunningAvgSamplesPerSec=105.26922859440923, CurrSamplesPerSec=108.52489355130318, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:22:22,465] [INFO] [logging.py:96:log_dist] [Rank 0] step=4200, skipped=66, lr=[2.5442725820864755e-06, 2.5442725820864755e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4199|ppo_ep: 1|act_loss: 0.003414154052734375|cri_loss: 0.00225067138671875|unsuper_loss: 0.0 +average reward score: 5.47265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.07%) |Training time=0.46s (21.29%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4200|ppo_ep: 1|act_loss: -0.00171661376953125|cri_loss: -0.0006322860717773438|unsuper_loss: 0.0 +average reward score: 5.171875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.28%) |Training time=0.45s (21.04%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4201|ppo_ep: 1|act_loss: -0.006732940673828125|cri_loss: -0.0029163360595703125|unsuper_loss: 0.0 +average reward score: 5.734375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.19%) |Training time=0.45s (21.16%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4202|ppo_ep: 1|act_loss: 0.0506591796875|cri_loss: 0.0276336669921875|unsuper_loss: 0.0 +average reward score: 6.171875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.45s (21.07%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4203|ppo_ep: 1|act_loss: 0.038818359375|cri_loss: 0.0203399658203125|unsuper_loss: 0.0 +average reward score: 5.9375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.00%) |Training time=0.45s (20.84%) |Others=0.11 (5.16%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4204|ppo_ep: 1|act_loss: -0.0139923095703125|cri_loss: -0.00634002685546875|unsuper_loss: 0.0 +average reward score: 4.6484375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.08%) |Training time=0.46s (21.26%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4205|ppo_ep: 1|act_loss: 0.00275421142578125|cri_loss: 0.0015354156494140625|unsuper_loss: 0.0 +average reward score: 5.109375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.80%) |Training time=0.46s (21.54%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4206|ppo_ep: 1|act_loss: -0.010833740234375|cri_loss: -0.0052947998046875|unsuper_loss: 0.0 +average reward score: 5.41796875 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.21%) |Training time=0.48s (20.51%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4207|ppo_ep: 1|act_loss: 0.00415802001953125|cri_loss: 0.00371551513671875|unsuper_loss: 0.0 +average reward score: 4.78125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.46s (21.31%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4208|ppo_ep: 1|act_loss: 0.00266265869140625|cri_loss: 0.0019474029541015625|unsuper_loss: 0.0 +average reward score: 4.9140625 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.45%) |Training time=0.48s (21.17%) |Others=0.10 (4.38%)|CurSamplesPerSec=14.02 |AvgSamplesPerSec=14.46 +[2023-04-14 11:22:44,152] [INFO] [logging.py:96:log_dist] [Rank 0] step=4210, skipped=56, lr=[4.8732973304860655e-06, 4.8732973304860655e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:22:44,170] [INFO] [timer.py:199:stop] epoch=0/micro_step=4210/global_step=4210, RunningAvgSamplesPerSec=105.27366239794104, CurrSamplesPerSec=105.68341682427847, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:22:44,263] [INFO] [logging.py:96:log_dist] [Rank 0] step=4210, skipped=66, lr=[2.5346488096166647e-06, 2.5346488096166647e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4209|ppo_ep: 1|act_loss: 0.004459381103515625|cri_loss: 0.0029392242431640625|unsuper_loss: 0.0 +average reward score: 5.3046875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.70%) |Training time=0.47s (21.66%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4210|ppo_ep: 1|act_loss: -0.007663726806640625|cri_loss: -0.0016937255859375|unsuper_loss: 0.0 +average reward score: 5.87890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.40%) |Training time=0.43s (19.88%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4211|ppo_ep: 1|act_loss: 0.0038623809814453125|cri_loss: 0.002246856689453125|unsuper_loss: 0.0 +average reward score: 4.28515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.37%) |Training time=0.48s (22.00%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4212|ppo_ep: 1|act_loss: -0.0183563232421875|cri_loss: -0.00844573974609375|unsuper_loss: 0.0 +average reward score: 4.8671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.41%) |Training time=0.47s (21.94%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4213|ppo_ep: 1|act_loss: -0.01000213623046875|cri_loss: -0.004795074462890625|unsuper_loss: 0.0 +average reward score: 4.96875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.37%) |Training time=0.47s (21.99%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4214|ppo_ep: 1|act_loss: 0.051055908203125|cri_loss: 0.0279693603515625|unsuper_loss: 0.0 +average reward score: 6.16796875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.85%) |Training time=0.46s (21.52%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4215|ppo_ep: 1|act_loss: -0.01861572265625|cri_loss: -0.00881195068359375|unsuper_loss: 0.0 +average reward score: 5.0390625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.08%) |Training time=0.46s (21.27%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4216|ppo_ep: 1|act_loss: 0.003650665283203125|cri_loss: 0.0027332305908203125|unsuper_loss: 0.0 +average reward score: 5.94140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.01%) |Training time=0.46s (21.32%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4217|ppo_ep: 1|act_loss: -0.02685546875|cri_loss: -0.0128631591796875|unsuper_loss: 0.0 +average reward score: 4.828125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.15%) |Training time=0.46s (21.18%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4218|ppo_ep: 1|act_loss: -0.01708984375|cri_loss: -0.007843017578125|unsuper_loss: 0.0 +average reward score: 5.98828125 +------------------------------------------------------------------------------------- +|E2E latency=3.50s |Gather latency=0.00s (0.00%) |Generate time=1.60s (45.66%) |Training time=0.45s (12.92%) |Others=1.45 (41.42%)|CurSamplesPerSec=9.14 |AvgSamplesPerSec=14.46 +[2023-04-14 11:23:07,071] [INFO] [logging.py:96:log_dist] [Rank 0] step=4220, skipped=56, lr=[4.85472174252902e-06, 4.85472174252902e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:23:07,089] [INFO] [timer.py:199:stop] epoch=0/micro_step=4220/global_step=4220, RunningAvgSamplesPerSec=105.27928430346253, CurrSamplesPerSec=109.89310827053387, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:23:07,182] [INFO] [logging.py:96:log_dist] [Rank 0] step=4220, skipped=66, lr=[2.5250245235679094e-06, 2.5250245235679094e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4219|ppo_ep: 1|act_loss: 0.005771636962890625|cri_loss: 0.0032749176025390625|unsuper_loss: 0.0 +average reward score: 5.828125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.26%) |Training time=0.45s (21.08%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4220|ppo_ep: 1|act_loss: -0.00738525390625|cri_loss: -0.003170013427734375|unsuper_loss: 0.0 +average reward score: 5.22265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.93%) |Training time=0.46s (21.40%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4221|ppo_ep: 1|act_loss: -0.004085540771484375|cri_loss: -0.0013275146484375|unsuper_loss: 0.0 +average reward score: 5.140625 +------------------------------------------------------------------------------------- +|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.23%) |Training time=0.48s (20.52%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.64 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4222|ppo_ep: 1|act_loss: -0.0155792236328125|cri_loss: -0.00717926025390625|unsuper_loss: 0.0 +average reward score: 4.3359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.80%) |Training time=0.46s (21.56%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4223|ppo_ep: 1|act_loss: -0.0067138671875|cri_loss: -0.003093719482421875|unsuper_loss: 0.0 +average reward score: 5.1875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.20%) |Training time=0.45s (21.13%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4224|ppo_ep: 1|act_loss: -0.0264434814453125|cri_loss: -0.012542724609375|unsuper_loss: 0.0 +average reward score: 5.44140625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.68%) |Training time=0.44s (20.64%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4225|ppo_ep: 1|act_loss: -0.0164642333984375|cri_loss: -0.005649566650390625|unsuper_loss: 0.0 +average reward score: 5.203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.92%) |Training time=0.44s (20.37%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4226|ppo_ep: 1|act_loss: -0.0190277099609375|cri_loss: -0.0092315673828125|unsuper_loss: 0.0 +average reward score: 6.03125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.59%) |Training time=0.44s (20.70%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4227|ppo_ep: 1|act_loss: 0.0005841255187988281|cri_loss: 0.0010204315185546875|unsuper_loss: 0.0 +average reward score: 5.6015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.05%) |Training time=0.46s (21.32%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4228|ppo_ep: 1|act_loss: 0.0234222412109375|cri_loss: 0.01303863525390625|unsuper_loss: 0.0 +average reward score: 5.36328125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.14%) |Training time=0.46s (21.20%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46 +[2023-04-14 11:23:28,778] [INFO] [logging.py:96:log_dist] [Rank 0] step=4230, skipped=56, lr=[4.836145714024048e-06, 4.836145714024048e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:23:28,796] [INFO] [timer.py:199:stop] epoch=0/micro_step=4230/global_step=4230, RunningAvgSamplesPerSec=105.28801567814266, CurrSamplesPerSec=109.88582062485825, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:23:28,889] [INFO] [logging.py:96:log_dist] [Rank 0] step=4230, skipped=66, lr=[2.515399866595347e-06, 2.515399866595347e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4229|ppo_ep: 1|act_loss: 0.04559326171875|cri_loss: 0.0240631103515625|unsuper_loss: 0.0 +average reward score: 6.4765625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.19%) |Training time=0.45s (21.15%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4230|ppo_ep: 1|act_loss: 0.021453857421875|cri_loss: 0.011810302734375|unsuper_loss: 0.0 +average reward score: 5.09765625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.94%) |Training time=0.46s (21.42%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4231|ppo_ep: 1|act_loss: 0.0029163360595703125|cri_loss: 0.001949310302734375|unsuper_loss: 0.0 +average reward score: 4.72265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.57%) |Training time=0.44s (20.74%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46 +[2023-04-14 11:23:35,324] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 4232|ppo_ep: 1|act_loss: 0.0218048095703125|cri_loss: 0.011627197265625|unsuper_loss: 0.0 +average reward score: 5.7265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.62%) |Training time=0.45s (21.19%) |Others=0.09 (4.18%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.46 +[2023-04-14 11:23:37,455] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 4233|ppo_ep: 1|act_loss: -0.028045654296875|cri_loss: -0.0132598876953125|unsuper_loss: 0.0 +average reward score: 5.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.71%) |Training time=0.45s (21.10%) |Others=0.09 (4.19%)|CurSamplesPerSec=15.02 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4234|ppo_ep: 1|act_loss: 0.005527496337890625|cri_loss: 0.002880096435546875|unsuper_loss: 0.0 +average reward score: 5.890625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.29%) |Training time=0.45s (21.04%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4235|ppo_ep: 1|act_loss: 0.05145263671875|cri_loss: 0.0276641845703125|unsuper_loss: 0.0 +average reward score: 6.6015625 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.76s (76.17%) |Training time=0.45s (19.49%) |Others=0.10 (4.34%)|CurSamplesPerSec=13.83 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4236|ppo_ep: 1|act_loss: -0.0341796875|cri_loss: -0.015838623046875|unsuper_loss: 0.0 +average reward score: 4.73828125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.71%) |Training time=0.44s (20.62%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4237|ppo_ep: 1|act_loss: -0.017791748046875|cri_loss: -0.0085601806640625|unsuper_loss: 0.0 +average reward score: 4.92578125 +------------------------------------------------------------------------------------- +|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.74s (75.86%) |Training time=0.45s (19.78%) |Others=0.10 (4.36%)|CurSamplesPerSec=13.98 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4238|ppo_ep: 1|act_loss: -0.032806396484375|cri_loss: -0.01560211181640625|unsuper_loss: 0.0 +average reward score: 5.3359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.90%) |Training time=0.44s (20.60%) |Others=0.12 (5.50%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46 +[2023-04-14 11:23:50,554] [INFO] [logging.py:96:log_dist] [Rank 0] step=4240, skipped=56, lr=[4.817569520312709e-06, 4.817569520312709e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:23:50,572] [INFO] [timer.py:199:stop] epoch=0/micro_step=4240/global_step=4240, RunningAvgSamplesPerSec=105.30150071014239, CurrSamplesPerSec=111.66534771424102, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:23:50,665] [INFO] [logging.py:96:log_dist] [Rank 0] step=4240, skipped=68, lr=[2.5076999698199706e-06, 2.5076999698199706e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4239|ppo_ep: 1|act_loss: -0.001766204833984375|cri_loss: 0.0019130706787109375|unsuper_loss: 0.0 +average reward score: 6.078125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.41%) |Training time=0.45s (20.94%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4240|ppo_ep: 1|act_loss: 0.0226898193359375|cri_loss: 0.0124053955078125|unsuper_loss: 0.0 +average reward score: 4.7734375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.84%) |Training time=0.44s (20.49%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4241|ppo_ep: 1|act_loss: 0.002574920654296875|cri_loss: 0.0016956329345703125|unsuper_loss: 0.0 +average reward score: 6.20703125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.00%) |Training time=0.49s (22.40%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4242|ppo_ep: 1|act_loss: 0.0186767578125|cri_loss: 0.01000213623046875|unsuper_loss: 0.0 +average reward score: 5.8984375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.43%) |Training time=0.50s (22.87%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4243|ppo_ep: 1|act_loss: 0.030120849609375|cri_loss: 0.0162506103515625|unsuper_loss: 0.0 +average reward score: 5.34765625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.65%) |Training time=0.47s (21.59%) |Others=0.10 (4.77%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4244|ppo_ep: 1|act_loss: 0.0592041015625|cri_loss: 0.031158447265625|unsuper_loss: 0.0 +average reward score: 5.484375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.98%) |Training time=0.42s (19.12%) |Others=0.11 (4.90%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4245|ppo_ep: 1|act_loss: 0.0345458984375|cri_loss: 0.0182037353515625|unsuper_loss: 0.0 +average reward score: 4.79296875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.99%) |Training time=0.46s (21.33%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4246|ppo_ep: 1|act_loss: 0.00125885009765625|cri_loss: 0.0011882781982421875|unsuper_loss: 0.0 +average reward score: 4.9765625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.47%) |Training time=0.45s (20.88%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4247|ppo_ep: 1|act_loss: -0.0139312744140625|cri_loss: -0.00653076171875|unsuper_loss: 0.0 +average reward score: 6.15234375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.18%) |Training time=0.46s (21.17%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4248|ppo_ep: 1|act_loss: -0.0172271728515625|cri_loss: -0.0080413818359375|unsuper_loss: 0.0 +average reward score: 7.1796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.60%) |Training time=0.45s (20.74%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +[2023-04-14 11:24:12,290] [INFO] [logging.py:96:log_dist] [Rank 0] step=4250, skipped=56, lr=[4.798993436739004e-06, 4.798993436739004e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:24:12,308] [INFO] [timer.py:199:stop] epoch=0/micro_step=4250/global_step=4250, RunningAvgSamplesPerSec=105.30792594664827, CurrSamplesPerSec=114.12237540451974, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:24:12,401] [INFO] [logging.py:96:log_dist] [Rank 0] step=4250, skipped=68, lr=[2.4980750046916983e-06, 2.4980750046916983e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4249|ppo_ep: 1|act_loss: -0.02313232421875|cri_loss: -0.01113128662109375|unsuper_loss: 0.0 +average reward score: 6.88671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.95%) |Training time=0.44s (20.41%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4250|ppo_ep: 1|act_loss: -0.0238037109375|cri_loss: -0.01103973388671875|unsuper_loss: 0.0 +average reward score: 5.0234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.39%) |Training time=0.45s (20.96%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4251|ppo_ep: 1|act_loss: -0.03948974609375|cri_loss: -0.0182342529296875|unsuper_loss: 0.0 +average reward score: 4.28125 +------------------------------------------------------------------------------------- +|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.78s (75.28%) |Training time=0.48s (20.47%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.51 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4252|ppo_ep: 1|act_loss: 0.0126495361328125|cri_loss: 0.006908416748046875|unsuper_loss: 0.0 +average reward score: 4.6328125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.88%) |Training time=0.47s (21.50%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4253|ppo_ep: 1|act_loss: 0.024200439453125|cri_loss: 0.0125732421875|unsuper_loss: 0.0 +average reward score: 7.3359375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.98%) |Training time=0.46s (21.40%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4254|ppo_ep: 1|act_loss: -0.021636962890625|cri_loss: -0.0095977783203125|unsuper_loss: 0.0 +average reward score: 5.4765625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.67%) |Training time=0.47s (21.71%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4255|ppo_ep: 1|act_loss: -0.01812744140625|cri_loss: -0.008758544921875|unsuper_loss: 0.0 +average reward score: 6.01171875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.73%) |Training time=0.47s (21.62%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4256|ppo_ep: 1|act_loss: -0.004058837890625|cri_loss: -0.0011882781982421875|unsuper_loss: 0.0 +average reward score: 4.5703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.89%) |Training time=0.47s (21.46%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4257|ppo_ep: 1|act_loss: 0.021209716796875|cri_loss: 0.01186370849609375|unsuper_loss: 0.0 +average reward score: 5.91796875 +------------------------------------------------------------------------------------- +|E2E latency=2.86s |Gather latency=0.00s (0.00%) |Generate time=1.61s (56.22%) |Training time=0.47s (16.46%) |Others=0.78 (27.32%)|CurSamplesPerSec=11.18 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4258|ppo_ep: 1|act_loss: 0.06622314453125|cri_loss: 0.0384521484375|unsuper_loss: 0.0 +average reward score: 5.46484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.79%) |Training time=0.44s (20.55%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +[2023-04-14 11:24:34,891] [INFO] [logging.py:96:log_dist] [Rank 0] step=4260, skipped=56, lr=[4.7804177386453025e-06, 4.7804177386453025e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:24:34,909] [INFO] [timer.py:199:stop] epoch=0/micro_step=4260/global_step=4260, RunningAvgSamplesPerSec=105.30860582605034, CurrSamplesPerSec=107.67247823369864, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:24:35,002] [INFO] [logging.py:96:log_dist] [Rank 0] step=4260, skipped=68, lr=[2.488450068096499e-06, 2.488450068096499e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4259|ppo_ep: 1|act_loss: 0.04168701171875|cri_loss: 0.021820068359375|unsuper_loss: 0.0 +average reward score: 4.89453125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.92%) |Training time=0.46s (21.16%) |Others=0.11 (4.93%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4260|ppo_ep: 1|act_loss: -0.031890869140625|cri_loss: -0.0142669677734375|unsuper_loss: 0.0 +average reward score: 5.421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.96%) |Training time=0.46s (21.40%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4261|ppo_ep: 1|act_loss: 0.018280029296875|cri_loss: 0.00974273681640625|unsuper_loss: 0.0 +average reward score: 6.0546875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.62%) |Training time=0.47s (21.76%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4262|ppo_ep: 1|act_loss: -0.0208587646484375|cri_loss: -0.00948333740234375|unsuper_loss: 0.0 +average reward score: 6.2421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.35%) |Training time=0.48s (22.02%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4263|ppo_ep: 1|act_loss: -0.01275634765625|cri_loss: -0.0055389404296875|unsuper_loss: 0.0 +average reward score: 5.6796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.70%) |Training time=0.47s (21.67%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4264|ppo_ep: 1|act_loss: -0.0343017578125|cri_loss: -0.01505279541015625|unsuper_loss: 0.0 +average reward score: 5.1875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.60%) |Training time=0.48s (21.81%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4265|ppo_ep: 1|act_loss: -0.0079498291015625|cri_loss: -0.0037689208984375|unsuper_loss: 0.0 +average reward score: 4.6640625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.25%) |Training time=0.48s (22.13%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4266|ppo_ep: 1|act_loss: -0.0089263916015625|cri_loss: -0.0040740966796875|unsuper_loss: 0.0 +average reward score: 5.4609375 +------------------------------------------------------------------------------------- +|E2E latency=2.50s |Gather latency=0.00s (0.00%) |Generate time=1.79s (71.52%) |Training time=0.61s (24.32%) |Others=0.10 (4.15%)|CurSamplesPerSec=12.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4267|ppo_ep: 1|act_loss: -0.00508880615234375|cri_loss: -0.002323150634765625|unsuper_loss: 0.0 +average reward score: 5.50390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.44%) |Training time=0.48s (21.92%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4268|ppo_ep: 1|act_loss: 0.021148681640625|cri_loss: 0.01126861572265625|unsuper_loss: 0.0 +average reward score: 5.125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.31%) |Training time=0.48s (22.07%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46 +[2023-04-14 11:24:56,993] [INFO] [logging.py:96:log_dist] [Rank 0] step=4270, skipped=56, lr=[4.761842701368264e-06, 4.761842701368264e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:24:57,011] [INFO] [timer.py:199:stop] epoch=0/micro_step=4270/global_step=4270, RunningAvgSamplesPerSec=105.29193536234465, CurrSamplesPerSec=100.81037834200097, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:24:57,104] [INFO] [logging.py:96:log_dist] [Rank 0] step=4270, skipped=68, lr=[2.4788253026991545e-06, 2.4788253026991545e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4269|ppo_ep: 1|act_loss: 0.0455322265625|cri_loss: 0.023406982421875|unsuper_loss: 0.0 +average reward score: 6.078125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.40%) |Training time=0.48s (22.01%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4270|ppo_ep: 1|act_loss: 0.0024509429931640625|cri_loss: 0.0016841888427734375|unsuper_loss: 0.0 +average reward score: 5.0859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.35%) |Training time=0.46s (21.00%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4271|ppo_ep: 1|act_loss: 0.010040283203125|cri_loss: 0.006195068359375|unsuper_loss: 0.0 +average reward score: 4.34765625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.49%) |Training time=0.48s (21.90%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4272|ppo_ep: 1|act_loss: 0.022003173828125|cri_loss: 0.01129150390625|unsuper_loss: 0.0 +average reward score: 5.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.48s (21.93%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4273|ppo_ep: 1|act_loss: 0.018524169921875|cri_loss: 0.0123443603515625|unsuper_loss: 0.0 +average reward score: 6.09375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.55%) |Training time=0.47s (21.78%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4274|ppo_ep: 1|act_loss: -0.00841522216796875|cri_loss: -0.0037631988525390625|unsuper_loss: 0.0 +average reward score: 5.1953125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.23%) |Training time=0.48s (22.16%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4275|ppo_ep: 1|act_loss: -0.0285491943359375|cri_loss: -0.0128021240234375|unsuper_loss: 0.0 +average reward score: 6.06640625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.48s (21.92%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4276|ppo_ep: 1|act_loss: -0.0015773773193359375|cri_loss: -0.0004177093505859375|unsuper_loss: 0.0 +average reward score: 5.4765625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.69%) |Training time=0.47s (21.67%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4277|ppo_ep: 1|act_loss: -0.031494140625|cri_loss: -0.01528167724609375|unsuper_loss: 0.0 +average reward score: 5.55859375 +------------------------------------------------------------------------------------- +|E2E latency=2.71s |Gather latency=0.00s (0.00%) |Generate time=1.60s (58.98%) |Training time=0.48s (17.68%) |Others=0.63 (23.34%)|CurSamplesPerSec=11.82 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4278|ppo_ep: 1|act_loss: -0.00934600830078125|cri_loss: -0.004230499267578125|unsuper_loss: 0.0 +average reward score: 5.5390625 +------------------------------------------------------------------------------------- +|E2E latency=2.08s |Gather latency=0.00s (0.00%) |Generate time=1.59s (76.59%) |Training time=0.39s (18.58%) |Others=0.10 (4.82%)|CurSamplesPerSec=15.40 |AvgSamplesPerSec=14.46 +[2023-04-14 11:25:19,142] [INFO] [logging.py:96:log_dist] [Rank 0] step=4280, skipped=56, lr=[4.74326860023475e-06, 4.74326860023475e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:25:19,161] [INFO] [timer.py:199:stop] epoch=0/micro_step=4280/global_step=4280, RunningAvgSamplesPerSec=105.29278448645617, CurrSamplesPerSec=100.23129910087523, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:25:19,253] [INFO] [logging.py:96:log_dist] [Rank 0] step=4280, skipped=68, lr=[2.4692008511619042e-06, 2.4692008511619042e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4279|ppo_ep: 1|act_loss: 0.0067291259765625|cri_loss: 0.0035915374755859375|unsuper_loss: 0.0 +average reward score: 6.64453125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.25%) |Training time=0.48s (22.15%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4280|ppo_ep: 1|act_loss: -0.0108489990234375|cri_loss: -0.005157470703125|unsuper_loss: 0.0 +average reward score: 7.859375 +------------------------------------------------------------------------------------- +|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.08%) |Training time=0.48s (20.30%) |Others=0.11 (4.62%)|CurSamplesPerSec=13.58 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4281|ppo_ep: 1|act_loss: 0.0028228759765625|cri_loss: 0.0021076202392578125|unsuper_loss: 0.0 +average reward score: 4.87890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.37%) |Training time=0.48s (21.99%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4282|ppo_ep: 1|act_loss: -0.0103912353515625|cri_loss: -0.004909515380859375|unsuper_loss: 0.0 +average reward score: 5.7734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.10%) |Training time=0.49s (22.32%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4283|ppo_ep: 1|act_loss: 0.004169464111328125|cri_loss: 0.0023479461669921875|unsuper_loss: 0.0 +average reward score: 5.7109375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.29%) |Training time=0.48s (22.07%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4284|ppo_ep: 1|act_loss: 0.01311492919921875|cri_loss: 0.00726318359375|unsuper_loss: 0.0 +average reward score: 5.77734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.35%) |Training time=0.48s (22.04%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4285|ppo_ep: 1|act_loss: 0.0207977294921875|cri_loss: 0.0112457275390625|unsuper_loss: 0.0 +average reward score: 5.87890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.40%) |Training time=0.48s (21.95%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4286|ppo_ep: 1|act_loss: 0.0085296630859375|cri_loss: 0.004547119140625|unsuper_loss: 0.0 +average reward score: 4.7734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.35%) |Training time=0.48s (22.04%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4287|ppo_ep: 1|act_loss: -0.0113525390625|cri_loss: -0.005161285400390625|unsuper_loss: 0.0 +average reward score: 6.25390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.47s (21.90%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4288|ppo_ep: 1|act_loss: 0.027923583984375|cri_loss: 0.0167694091796875|unsuper_loss: 0.0 +average reward score: 4.68359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.83%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +[2023-04-14 11:25:41,026] [INFO] [logging.py:96:log_dist] [Rank 0] step=4290, skipped=56, lr=[4.724695710557746e-06, 4.724695710557746e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:25:41,044] [INFO] [timer.py:199:stop] epoch=0/micro_step=4290/global_step=4290, RunningAvgSamplesPerSec=105.28350097959824, CurrSamplesPerSec=101.11827769046826, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:25:41,137] [INFO] [logging.py:96:log_dist] [Rank 0] step=4290, skipped=68, lr=[2.4595768561423393e-06, 2.4595768561423393e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4289|ppo_ep: 1|act_loss: -0.03314208984375|cri_loss: -0.015716552734375|unsuper_loss: 0.0 +average reward score: 4.859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.48s (22.05%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4290|ppo_ep: 1|act_loss: -0.02056884765625|cri_loss: -0.0088958740234375|unsuper_loss: 0.0 +average reward score: 5.39453125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.84%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4291|ppo_ep: 1|act_loss: -0.023284912109375|cri_loss: -0.01110076904296875|unsuper_loss: 0.0 +average reward score: 5.453125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.37%) |Training time=0.48s (22.02%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4292|ppo_ep: 1|act_loss: 0.0148468017578125|cri_loss: 0.008026123046875|unsuper_loss: 0.0 +average reward score: 5.296875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.37%) |Training time=0.48s (22.01%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4293|ppo_ep: 1|act_loss: 0.01361846923828125|cri_loss: 0.0087890625|unsuper_loss: 0.0 +average reward score: 5.26171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.47%) |Training time=0.48s (21.92%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4294|ppo_ep: 1|act_loss: 0.004306793212890625|cri_loss: 0.002872467041015625|unsuper_loss: 0.0 +average reward score: 4.5390625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.29%) |Training time=0.48s (22.14%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4295|ppo_ep: 1|act_loss: 0.0026702880859375|cri_loss: 0.0014410018920898438|unsuper_loss: 0.0 +average reward score: 5.55859375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.76%) |Training time=0.48s (21.63%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.54 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4296|ppo_ep: 1|act_loss: 0.00958251953125|cri_loss: 0.005100250244140625|unsuper_loss: 0.0 +average reward score: 4.62109375 +------------------------------------------------------------------------------------- +|E2E latency=3.13s |Gather latency=0.00s (0.00%) |Generate time=1.78s (56.75%) |Training time=0.48s (15.22%) |Others=0.88 (28.02%)|CurSamplesPerSec=10.23 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4297|ppo_ep: 1|act_loss: 0.03570556640625|cri_loss: 0.018829345703125|unsuper_loss: 0.0 +average reward score: 5.22265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.34%) |Training time=0.48s (22.03%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4298|ppo_ep: 1|act_loss: 0.03399658203125|cri_loss: 0.019012451171875|unsuper_loss: 0.0 +average reward score: 5.3828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.42%) |Training time=0.48s (21.97%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +[2023-04-14 11:26:03,707] [INFO] [logging.py:96:log_dist] [Rank 0] step=4300, skipped=56, lr=[4.706124307632283e-06, 4.706124307632283e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:26:03,726] [INFO] [timer.py:199:stop] epoch=0/micro_step=4300/global_step=4300, RunningAvgSamplesPerSec=105.27549328885391, CurrSamplesPerSec=102.87540000751152, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:26:03,819] [INFO] [logging.py:96:log_dist] [Rank 0] step=4300, skipped=68, lr=[2.4499534602912815e-06, 2.4499534602912815e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4299|ppo_ep: 1|act_loss: 0.0301055908203125|cri_loss: 0.0162200927734375|unsuper_loss: 0.0 +average reward score: 5.390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.53%) |Training time=0.47s (21.86%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4300|ppo_ep: 1|act_loss: -0.0199432373046875|cri_loss: -0.009765625|unsuper_loss: 0.0 +average reward score: 4.8671875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.84%) |Training time=0.49s (22.54%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4301|ppo_ep: 1|act_loss: 0.02093505859375|cri_loss: 0.01099395751953125|unsuper_loss: 0.0 +average reward score: 6.29296875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.23%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4302|ppo_ep: 1|act_loss: 0.001983642578125|cri_loss: 0.0019588470458984375|unsuper_loss: 0.0 +average reward score: 5.96875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.14%) |Training time=0.48s (22.22%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4303|ppo_ep: 1|act_loss: -0.041778564453125|cri_loss: -0.0200042724609375|unsuper_loss: 0.0 +average reward score: 5.1796875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.47s (21.90%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4304|ppo_ep: 1|act_loss: -0.0122833251953125|cri_loss: -0.00597381591796875|unsuper_loss: 0.0 +average reward score: 5.25390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.15%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4305|ppo_ep: 1|act_loss: 0.006443023681640625|cri_loss: 0.0036296844482421875|unsuper_loss: 0.0 +average reward score: 4.6796875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.39%) |Training time=0.47s (21.97%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4306|ppo_ep: 1|act_loss: 0.0103607177734375|cri_loss: 0.0058746337890625|unsuper_loss: 0.0 +average reward score: 5.63671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.47%) |Training time=0.47s (21.89%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4307|ppo_ep: 1|act_loss: 0.037078857421875|cri_loss: 0.0198211669921875|unsuper_loss: 0.0 +average reward score: 6.58203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.18%) |Training time=0.48s (22.21%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4308|ppo_ep: 1|act_loss: 0.00476837158203125|cri_loss: 0.0025424957275390625|unsuper_loss: 0.0 +average reward score: 4.515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.24%) |Training time=0.48s (22.12%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +[2023-04-14 11:26:25,365] [INFO] [logging.py:96:log_dist] [Rank 0] step=4310, skipped=56, lr=[4.687554666731353e-06, 4.687554666731353e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:26:25,383] [INFO] [timer.py:199:stop] epoch=0/micro_step=4310/global_step=4310, RunningAvgSamplesPerSec=105.26517600866048, CurrSamplesPerSec=101.96257966198779, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:26:25,476] [INFO] [logging.py:96:log_dist] [Rank 0] step=4310, skipped=68, lr=[2.440330806250673e-06, 2.440330806250673e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4309|ppo_ep: 1|act_loss: 0.052459716796875|cri_loss: 0.0281829833984375|unsuper_loss: 0.0 +average reward score: 6.38671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.48s (21.99%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4310|ppo_ep: 1|act_loss: -0.01421356201171875|cri_loss: -0.00677490234375|unsuper_loss: 0.0 +average reward score: 5.734375 +------------------------------------------------------------------------------------- +|E2E latency=3.60s |Gather latency=0.00s (0.00%) |Generate time=1.59s (44.04%) |Training time=0.50s (14.00%) |Others=1.51 (41.96%)|CurSamplesPerSec=8.88 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4311|ppo_ep: 1|act_loss: 0.0005640983581542969|cri_loss: 0.000530242919921875|unsuper_loss: 0.0 +average reward score: 5.70703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.16%) |Training time=0.48s (22.21%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4312|ppo_ep: 1|act_loss: -0.001445770263671875|cri_loss: -0.0002770423889160156|unsuper_loss: 0.0 +average reward score: 4.609375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.25%) |Training time=0.48s (22.12%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4313|ppo_ep: 1|act_loss: -0.0284423828125|cri_loss: -0.0135040283203125|unsuper_loss: 0.0 +average reward score: 5.65625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.81%) |Training time=0.49s (22.58%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4314|ppo_ep: 1|act_loss: -0.0107269287109375|cri_loss: -0.005100250244140625|unsuper_loss: 0.0 +average reward score: 6.0703125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.47%) |Training time=0.47s (21.86%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4315|ppo_ep: 1|act_loss: -0.0107421875|cri_loss: -0.004486083984375|unsuper_loss: 0.0 +average reward score: 5.67578125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.90%) |Training time=0.49s (22.50%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4316|ppo_ep: 1|act_loss: -0.00214385986328125|cri_loss: -0.00075531005859375|unsuper_loss: 0.0 +average reward score: 5.27734375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.10%) |Training time=0.48s (22.28%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4317|ppo_ep: 1|act_loss: -0.015869140625|cri_loss: -0.007343292236328125|unsuper_loss: 0.0 +average reward score: 6.36328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.08%) |Training time=0.48s (22.30%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4318|ppo_ep: 1|act_loss: 0.00504302978515625|cri_loss: 0.0028839111328125|unsuper_loss: 0.0 +average reward score: 5.78125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.34%) |Training time=0.48s (22.07%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.46 +[2023-04-14 11:26:48,503] [INFO] [logging.py:96:log_dist] [Rank 0] step=4320, skipped=56, lr=[4.66898706310183e-06, 4.66898706310183e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:26:48,521] [INFO] [timer.py:199:stop] epoch=0/micro_step=4320/global_step=4320, RunningAvgSamplesPerSec=105.2507615935386, CurrSamplesPerSec=100.87576558125552, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:26:48,614] [INFO] [logging.py:96:log_dist] [Rank 0] step=4320, skipped=68, lr=[2.4307090366514594e-06, 2.4307090366514594e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4319|ppo_ep: 1|act_loss: 0.004791259765625|cri_loss: 0.002666473388671875|unsuper_loss: 0.0 +average reward score: 6.296875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.22%) |Training time=0.48s (22.18%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4320|ppo_ep: 1|act_loss: 0.02197265625|cri_loss: 0.0123138427734375|unsuper_loss: 0.0 +average reward score: 5.37109375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.06%) |Training time=0.48s (22.32%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4321|ppo_ep: 1|act_loss: 0.017242431640625|cri_loss: 0.00876617431640625|unsuper_loss: 0.0 +average reward score: 4.6484375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.99%) |Training time=0.48s (21.82%) |Others=0.14 (6.20%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4322|ppo_ep: 1|act_loss: -0.002384185791015625|cri_loss: -0.0009288787841796875|unsuper_loss: 0.0 +average reward score: 4.65625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.31%) |Training time=0.48s (22.05%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4323|ppo_ep: 1|act_loss: -0.0127716064453125|cri_loss: -0.00608062744140625|unsuper_loss: 0.0 +average reward score: 4.53125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.13%) |Training time=0.48s (22.26%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4324|ppo_ep: 1|act_loss: -0.003589630126953125|cri_loss: -0.000904083251953125|unsuper_loss: 0.0 +average reward score: 5.62890625 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.50%) |Training time=0.48s (21.10%) |Others=0.10 (4.40%)|CurSamplesPerSec=14.01 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4325|ppo_ep: 1|act_loss: -0.06109619140625|cri_loss: -0.0297698974609375|unsuper_loss: 0.0 +average reward score: 4.8984375 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.89%) |Training time=0.49s (20.82%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4326|ppo_ep: 1|act_loss: -0.0284423828125|cri_loss: -0.01348114013671875|unsuper_loss: 0.0 +average reward score: 5.02734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.90%) |Training time=0.49s (22.50%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4327|ppo_ep: 1|act_loss: 0.00945281982421875|cri_loss: 0.00511932373046875|unsuper_loss: 0.0 +average reward score: 5.19921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.12%) |Training time=0.48s (22.26%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4328|ppo_ep: 1|act_loss: -0.01401519775390625|cri_loss: -0.006587982177734375|unsuper_loss: 0.0 +average reward score: 5.3203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.22%) |Training time=0.48s (22.13%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +[2023-04-14 11:27:10,534] [INFO] [logging.py:96:log_dist] [Rank 0] step=4330, skipped=56, lr=[4.650421771960395e-06, 4.650421771960395e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:27:10,551] [INFO] [timer.py:199:stop] epoch=0/micro_step=4330/global_step=4330, RunningAvgSamplesPerSec=105.23655293591679, CurrSamplesPerSec=96.47581163393487, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:27:10,644] [INFO] [logging.py:96:log_dist] [Rank 0] step=4330, skipped=68, lr=[2.421088294111479e-06, 2.421088294111479e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4329|ppo_ep: 1|act_loss: -0.0030517578125|cri_loss: -0.001354217529296875|unsuper_loss: 0.0 +average reward score: 5.578125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.79%) |Training time=0.49s (22.62%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4330|ppo_ep: 1|act_loss: 0.00681304931640625|cri_loss: 0.003574371337890625|unsuper_loss: 0.0 +average reward score: 4.86328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.85%) |Training time=0.49s (22.53%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4331|ppo_ep: 1|act_loss: 0.0313720703125|cri_loss: 0.016265869140625|unsuper_loss: 0.0 +average reward score: 4.59765625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.17%) |Training time=0.48s (22.19%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4332|ppo_ep: 1|act_loss: 0.050048828125|cri_loss: 0.02886962890625|unsuper_loss: 0.0 +average reward score: 5.9609375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.03%) |Training time=0.48s (22.36%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4333|ppo_ep: 1|act_loss: 0.0006165504455566406|cri_loss: 0.0004563331604003906|unsuper_loss: 0.0 +average reward score: 4.64453125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.79%) |Training time=0.49s (22.61%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46 +[2023-04-14 11:27:21,487] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 4334|ppo_ep: 1|act_loss: 0.007572174072265625|cri_loss: 0.00405120849609375|unsuper_loss: 0.0 +average reward score: 5.3984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.49%) |Training time=0.48s (22.33%) |Others=0.09 (4.18%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +[2023-04-14 11:27:23,649] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 4335|ppo_ep: 1|act_loss: 0.09259033203125|cri_loss: 0.0550537109375|unsuper_loss: 0.0 +average reward score: 5.6171875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.49s (22.67%) |Others=0.09 (4.13%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4336|ppo_ep: 1|act_loss: 0.0041046142578125|cri_loss: 0.00220489501953125|unsuper_loss: 0.0 +average reward score: 5.99609375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.03%) |Training time=0.48s (22.35%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4337|ppo_ep: 1|act_loss: -0.0007777214050292969|cri_loss: -1.049041748046875e-05|unsuper_loss: 0.0 +average reward score: 4.23046875 +------------------------------------------------------------------------------------- +|E2E latency=3.05s |Gather latency=0.00s (0.00%) |Generate time=1.58s (51.88%) |Training time=0.49s (15.97%) |Others=0.98 (32.15%)|CurSamplesPerSec=10.49 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4338|ppo_ep: 1|act_loss: 0.0126495361328125|cri_loss: 0.00670623779296875|unsuper_loss: 0.0 +average reward score: 6.1796875 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.58s (70.00%) |Training time=0.49s (21.83%) |Others=0.19 (8.17%)|CurSamplesPerSec=14.14 |AvgSamplesPerSec=14.46 +[2023-04-14 11:27:33,216] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +[2023-04-14 11:27:33,216] [INFO] [logging.py:96:log_dist] [Rank 0] step=4340, skipped=57, lr=[4.633715214549443e-06, 4.633715214549443e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:27:33,217] [INFO] [timer.py:199:stop] epoch=0/micro_step=4340/global_step=4340, RunningAvgSamplesPerSec=105.22239416119598, CurrSamplesPerSec=108.74004434905967, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:27:33,309] [INFO] [logging.py:96:log_dist] [Rank 0] step=4340, skipped=70, lr=[2.413392535391663e-06, 2.413392535391663e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4339|ppo_ep: 1|act_loss: -0.0102386474609375|cri_loss: -0.00473785400390625|unsuper_loss: 0.0 +average reward score: 6.28125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.45%) |Training time=0.46s (20.93%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46 +[2023-04-14 11:27:35,354] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 4340|ppo_ep: 1|act_loss: -0.00921630859375|cri_loss: -0.00395965576171875|unsuper_loss: 0.0 +average reward score: 5.2265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.07%) |Training time=0.45s (21.25%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4341|ppo_ep: 1|act_loss: -0.0147705078125|cri_loss: -0.0072479248046875|unsuper_loss: 0.0 +average reward score: 6.125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.96%) |Training time=0.49s (22.43%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4342|ppo_ep: 1|act_loss: -0.0070648193359375|cri_loss: -0.003284454345703125|unsuper_loss: 0.0 +average reward score: 6.96484375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.02%) |Training time=0.48s (22.35%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4343|ppo_ep: 1|act_loss: -0.040374755859375|cri_loss: -0.0195770263671875|unsuper_loss: 0.0 +average reward score: 5.09765625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.98%) |Training time=0.49s (22.39%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4344|ppo_ep: 1|act_loss: -0.0029506683349609375|cri_loss: -0.00118255615234375|unsuper_loss: 0.0 +average reward score: 4.73828125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.79%) |Training time=0.49s (22.60%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4345|ppo_ep: 1|act_loss: 0.01107025146484375|cri_loss: 0.00582122802734375|unsuper_loss: 0.0 +average reward score: 5.76171875 +------------------------------------------------------------------------------------- +|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.64s (73.22%) |Training time=0.50s (22.30%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.32 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4346|ppo_ep: 1|act_loss: 0.037841796875|cri_loss: 0.0195465087890625|unsuper_loss: 0.0 +average reward score: 6.234375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.05%) |Training time=0.48s (22.31%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4347|ppo_ep: 1|act_loss: 0.010650634765625|cri_loss: 0.00545501708984375|unsuper_loss: 0.0 +average reward score: 4.9296875 +------------------------------------------------------------------------------------- +|E2E latency=2.58s |Gather latency=0.00s (0.00%) |Generate time=1.58s (61.26%) |Training time=0.49s (18.85%) |Others=0.51 (19.88%)|CurSamplesPerSec=12.40 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4348|ppo_ep: 1|act_loss: -0.01806640625|cri_loss: -0.00841522216796875|unsuper_loss: 0.0 +average reward score: 5.7890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.95%) |Training time=0.48s (22.41%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +[2023-04-14 11:27:55,334] [INFO] [logging.py:96:log_dist] [Rank 0] step=4350, skipped=58, lr=[4.617010953733625e-06, 4.617010953733625e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:27:55,352] [INFO] [timer.py:199:stop] epoch=0/micro_step=4350/global_step=4350, RunningAvgSamplesPerSec=105.20829371375697, CurrSamplesPerSec=99.3526850550957, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:27:55,445] [INFO] [logging.py:96:log_dist] [Rank 0] step=4350, skipped=70, lr=[2.4037740009053053e-06, 2.4037740009053053e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4349|ppo_ep: 1|act_loss: 0.014923095703125|cri_loss: 0.00814056396484375|unsuper_loss: 0.0 +average reward score: 5.59375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.05%) |Training time=0.48s (22.35%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4350|ppo_ep: 1|act_loss: -0.0151824951171875|cri_loss: -0.00742340087890625|unsuper_loss: 0.0 +average reward score: 4.83984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.96%) |Training time=0.49s (22.42%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4351|ppo_ep: 1|act_loss: -0.00119781494140625|cri_loss: -0.0004775524139404297|unsuper_loss: 0.0 +average reward score: 5.14453125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.04%) |Training time=0.48s (22.32%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4352|ppo_ep: 1|act_loss: 0.00487518310546875|cri_loss: 0.0028095245361328125|unsuper_loss: 0.0 +average reward score: 5.4296875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.78%) |Training time=0.50s (22.63%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4353|ppo_ep: 1|act_loss: 0.022918701171875|cri_loss: 0.0121917724609375|unsuper_loss: 0.0 +average reward score: 5.7421875 +------------------------------------------------------------------------------------- +|E2E latency=3.49s |Gather latency=0.00s (0.00%) |Generate time=1.68s (48.11%) |Training time=0.56s (16.00%) |Others=1.25 (35.89%)|CurSamplesPerSec=9.16 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4354|ppo_ep: 1|act_loss: 0.0035247802734375|cri_loss: 0.0018749237060546875|unsuper_loss: 0.0 +average reward score: 5.70703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.83%) |Training time=0.47s (21.51%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4355|ppo_ep: 1|act_loss: -0.0120697021484375|cri_loss: -0.004734039306640625|unsuper_loss: 0.0 +average reward score: 5.90625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.34%) |Training time=0.48s (22.03%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4356|ppo_ep: 1|act_loss: 0.02142333984375|cri_loss: 0.0111846923828125|unsuper_loss: 0.0 +average reward score: 5.4296875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.35%) |Training time=0.48s (22.01%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4357|ppo_ep: 1|act_loss: -0.0118560791015625|cri_loss: -0.00458526611328125|unsuper_loss: 0.0 +average reward score: 6.09765625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.27%) |Training time=0.48s (22.10%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4358|ppo_ep: 1|act_loss: 0.0582275390625|cri_loss: 0.0309295654296875|unsuper_loss: 0.0 +average reward score: 5.0390625 +------------------------------------------------------------------------------------- +|E2E latency=2.51s |Gather latency=0.00s (0.00%) |Generate time=1.60s (63.54%) |Training time=0.47s (18.86%) |Others=0.44 (17.60%)|CurSamplesPerSec=12.73 |AvgSamplesPerSec=14.46 +[2023-04-14 11:28:18,696] [INFO] [logging.py:96:log_dist] [Rank 0] step=4360, skipped=58, lr=[4.5984536014041675e-06, 4.5984536014041675e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:28:18,714] [INFO] [timer.py:199:stop] epoch=0/micro_step=4360/global_step=4360, RunningAvgSamplesPerSec=105.19267954139451, CurrSamplesPerSec=105.13316387876034, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:28:18,807] [INFO] [logging.py:96:log_dist] [Rank 0] step=4360, skipped=70, lr=[2.3941568927203878e-06, 2.3941568927203878e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4359|ppo_ep: 1|act_loss: -0.001995086669921875|cri_loss: -0.000576019287109375|unsuper_loss: 0.0 +average reward score: 5.63671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.77%) |Training time=0.47s (21.59%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4360|ppo_ep: 1|act_loss: -0.010589599609375|cri_loss: -0.005023956298828125|unsuper_loss: 0.0 +average reward score: 5.0625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.42%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4361|ppo_ep: 1|act_loss: -0.0277099609375|cri_loss: -0.01325225830078125|unsuper_loss: 0.0 +average reward score: 4.890625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.24%) |Training time=0.45s (21.07%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4362|ppo_ep: 1|act_loss: -0.0044097900390625|cri_loss: -0.001445770263671875|unsuper_loss: 0.0 +average reward score: 5.00390625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.20%) |Training time=0.45s (21.10%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4363|ppo_ep: 1|act_loss: 0.0423583984375|cri_loss: 0.0220489501953125|unsuper_loss: 0.0 +average reward score: 5.6484375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.14%) |Training time=0.45s (21.20%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4364|ppo_ep: 1|act_loss: 0.0306396484375|cri_loss: 0.015960693359375|unsuper_loss: 0.0 +average reward score: 5.4765625 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.32%) |Training time=0.45s (20.97%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4365|ppo_ep: 1|act_loss: 0.003692626953125|cri_loss: 0.0020427703857421875|unsuper_loss: 0.0 +average reward score: 5.4609375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.49%) |Training time=0.44s (20.81%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4366|ppo_ep: 1|act_loss: -0.003925323486328125|cri_loss: -0.0013608932495117188|unsuper_loss: 0.0 +average reward score: 5.90625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.15%) |Training time=0.45s (21.19%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4367|ppo_ep: 1|act_loss: 0.03369140625|cri_loss: 0.0172882080078125|unsuper_loss: 0.0 +average reward score: 5.30859375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.46s (21.28%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4368|ppo_ep: 1|act_loss: 0.0003256797790527344|cri_loss: 0.0004138946533203125|unsuper_loss: 0.0 +average reward score: 5.4375 +------------------------------------------------------------------------------------- +|E2E latency=2.82s |Gather latency=0.00s (0.00%) |Generate time=1.66s (58.85%) |Training time=0.49s (17.54%) |Others=0.67 (23.62%)|CurSamplesPerSec=11.36 |AvgSamplesPerSec=14.46 +[2023-04-14 11:28:40,789] [INFO] [logging.py:96:log_dist] [Rank 0] step=4370, skipped=58, lr=[4.579899607038848e-06, 4.579899607038848e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:28:40,807] [INFO] [timer.py:199:stop] epoch=0/micro_step=4370/global_step=4370, RunningAvgSamplesPerSec=105.20610923662908, CurrSamplesPerSec=110.14616484290242, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:28:40,900] [INFO] [logging.py:96:log_dist] [Rank 0] step=4370, skipped=70, lr=[2.3845413533856517e-06, 2.3845413533856517e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4369|ppo_ep: 1|act_loss: 0.0188140869140625|cri_loss: 0.0099945068359375|unsuper_loss: 0.0 +average reward score: 5.8203125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.13%) |Training time=0.45s (21.20%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4370|ppo_ep: 1|act_loss: 0.0085601806640625|cri_loss: 0.0045623779296875|unsuper_loss: 0.0 +average reward score: 4.6640625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.40%) |Training time=0.45s (20.89%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4371|ppo_ep: 1|act_loss: -0.01934814453125|cri_loss: -0.00904083251953125|unsuper_loss: 0.0 +average reward score: 5.546875 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.30%) |Training time=0.45s (21.00%) |Others=0.10 (4.69%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4372|ppo_ep: 1|act_loss: -0.006256103515625|cri_loss: -0.0027332305908203125|unsuper_loss: 0.0 +average reward score: 5.35546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.17%) |Training time=0.46s (21.18%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4373|ppo_ep: 1|act_loss: -0.008270263671875|cri_loss: -0.004001617431640625|unsuper_loss: 0.0 +average reward score: 5.28515625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.20%) |Training time=0.45s (21.11%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4374|ppo_ep: 1|act_loss: 0.028411865234375|cri_loss: 0.01493072509765625|unsuper_loss: 0.0 +average reward score: 6.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.15%) |Training time=0.45s (21.13%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4375|ppo_ep: 1|act_loss: -0.01084136962890625|cri_loss: -0.00421905517578125|unsuper_loss: 0.0 +average reward score: 5.91796875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.08%) |Training time=0.46s (21.24%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4376|ppo_ep: 1|act_loss: 0.0487060546875|cri_loss: 0.026031494140625|unsuper_loss: 0.0 +average reward score: 4.7734375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.25%) |Training time=0.45s (21.07%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4377|ppo_ep: 1|act_loss: 0.0104522705078125|cri_loss: 0.00604248046875|unsuper_loss: 0.0 +average reward score: 5.77734375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.46s (21.29%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4378|ppo_ep: 1|act_loss: -0.01346588134765625|cri_loss: -0.006237030029296875|unsuper_loss: 0.0 +average reward score: 6.1484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.14%) |Training time=0.45s (21.20%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46 +[2023-04-14 11:29:02,241] [INFO] [logging.py:96:log_dist] [Rank 0] step=4380, skipped=58, lr=[4.561349245652627e-06, 4.561349245652627e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:29:02,259] [INFO] [timer.py:199:stop] epoch=0/micro_step=4380/global_step=4380, RunningAvgSamplesPerSec=105.21642532909777, CurrSamplesPerSec=108.15262171002004, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:29:02,352] [INFO] [logging.py:96:log_dist] [Rank 0] step=4380, skipped=70, lr=[2.374927525426587e-06, 2.374927525426587e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4379|ppo_ep: 1|act_loss: 0.0002435445785522461|cri_loss: 0.0004558563232421875|unsuper_loss: 0.0 +average reward score: 5.640625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.01%) |Training time=0.46s (21.34%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4380|ppo_ep: 1|act_loss: -0.0017757415771484375|cri_loss: -0.0005774497985839844|unsuper_loss: 0.0 +average reward score: 4.453125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.07%) |Training time=0.46s (21.27%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4381|ppo_ep: 1|act_loss: -0.008941650390625|cri_loss: -0.00415802001953125|unsuper_loss: 0.0 +average reward score: 5.8046875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.32%) |Training time=0.46s (21.11%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4382|ppo_ep: 1|act_loss: -0.02496337890625|cri_loss: -0.0115814208984375|unsuper_loss: 0.0 +average reward score: 4.984375 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.69s (75.22%) |Training time=0.46s (20.31%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.23 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4383|ppo_ep: 1|act_loss: -0.018798828125|cri_loss: -0.0090789794921875|unsuper_loss: 0.0 +average reward score: 5.4765625 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.96%) |Training time=0.45s (19.61%) |Others=0.10 (4.43%)|CurSamplesPerSec=13.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4384|ppo_ep: 1|act_loss: 0.02191162109375|cri_loss: 0.01122283935546875|unsuper_loss: 0.0 +average reward score: 4.734375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.28%) |Training time=0.45s (21.05%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4385|ppo_ep: 1|act_loss: 0.01512908935546875|cri_loss: 0.00812530517578125|unsuper_loss: 0.0 +average reward score: 5.1796875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.42%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4386|ppo_ep: 1|act_loss: 0.01531982421875|cri_loss: 0.0079345703125|unsuper_loss: 0.0 +average reward score: 5.35546875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.21%) |Training time=0.45s (21.07%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4387|ppo_ep: 1|act_loss: 0.0019092559814453125|cri_loss: 0.00247955322265625|unsuper_loss: 0.0 +average reward score: 6.2421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.16%) |Training time=0.48s (21.93%) |Others=0.11 (4.91%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4388|ppo_ep: 1|act_loss: -0.018646240234375|cri_loss: -0.00868988037109375|unsuper_loss: 0.0 +average reward score: 4.13671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.65%) |Training time=0.47s (21.71%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +[2023-04-14 11:29:24,065] [INFO] [logging.py:96:log_dist] [Rank 0] step=4390, skipped=58, lr=[4.542802792206608e-06, 4.542802792206608e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:29:24,084] [INFO] [timer.py:199:stop] epoch=0/micro_step=4390/global_step=4390, RunningAvgSamplesPerSec=105.22203844777437, CurrSamplesPerSec=107.79432154766418, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:29:24,176] [INFO] [logging.py:96:log_dist] [Rank 0] step=4390, skipped=70, lr=[2.3653155513433175e-06, 2.3653155513433175e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4389|ppo_ep: 1|act_loss: -0.0149383544921875|cri_loss: -0.005939483642578125|unsuper_loss: 0.0 +average reward score: 5.04296875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.05%) |Training time=0.46s (21.29%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4390|ppo_ep: 1|act_loss: 0.05224609375|cri_loss: 0.0274200439453125|unsuper_loss: 0.0 +average reward score: 5.609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.76%) |Training time=0.47s (21.61%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4391|ppo_ep: 1|act_loss: -0.0126495361328125|cri_loss: -0.005931854248046875|unsuper_loss: 0.0 +average reward score: 6.5390625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.84%) |Training time=0.46s (21.49%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4392|ppo_ep: 1|act_loss: -0.0054779052734375|cri_loss: -0.0024261474609375|unsuper_loss: 0.0 +average reward score: 5.03125 +------------------------------------------------------------------------------------- +|E2E latency=2.97s |Gather latency=0.00s (0.00%) |Generate time=1.59s (53.63%) |Training time=0.47s (15.71%) |Others=0.91 (30.66%)|CurSamplesPerSec=10.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4393|ppo_ep: 1|act_loss: 0.0209808349609375|cri_loss: 0.01116180419921875|unsuper_loss: 0.0 +average reward score: 4.79296875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.62%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4394|ppo_ep: 1|act_loss: -0.013763427734375|cri_loss: -0.006748199462890625|unsuper_loss: 0.0 +average reward score: 6.87109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.60%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4395|ppo_ep: 1|act_loss: -0.0205078125|cri_loss: -0.0098724365234375|unsuper_loss: 0.0 +average reward score: 4.5703125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.83%) |Training time=0.46s (21.52%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4396|ppo_ep: 1|act_loss: -0.02789306640625|cri_loss: -0.013275146484375|unsuper_loss: 0.0 +average reward score: 5.46875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.73%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4397|ppo_ep: 1|act_loss: 0.00476837158203125|cri_loss: 0.0038661956787109375|unsuper_loss: 0.0 +average reward score: 4.80078125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.81%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4398|ppo_ep: 1|act_loss: 0.0108642578125|cri_loss: 0.00586700439453125|unsuper_loss: 0.0 +average reward score: 4.7578125 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.88%) |Training time=0.46s (19.70%) |Others=0.10 (4.42%)|CurSamplesPerSec=13.77 |AvgSamplesPerSec=14.46 +[2023-04-14 11:29:46,633] [INFO] [logging.py:96:log_dist] [Rank 0] step=4400, skipped=58, lr=[4.524260521603971e-06, 4.524260521603971e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:29:46,652] [INFO] [timer.py:199:stop] epoch=0/micro_step=4400/global_step=4400, RunningAvgSamplesPerSec=105.2229391910278, CurrSamplesPerSec=105.90347959812492, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:29:46,744] [INFO] [logging.py:96:log_dist] [Rank 0] step=4400, skipped=70, lr=[2.3557055736084847e-06, 2.3557055736084847e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4399|ppo_ep: 1|act_loss: -0.02545166015625|cri_loss: -0.01221466064453125|unsuper_loss: 0.0 +average reward score: 4.72265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.99%) |Training time=0.46s (21.41%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4400|ppo_ep: 1|act_loss: -0.03961181640625|cri_loss: -0.0188751220703125|unsuper_loss: 0.0 +average reward score: 6.0703125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.74%) |Training time=0.47s (21.61%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4401|ppo_ep: 1|act_loss: 0.063720703125|cri_loss: 0.034637451171875|unsuper_loss: 0.0 +average reward score: 5.76171875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.77%) |Training time=0.47s (21.57%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4402|ppo_ep: 1|act_loss: -0.0141143798828125|cri_loss: -0.006595611572265625|unsuper_loss: 0.0 +average reward score: 5.3828125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.66%) |Training time=0.47s (21.69%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4403|ppo_ep: 1|act_loss: 0.03546142578125|cri_loss: 0.01824951171875|unsuper_loss: 0.0 +average reward score: 7.25 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.61%) |Training time=0.47s (21.76%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4404|ppo_ep: 1|act_loss: -0.0003781318664550781|cri_loss: 6.4849853515625e-05|unsuper_loss: 0.0 +average reward score: 5.69921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.09%) |Training time=0.46s (21.24%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4405|ppo_ep: 1|act_loss: -0.0084228515625|cri_loss: -0.00386810302734375|unsuper_loss: 0.0 +average reward score: 6.1015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.00%) |Training time=0.46s (21.32%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4406|ppo_ep: 1|act_loss: 0.03094482421875|cri_loss: 0.0161285400390625|unsuper_loss: 0.0 +average reward score: 4.640625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.17%) |Training time=0.45s (21.18%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4407|ppo_ep: 1|act_loss: 0.0733642578125|cri_loss: 0.039581298828125|unsuper_loss: 0.0 +average reward score: 5.56640625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.95%) |Training time=0.46s (21.38%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4408|ppo_ep: 1|act_loss: 0.0005645751953125|cri_loss: 0.0008716583251953125|unsuper_loss: 0.0 +average reward score: 4.87890625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.21%) |Training time=0.45s (21.09%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46 +[2023-04-14 11:30:08,176] [INFO] [logging.py:96:log_dist] [Rank 0] step=4410, skipped=58, lr=[4.505722708685901e-06, 4.505722708685901e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:30:08,194] [INFO] [timer.py:199:stop] epoch=0/micro_step=4410/global_step=4410, RunningAvgSamplesPerSec=105.22675621955904, CurrSamplesPerSec=107.51842912451345, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:30:08,287] [INFO] [logging.py:96:log_dist] [Rank 0] step=4410, skipped=70, lr=[2.346097734665143e-06, 2.346097734665143e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4409|ppo_ep: 1|act_loss: 0.009124755859375|cri_loss: 0.004863739013671875|unsuper_loss: 0.0 +average reward score: 5.6015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.92%) |Training time=0.46s (21.44%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4410|ppo_ep: 1|act_loss: 0.00501251220703125|cri_loss: 0.0026264190673828125|unsuper_loss: 0.0 +average reward score: 4.6171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.91%) |Training time=0.47s (21.47%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4411|ppo_ep: 1|act_loss: 0.004062652587890625|cri_loss: 0.00296783447265625|unsuper_loss: 0.0 +average reward score: 4.765625 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.56%) |Training time=0.45s (19.98%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.25 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4412|ppo_ep: 1|act_loss: 0.01087188720703125|cri_loss: 0.00595855712890625|unsuper_loss: 0.0 +average reward score: 4.10546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.96%) |Training time=0.46s (21.38%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46 +[2023-04-14 11:30:17,048] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384 +epoch: 0|step: 4413|ppo_ep: 1|act_loss: 0.022308349609375|cri_loss: 0.0118408203125|unsuper_loss: 0.0 +average reward score: 5.53515625 +------------------------------------------------------------------------------------- +|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.76s (76.95%) |Training time=0.43s (18.68%) |Others=0.10 (4.37%)|CurSamplesPerSec=13.97 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4414|ppo_ep: 1|act_loss: -0.019378662109375|cri_loss: -0.00940704345703125|unsuper_loss: 0.0 +average reward score: 5.0 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.68%) |Training time=0.47s (21.67%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4415|ppo_ep: 1|act_loss: -0.0246429443359375|cri_loss: -0.0103607177734375|unsuper_loss: 0.0 +average reward score: 4.65234375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.75%) |Training time=0.47s (21.65%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4416|ppo_ep: 1|act_loss: -0.0114288330078125|cri_loss: -0.005565643310546875|unsuper_loss: 0.0 +average reward score: 4.99609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.70%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4417|ppo_ep: 1|act_loss: -0.00141143798828125|cri_loss: 0.0003528594970703125|unsuper_loss: 0.0 +average reward score: 5.421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.96%) |Training time=0.48s (22.40%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4418|ppo_ep: 1|act_loss: 0.00626373291015625|cri_loss: 0.003574371337890625|unsuper_loss: 0.0 +average reward score: 5.484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.58%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46 +[2023-04-14 11:30:30,012] [INFO] [logging.py:96:log_dist] [Rank 0] step=4420, skipped=59, lr=[4.489042715482996e-06, 4.489042715482996e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:30:30,030] [INFO] [timer.py:199:stop] epoch=0/micro_step=4420/global_step=4420, RunningAvgSamplesPerSec=105.22975983988711, CurrSamplesPerSec=105.75086433215567, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:30:30,123] [INFO] [logging.py:96:log_dist] [Rank 0] step=4420, skipped=70, lr=[2.3364921769246423e-06, 2.3364921769246423e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4419|ppo_ep: 1|act_loss: 0.015380859375|cri_loss: 0.00812530517578125|unsuper_loss: 0.0 +average reward score: 5.80859375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.85%) |Training time=0.47s (21.52%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4420|ppo_ep: 1|act_loss: 0.0207672119140625|cri_loss: 0.0114898681640625|unsuper_loss: 0.0 +average reward score: 5.3125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.53%) |Training time=0.47s (21.82%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4421|ppo_ep: 1|act_loss: 0.03228759765625|cri_loss: 0.0165252685546875|unsuper_loss: 0.0 +average reward score: 5.015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.79%) |Training time=0.47s (21.56%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4422|ppo_ep: 1|act_loss: -0.0010213851928710938|cri_loss: -0.00040078163146972656|unsuper_loss: 0.0 +average reward score: 6.07421875 +------------------------------------------------------------------------------------- +|E2E latency=2.41s |Gather latency=0.00s (0.00%) |Generate time=1.59s (66.06%) |Training time=0.47s (19.46%) |Others=0.35 (14.49%)|CurSamplesPerSec=13.27 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4423|ppo_ep: 1|act_loss: 6.699562072753906e-05|cri_loss: 0.00020170211791992188|unsuper_loss: 0.0 +average reward score: 5.52734375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.61%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4424|ppo_ep: 1|act_loss: -0.0081939697265625|cri_loss: -0.00389862060546875|unsuper_loss: 0.0 +average reward score: 5.8203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.47s (21.57%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4425|ppo_ep: 1|act_loss: -0.0167236328125|cri_loss: -0.0081329345703125|unsuper_loss: 0.0 +average reward score: 5.1953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.29%) |Training time=0.48s (22.06%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4426|ppo_ep: 1|act_loss: -0.0308990478515625|cri_loss: -0.014556884765625|unsuper_loss: 0.0 +average reward score: 5.23828125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.52%) |Training time=0.47s (21.82%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4427|ppo_ep: 1|act_loss: 0.00275421142578125|cri_loss: 0.0015478134155273438|unsuper_loss: 0.0 +average reward score: 6.15625 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.60s (70.08%) |Training time=0.58s (25.53%) |Others=0.10 (4.39%)|CurSamplesPerSec=14.01 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4428|ppo_ep: 1|act_loss: 0.0010356903076171875|cri_loss: 0.001148223876953125|unsuper_loss: 0.0 +average reward score: 4.86328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.13%) |Training time=0.48s (22.23%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +[2023-04-14 11:30:52,008] [INFO] [logging.py:96:log_dist] [Rank 0] step=4430, skipped=59, lr=[4.470514129113125e-06, 4.470514129113125e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:30:52,026] [INFO] [timer.py:199:stop] epoch=0/micro_step=4430/global_step=4430, RunningAvgSamplesPerSec=105.21941202542125, CurrSamplesPerSec=96.38726764544512, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:30:52,119] [INFO] [logging.py:96:log_dist] [Rank 0] step=4430, skipped=70, lr=[2.3268890427645213e-06, 2.3268890427645213e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4429|ppo_ep: 1|act_loss: 0.0251617431640625|cri_loss: 0.013397216796875|unsuper_loss: 0.0 +average reward score: 5.58203125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.70%) |Training time=0.49s (22.71%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4430|ppo_ep: 1|act_loss: -0.0066070556640625|cri_loss: -0.003173828125|unsuper_loss: 0.0 +average reward score: 5.51953125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.00%) |Training time=0.49s (22.38%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4431|ppo_ep: 1|act_loss: -0.024017333984375|cri_loss: -0.011444091796875|unsuper_loss: 0.0 +average reward score: 5.078125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.07%) |Training time=0.48s (22.29%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4432|ppo_ep: 1|act_loss: -0.0283203125|cri_loss: -0.01357269287109375|unsuper_loss: 0.0 +average reward score: 5.72265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.91%) |Training time=0.49s (22.47%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4433|ppo_ep: 1|act_loss: -0.0015773773193359375|cri_loss: -0.0005788803100585938|unsuper_loss: 0.0 +average reward score: 5.55078125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.47s (21.91%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4434|ppo_ep: 1|act_loss: 0.0058746337890625|cri_loss: 0.00351715087890625|unsuper_loss: 0.0 +average reward score: 5.4921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.25%) |Training time=0.48s (22.14%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4435|ppo_ep: 1|act_loss: -0.0010538101196289062|cri_loss: 8.96453857421875e-05|unsuper_loss: 0.0 +average reward score: 4.7421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.05%) |Training time=0.48s (22.31%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +[2023-04-14 11:31:07,251] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 4436|ppo_ep: 1|act_loss: -0.00556182861328125|cri_loss: -0.002140045166015625|unsuper_loss: 0.0 +average reward score: 5.87890625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.79%) |Training time=0.47s (22.02%) |Others=0.09 (4.19%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46 +[2023-04-14 11:31:09,405] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 4437|ppo_ep: 1|act_loss: -0.0242919921875|cri_loss: -0.01165008544921875|unsuper_loss: 0.0 +average reward score: 5.0 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.19%) |Training time=0.49s (22.66%) |Others=0.09 (4.15%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4438|ppo_ep: 1|act_loss: 0.00684356689453125|cri_loss: 0.0038604736328125|unsuper_loss: 0.0 +average reward score: 4.28515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.05%) |Training time=0.48s (22.32%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +[2023-04-14 11:31:13,619] [INFO] [logging.py:96:log_dist] [Rank 0] step=4440, skipped=59, lr=[4.451990797079012e-06, 4.451990797079012e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:31:13,637] [INFO] [timer.py:199:stop] epoch=0/micro_step=4440/global_step=4440, RunningAvgSamplesPerSec=105.20787125582682, CurrSamplesPerSec=100.67366538753491, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:31:13,729] [INFO] [logging.py:96:log_dist] [Rank 0] step=4440, skipped=72, lr=[2.3192083760691226e-06, 2.3192083760691226e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4439|ppo_ep: 1|act_loss: -0.00165557861328125|cri_loss: -0.0004649162292480469|unsuper_loss: 0.0 +average reward score: 4.8671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.23%) |Training time=0.48s (22.17%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4440|ppo_ep: 1|act_loss: 0.0289306640625|cri_loss: 0.01500701904296875|unsuper_loss: 0.0 +average reward score: 6.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.31%) |Training time=0.48s (21.83%) |Others=0.11 (4.85%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4441|ppo_ep: 1|act_loss: 0.002044677734375|cri_loss: 0.001621246337890625|unsuper_loss: 0.0 +average reward score: 5.67578125 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.67s (74.29%) |Training time=0.48s (21.24%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.25 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4442|ppo_ep: 1|act_loss: 0.021697998046875|cri_loss: 0.011474609375|unsuper_loss: 0.0 +average reward score: 5.6328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.91%) |Training time=0.49s (22.45%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4443|ppo_ep: 1|act_loss: 0.0056304931640625|cri_loss: 0.003143310546875|unsuper_loss: 0.0 +average reward score: 5.25 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.18%) |Training time=0.48s (22.19%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4444|ppo_ep: 1|act_loss: 0.027313232421875|cri_loss: 0.01392364501953125|unsuper_loss: 0.0 +average reward score: 4.96484375 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.74s (74.68%) |Training time=0.48s (20.67%) |Others=0.11 (4.65%)|CurSamplesPerSec=13.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4445|ppo_ep: 1|act_loss: 0.03533935546875|cri_loss: 0.01910400390625|unsuper_loss: 0.0 +average reward score: 4.9375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.88%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4446|ppo_ep: 1|act_loss: 0.003787994384765625|cri_loss: 0.0020809173583984375|unsuper_loss: 0.0 +average reward score: 6.28125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.70%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4447|ppo_ep: 1|act_loss: 0.0007276535034179688|cri_loss: 0.0010051727294921875|unsuper_loss: 0.0 +average reward score: 5.796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.10%) |Training time=0.48s (22.28%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4448|ppo_ep: 1|act_loss: -0.0034160614013671875|cri_loss: -0.0006732940673828125|unsuper_loss: 0.0 +average reward score: 4.84375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.46s (21.49%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46 +[2023-04-14 11:31:35,512] [INFO] [logging.py:96:log_dist] [Rank 0] step=4450, skipped=59, lr=[4.433472993941121e-06, 4.433472993941121e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:31:35,530] [INFO] [timer.py:199:stop] epoch=0/micro_step=4450/global_step=4450, RunningAvgSamplesPerSec=105.20223601286291, CurrSamplesPerSec=111.97585245167195, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:31:35,623] [INFO] [logging.py:96:log_dist] [Rank 0] step=4450, skipped=72, lr=[2.309609963029065e-06, 2.309609963029065e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4449|ppo_ep: 1|act_loss: -0.01202392578125|cri_loss: -0.005767822265625|unsuper_loss: 0.0 +average reward score: 6.08203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.59%) |Training time=0.45s (20.74%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4450|ppo_ep: 1|act_loss: -0.024658203125|cri_loss: -0.01102447509765625|unsuper_loss: 0.0 +average reward score: 5.53125 +------------------------------------------------------------------------------------- +|E2E latency=2.52s |Gather latency=0.00s (0.00%) |Generate time=1.60s (63.36%) |Training time=0.46s (18.39%) |Others=0.46 (18.25%)|CurSamplesPerSec=12.71 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4451|ppo_ep: 1|act_loss: -0.0023059844970703125|cri_loss: -0.0007138252258300781|unsuper_loss: 0.0 +average reward score: 4.796875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.65%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4452|ppo_ep: 1|act_loss: -0.006069183349609375|cri_loss: -0.0021514892578125|unsuper_loss: 0.0 +average reward score: 5.28125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.91%) |Training time=0.46s (21.42%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4453|ppo_ep: 1|act_loss: -0.029815673828125|cri_loss: -0.01385498046875|unsuper_loss: 0.0 +average reward score: 6.00390625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.96%) |Training time=0.46s (21.36%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4454|ppo_ep: 1|act_loss: -0.016937255859375|cri_loss: -0.007965087890625|unsuper_loss: 0.0 +average reward score: 5.71484375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.40%) |Training time=0.48s (21.96%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4455|ppo_ep: 1|act_loss: -0.036224365234375|cri_loss: -0.0174713134765625|unsuper_loss: 0.0 +average reward score: 5.5703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.86%) |Training time=0.46s (21.49%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4456|ppo_ep: 1|act_loss: 0.03314208984375|cri_loss: 0.0172271728515625|unsuper_loss: 0.0 +average reward score: 5.59765625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.80%) |Training time=0.46s (21.56%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4457|ppo_ep: 1|act_loss: 0.0027675628662109375|cri_loss: 0.0017824172973632812|unsuper_loss: 0.0 +average reward score: 5.87890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.59%) |Training time=0.47s (21.76%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4458|ppo_ep: 1|act_loss: 0.026031494140625|cri_loss: 0.01338958740234375|unsuper_loss: 0.0 +average reward score: 4.76953125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.39%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46 +[2023-04-14 11:31:57,598] [INFO] [logging.py:96:log_dist] [Rank 0] step=4460, skipped=59, lr=[4.41496099417797e-06, 4.41496099417797e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:31:57,616] [INFO] [timer.py:199:stop] epoch=0/micro_step=4460/global_step=4460, RunningAvgSamplesPerSec=105.20444311444572, CurrSamplesPerSec=108.08268602342073, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:31:57,709] [INFO] [logging.py:96:log_dist] [Rank 0] step=4460, skipped=72, lr=[2.3000143720286463e-06, 2.3000143720286463e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4459|ppo_ep: 1|act_loss: 0.06365966796875|cri_loss: 0.03680419921875|unsuper_loss: 0.0 +average reward score: 5.4375 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.96%) |Training time=0.46s (19.74%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4460|ppo_ep: 1|act_loss: 0.01898193359375|cri_loss: 0.00991058349609375|unsuper_loss: 0.0 +average reward score: 6.82421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.76%) |Training time=0.47s (21.60%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4461|ppo_ep: 1|act_loss: -0.0066986083984375|cri_loss: -0.00315093994140625|unsuper_loss: 0.0 +average reward score: 4.9140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.46s (21.53%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4462|ppo_ep: 1|act_loss: 0.0011196136474609375|cri_loss: 0.0011081695556640625|unsuper_loss: 0.0 +average reward score: 5.98046875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.61%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4463|ppo_ep: 1|act_loss: -0.0222320556640625|cri_loss: -0.00909423828125|unsuper_loss: 0.0 +average reward score: 6.11328125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.27%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4464|ppo_ep: 1|act_loss: -0.00608062744140625|cri_loss: 0.001983642578125|unsuper_loss: 0.0 +average reward score: 6.4453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.27%) |Training time=0.45s (21.00%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4465|ppo_ep: 1|act_loss: -0.028961181640625|cri_loss: -0.0133514404296875|unsuper_loss: 0.0 +average reward score: 5.4765625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.13%) |Training time=0.46s (21.17%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4466|ppo_ep: 1|act_loss: 0.029052734375|cri_loss: 0.01490020751953125|unsuper_loss: 0.0 +average reward score: 4.953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.31%) |Training time=0.41s (18.96%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4467|ppo_ep: 1|act_loss: -0.0139923095703125|cri_loss: -0.005023956298828125|unsuper_loss: 0.0 +average reward score: 5.91015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.65%) |Training time=0.40s (18.63%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4468|ppo_ep: 1|act_loss: 0.006591796875|cri_loss: 0.0034809112548828125|unsuper_loss: 0.0 +average reward score: 6.4375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.92%) |Training time=0.46s (21.40%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46 +[2023-04-14 11:32:19,212] [INFO] [logging.py:96:log_dist] [Rank 0] step=4470, skipped=59, lr=[4.3964550721820475e-06, 4.3964550721820475e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:32:19,230] [INFO] [timer.py:199:stop] epoch=0/micro_step=4470/global_step=4470, RunningAvgSamplesPerSec=105.21677106152741, CurrSamplesPerSec=103.61686894506175, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:32:19,323] [INFO] [logging.py:96:log_dist] [Rank 0] step=4470, skipped=72, lr=[2.2904217452976725e-06, 2.2904217452976725e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4469|ppo_ep: 1|act_loss: -0.00719451904296875|cri_loss: -0.002063751220703125|unsuper_loss: 0.0 +average reward score: 5.703125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.72%) |Training time=0.47s (21.67%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4470|ppo_ep: 1|act_loss: 0.01214599609375|cri_loss: 0.00646209716796875|unsuper_loss: 0.0 +average reward score: 6.7265625 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.99%) |Training time=0.46s (20.54%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.17 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4471|ppo_ep: 1|act_loss: 0.0025005340576171875|cri_loss: 0.00406646728515625|unsuper_loss: 0.0 +average reward score: 7.140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.09%) |Training time=0.46s (21.23%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4472|ppo_ep: 1|act_loss: 0.004077911376953125|cri_loss: 0.002239227294921875|unsuper_loss: 0.0 +average reward score: 5.1171875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.67%) |Training time=0.45s (20.63%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4473|ppo_ep: 1|act_loss: -0.0126495361328125|cri_loss: -0.005893707275390625|unsuper_loss: 0.0 +average reward score: 5.78515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.45%) |Training time=0.45s (20.88%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4474|ppo_ep: 1|act_loss: -0.0308990478515625|cri_loss: -0.0146636962890625|unsuper_loss: 0.0 +average reward score: 6.41015625 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.33%) |Training time=0.47s (20.34%) |Others=0.10 (4.33%)|CurSamplesPerSec=13.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4475|ppo_ep: 1|act_loss: -0.0252532958984375|cri_loss: -0.01204681396484375|unsuper_loss: 0.0 +average reward score: 4.92578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.86%) |Training time=0.46s (21.47%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4476|ppo_ep: 1|act_loss: -0.0056610107421875|cri_loss: -0.002590179443359375|unsuper_loss: 0.0 +average reward score: 5.1171875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.85%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4477|ppo_ep: 1|act_loss: 0.0009708404541015625|cri_loss: 0.0006566047668457031|unsuper_loss: 0.0 +average reward score: 5.7109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.64%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4478|ppo_ep: 1|act_loss: 0.04119873046875|cri_loss: 0.02197265625|unsuper_loss: 0.0 +average reward score: 5.94140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.09%) |Training time=0.46s (21.23%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +[2023-04-14 11:32:41,073] [INFO] [logging.py:96:log_dist] [Rank 0] step=4480, skipped=59, lr=[4.377955502255766e-06, 4.377955502255766e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:32:41,092] [INFO] [timer.py:199:stop] epoch=0/micro_step=4480/global_step=4480, RunningAvgSamplesPerSec=105.22041393282225, CurrSamplesPerSec=104.97159258882692, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:32:41,184] [INFO] [logging.py:96:log_dist] [Rank 0] step=4480, skipped=72, lr=[2.2808322250220145e-06, 2.2808322250220145e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4479|ppo_ep: 1|act_loss: -0.024658203125|cri_loss: -0.01195526123046875|unsuper_loss: 0.0 +average reward score: 5.55859375 +------------------------------------------------------------------------------------- +|E2E latency=3.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (50.62%) |Training time=0.47s (14.91%) |Others=1.08 (34.47%)|CurSamplesPerSec=10.19 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4480|ppo_ep: 1|act_loss: 0.01971435546875|cri_loss: 0.0106048583984375|unsuper_loss: 0.0 +average reward score: 5.31640625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.39%) |Training time=0.45s (20.92%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4481|ppo_ep: 1|act_loss: -0.017547607421875|cri_loss: -0.00855255126953125|unsuper_loss: 0.0 +average reward score: 5.8671875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.67%) |Training time=0.44s (20.64%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4482|ppo_ep: 1|act_loss: -0.00038242340087890625|cri_loss: 0.0005755424499511719|unsuper_loss: 0.0 +average reward score: 6.4375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.98%) |Training time=0.44s (20.38%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4483|ppo_ep: 1|act_loss: -0.0160369873046875|cri_loss: -0.007762908935546875|unsuper_loss: 0.0 +average reward score: 5.08984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.64%) |Training time=0.44s (20.68%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4484|ppo_ep: 1|act_loss: 0.00464630126953125|cri_loss: 0.002918243408203125|unsuper_loss: 0.0 +average reward score: 5.56640625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.43%) |Training time=0.45s (20.91%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4485|ppo_ep: 1|act_loss: -0.023712158203125|cri_loss: -0.0107879638671875|unsuper_loss: 0.0 +average reward score: 5.23046875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.32%) |Training time=0.45s (20.99%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4486|ppo_ep: 1|act_loss: 0.007293701171875|cri_loss: 0.004192352294921875|unsuper_loss: 0.0 +average reward score: 5.14453125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.82%) |Training time=0.47s (21.54%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4487|ppo_ep: 1|act_loss: -0.0009126663208007812|cri_loss: -7.581710815429688e-05|unsuper_loss: 0.0 +average reward score: 6.16796875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.17%) |Training time=0.46s (21.17%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4488|ppo_ep: 1|act_loss: 0.013427734375|cri_loss: 0.007648468017578125|unsuper_loss: 0.0 +average reward score: 5.72265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.45%) |Training time=0.45s (20.86%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46 +[2023-04-14 11:33:03,613] [INFO] [logging.py:96:log_dist] [Rank 0] step=4490, skipped=59, lr=[4.3594625586073746e-06, 4.3594625586073746e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:33:03,632] [INFO] [timer.py:199:stop] epoch=0/micro_step=4490/global_step=4490, RunningAvgSamplesPerSec=105.23280805486438, CurrSamplesPerSec=110.25764020668524, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:33:03,725] [INFO] [logging.py:96:log_dist] [Rank 0] step=4490, skipped=72, lr=[2.271245953341494e-06, 2.271245953341494e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4489|ppo_ep: 1|act_loss: 0.0073394775390625|cri_loss: 0.0040130615234375|unsuper_loss: 0.0 +average reward score: 4.859375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.31%) |Training time=0.45s (21.05%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4490|ppo_ep: 1|act_loss: -0.017333984375|cri_loss: -0.0079803466796875|unsuper_loss: 0.0 +average reward score: 5.0859375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.33%) |Training time=0.45s (21.01%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4491|ppo_ep: 1|act_loss: -0.015625|cri_loss: -0.0067596435546875|unsuper_loss: 0.0 +average reward score: 5.7265625 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.40%) |Training time=0.45s (19.27%) |Others=0.10 (4.33%)|CurSamplesPerSec=13.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4492|ppo_ep: 1|act_loss: -0.046142578125|cri_loss: -0.0220184326171875|unsuper_loss: 0.0 +average reward score: 5.125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.45s (21.08%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4493|ppo_ep: 1|act_loss: -0.0009975433349609375|cri_loss: -0.000133514404296875|unsuper_loss: 0.0 +average reward score: 5.25390625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.25%) |Training time=0.46s (21.10%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4494|ppo_ep: 1|act_loss: -0.0071868896484375|cri_loss: -0.0030841827392578125|unsuper_loss: 0.0 +average reward score: 5.46484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.56%) |Training time=0.45s (20.77%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4495|ppo_ep: 1|act_loss: -0.0185546875|cri_loss: -0.00876617431640625|unsuper_loss: 0.0 +average reward score: 5.984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.60%) |Training time=0.45s (20.73%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4496|ppo_ep: 1|act_loss: -0.00152587890625|cri_loss: 0.002399444580078125|unsuper_loss: 0.0 +average reward score: 5.3359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.33%) |Training time=0.45s (21.01%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4497|ppo_ep: 1|act_loss: -0.002407073974609375|cri_loss: -0.0009021759033203125|unsuper_loss: 0.0 +average reward score: 5.28515625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.64%) |Training time=0.44s (20.70%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4498|ppo_ep: 1|act_loss: 0.01605224609375|cri_loss: 0.00878143310546875|unsuper_loss: 0.0 +average reward score: 5.14453125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.60%) |Training time=0.44s (20.18%) |Others=0.11 (5.22%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46 +[2023-04-14 11:33:25,426] [INFO] [logging.py:96:log_dist] [Rank 0] step=4500, skipped=59, lr=[4.340976515346915e-06, 4.340976515346915e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:33:25,444] [INFO] [timer.py:199:stop] epoch=0/micro_step=4500/global_step=4500, RunningAvgSamplesPerSec=105.24786998748107, CurrSamplesPerSec=114.88214409214008, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:33:25,537] [INFO] [logging.py:96:log_dist] [Rank 0] step=4500, skipped=72, lr=[2.261663072347785e-06, 2.261663072347785e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4499|ppo_ep: 1|act_loss: 0.0015306472778320312|cri_loss: 0.0017213821411132812|unsuper_loss: 0.0 +average reward score: 5.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.93%) |Training time=0.44s (19.62%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.22 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4500|ppo_ep: 1|act_loss: -0.002105712890625|cri_loss: -6.4849853515625e-05|unsuper_loss: 0.0 +average reward score: 5.05078125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.59%) |Training time=0.45s (20.74%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4501|ppo_ep: 1|act_loss: 0.0303955078125|cri_loss: 0.0157470703125|unsuper_loss: 0.0 +average reward score: 6.1015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.45%) |Training time=0.45s (20.91%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4502|ppo_ep: 1|act_loss: 0.02105712890625|cri_loss: 0.01132965087890625|unsuper_loss: 0.0 +average reward score: 4.98828125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.54%) |Training time=0.45s (20.79%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4503|ppo_ep: 1|act_loss: -0.03997802734375|cri_loss: -0.0192108154296875|unsuper_loss: 0.0 +average reward score: 5.0390625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.77%) |Training time=0.44s (20.55%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4504|ppo_ep: 1|act_loss: 0.013336181640625|cri_loss: 0.007686614990234375|unsuper_loss: 0.0 +average reward score: 5.51171875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.46%) |Training time=0.45s (20.86%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4505|ppo_ep: 1|act_loss: -0.0149078369140625|cri_loss: -0.0070953369140625|unsuper_loss: 0.0 +average reward score: 6.26171875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.42%) |Training time=0.45s (20.92%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4506|ppo_ep: 1|act_loss: 0.00888824462890625|cri_loss: 0.004749298095703125|unsuper_loss: 0.0 +average reward score: 4.53125 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.64%) |Training time=0.46s (19.69%) |Others=0.11 (4.66%)|CurSamplesPerSec=13.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4507|ppo_ep: 1|act_loss: 0.0194854736328125|cri_loss: 0.01007843017578125|unsuper_loss: 0.0 +average reward score: 5.5703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.20%) |Training time=0.45s (21.12%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4508|ppo_ep: 1|act_loss: 0.06427001953125|cri_loss: 0.034271240234375|unsuper_loss: 0.0 +average reward score: 5.046875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.32%) |Training time=0.45s (21.01%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46 +[2023-04-14 11:33:47,136] [INFO] [logging.py:96:log_dist] [Rank 0] step=4510, skipped=59, lr=[4.32249764648214e-06, 4.32249764648214e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:33:48,297] [INFO] [timer.py:199:stop] epoch=0/micro_step=4510/global_step=4510, RunningAvgSamplesPerSec=105.17105176081999, CurrSamplesPerSec=21.96928961487455, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:33:48,390] [INFO] [logging.py:96:log_dist] [Rank 0] step=4510, skipped=72, lr=[2.2520837240822997e-06, 2.2520837240822997e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4509|ppo_ep: 1|act_loss: -0.017822265625|cri_loss: -0.00855255126953125|unsuper_loss: 0.0 +average reward score: 5.32421875 +------------------------------------------------------------------------------------- +|E2E latency=3.32s |Gather latency=0.00s (0.00%) |Generate time=1.60s (48.26%) |Training time=1.62s (48.71%) |Others=0.10 (3.03%)|CurSamplesPerSec=9.63 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4510|ppo_ep: 1|act_loss: 0.010223388671875|cri_loss: 0.005870819091796875|unsuper_loss: 0.0 +average reward score: 6.625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.11%) |Training time=0.48s (22.28%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4511|ppo_ep: 1|act_loss: 0.033660888671875|cri_loss: 0.017913818359375|unsuper_loss: 0.0 +average reward score: 4.37109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.62%) |Training time=0.47s (21.72%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4512|ppo_ep: 1|act_loss: 0.00102996826171875|cri_loss: 0.00168609619140625|unsuper_loss: 0.0 +average reward score: 4.7890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.32%) |Training time=0.48s (22.06%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4513|ppo_ep: 1|act_loss: 0.00452423095703125|cri_loss: 0.0026302337646484375|unsuper_loss: 0.0 +average reward score: 5.8515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.55%) |Training time=0.47s (21.80%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4514|ppo_ep: 1|act_loss: 0.01007080078125|cri_loss: 0.005413055419921875|unsuper_loss: 0.0 +average reward score: 5.81640625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.48s (21.95%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4515|ppo_ep: 1|act_loss: 0.0038013458251953125|cri_loss: 0.00213623046875|unsuper_loss: 0.0 +average reward score: 6.515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.90%) |Training time=0.49s (22.49%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4516|ppo_ep: 1|act_loss: -0.00154876708984375|cri_loss: -0.00052642822265625|unsuper_loss: 0.0 +average reward score: 5.73046875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.18%) |Training time=0.48s (22.19%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4517|ppo_ep: 1|act_loss: 0.007537841796875|cri_loss: 0.003902435302734375|unsuper_loss: 0.0 +average reward score: 5.9765625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.12%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4518|ppo_ep: 1|act_loss: -0.0220947265625|cri_loss: -0.0106658935546875|unsuper_loss: 0.0 +average reward score: 5.01171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.18%) |Training time=0.48s (22.20%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +[2023-04-14 11:34:09,955] [INFO] [logging.py:96:log_dist] [Rank 0] step=4520, skipped=59, lr=[4.304026225914465e-06, 4.304026225914465e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:34:09,973] [INFO] [timer.py:199:stop] epoch=0/micro_step=4520/global_step=4520, RunningAvgSamplesPerSec=105.16232246066636, CurrSamplesPerSec=103.59375617466904, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:34:10,066] [INFO] [logging.py:96:log_dist] [Rank 0] step=4520, skipped=72, lr=[2.2425080505340897e-06, 2.2425080505340897e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4519|ppo_ep: 1|act_loss: -0.02069091796875|cri_loss: -0.00975799560546875|unsuper_loss: 0.0 +average reward score: 6.0703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.67%) |Training time=0.47s (21.72%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4520|ppo_ep: 1|act_loss: 0.0130767822265625|cri_loss: 0.006988525390625|unsuper_loss: 0.0 +average reward score: 6.4375 +------------------------------------------------------------------------------------- +|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.39%) |Training time=0.50s (21.33%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.57 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4521|ppo_ep: 1|act_loss: 0.042938232421875|cri_loss: 0.0226898193359375|unsuper_loss: 0.0 +average reward score: 4.76171875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.94%) |Training time=0.46s (21.41%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4522|ppo_ep: 1|act_loss: -0.0004248619079589844|cri_loss: 0.0001125335693359375|unsuper_loss: 0.0 +average reward score: 6.875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.65%) |Training time=0.47s (21.72%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4523|ppo_ep: 1|act_loss: 0.03631591796875|cri_loss: 0.01885986328125|unsuper_loss: 0.0 +average reward score: 5.453125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.86%) |Training time=0.49s (22.51%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4524|ppo_ep: 1|act_loss: -0.006473541259765625|cri_loss: -0.003021240234375|unsuper_loss: 0.0 +average reward score: 5.91015625 +------------------------------------------------------------------------------------- +|E2E latency=3.23s |Gather latency=0.00s (0.00%) |Generate time=1.60s (49.51%) |Training time=0.47s (14.71%) |Others=1.16 (35.78%)|CurSamplesPerSec=9.91 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4525|ppo_ep: 1|act_loss: -0.0032958984375|cri_loss: -0.0013866424560546875|unsuper_loss: 0.0 +average reward score: 5.44921875 +------------------------------------------------------------------------------------- +|E2E latency=2.08s |Gather latency=0.00s (0.00%) |Generate time=1.59s (76.58%) |Training time=0.39s (18.58%) |Others=0.10 (4.84%)|CurSamplesPerSec=15.38 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4526|ppo_ep: 1|act_loss: 0.0804443359375|cri_loss: 0.042755126953125|unsuper_loss: 0.0 +average reward score: 5.18359375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.12%) |Training time=0.48s (22.10%) |Others=0.10 (4.78%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4527|ppo_ep: 1|act_loss: 0.0029087066650390625|cri_loss: 0.002635955810546875|unsuper_loss: 0.0 +average reward score: 6.203125 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.66%) |Training time=0.48s (21.78%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4528|ppo_ep: 1|act_loss: -0.0214691162109375|cri_loss: -0.0103302001953125|unsuper_loss: 0.0 +average reward score: 6.0390625 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.68s (73.67%) |Training time=0.49s (21.59%) |Others=0.11 (4.74%)|CurSamplesPerSec=14.01 |AvgSamplesPerSec=14.46 +[2023-04-14 11:34:33,000] [INFO] [logging.py:96:log_dist] [Rank 0] step=4530, skipped=59, lr=[4.285562527434903e-06, 4.285562527434903e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:34:33,018] [INFO] [timer.py:199:stop] epoch=0/micro_step=4530/global_step=4530, RunningAvgSamplesPerSec=105.15677733092694, CurrSamplesPerSec=97.51551941225235, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:34:33,111] [INFO] [logging.py:96:log_dist] [Rank 0] step=4530, skipped=72, lr=[2.2329361936377355e-06, 2.2329361936377355e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4529|ppo_ep: 1|act_loss: -0.037109375|cri_loss: -0.0172882080078125|unsuper_loss: 0.0 +average reward score: 4.9453125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.89%) |Training time=0.49s (22.53%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4530|ppo_ep: 1|act_loss: 0.021636962890625|cri_loss: 0.011260986328125|unsuper_loss: 0.0 +average reward score: 4.91796875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.95%) |Training time=0.49s (22.43%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4531|ppo_ep: 1|act_loss: -0.01393890380859375|cri_loss: -0.0066986083984375|unsuper_loss: 0.0 +average reward score: 4.734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.17%) |Training time=0.48s (22.08%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4532|ppo_ep: 1|act_loss: 0.01395416259765625|cri_loss: 0.007312774658203125|unsuper_loss: 0.0 +average reward score: 5.16796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.00%) |Training time=0.49s (22.38%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4533|ppo_ep: 1|act_loss: 0.015045166015625|cri_loss: 0.0084991455078125|unsuper_loss: 0.0 +average reward score: 5.73046875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.47s (21.86%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4534|ppo_ep: 1|act_loss: 0.0009570121765136719|cri_loss: 0.0007576942443847656|unsuper_loss: 0.0 +average reward score: 5.8203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.10%) |Training time=0.48s (22.28%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4535|ppo_ep: 1|act_loss: -0.05126953125|cri_loss: -0.0240936279296875|unsuper_loss: 0.0 +average reward score: 5.32421875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.69%) |Training time=0.48s (21.86%) |Others=0.12 (5.45%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4536|ppo_ep: 1|act_loss: 0.00452423095703125|cri_loss: 0.0024509429931640625|unsuper_loss: 0.0 +average reward score: 5.0390625 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.73s (74.96%) |Training time=0.47s (20.37%) |Others=0.11 (4.67%)|CurSamplesPerSec=13.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4537|ppo_ep: 1|act_loss: -0.0042266845703125|cri_loss: -0.0018224716186523438|unsuper_loss: 0.0 +average reward score: 5.3671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.19%) |Training time=0.48s (22.18%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +[2023-04-14 11:34:52,784] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 4538|ppo_ep: 1|act_loss: 0.03857421875|cri_loss: 0.019989013671875|unsuper_loss: 0.0 +average reward score: 5.02734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.59%) |Training time=0.48s (22.22%) |Others=0.09 (4.19%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +[2023-04-14 11:34:54,843] [INFO] [logging.py:96:log_dist] [Rank 0] step=4540, skipped=59, lr=[4.267106824720006e-06, 4.267106824720006e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:34:54,862] [INFO] [timer.py:199:stop] epoch=0/micro_step=4540/global_step=4540, RunningAvgSamplesPerSec=105.14597895413151, CurrSamplesPerSec=97.63909587372902, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:34:54,945] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +[2023-04-14 11:34:54,946] [INFO] [logging.py:96:log_dist] [Rank 0] step=4540, skipped=74, lr=[2.2252815514539662e-06, 2.2252815514539662e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4539|ppo_ep: 1|act_loss: -0.02557373046875|cri_loss: -0.0123748779296875|unsuper_loss: 0.0 +average reward score: 5.3984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.18%) |Training time=0.49s (22.68%) |Others=0.09 (4.14%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4540|ppo_ep: 1|act_loss: -0.005657196044921875|cri_loss: -0.0024356842041015625|unsuper_loss: 0.0 +average reward score: 5.46875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.71%) |Training time=0.49s (22.44%) |Others=0.11 (4.85%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4541|ppo_ep: 1|act_loss: 0.0125885009765625|cri_loss: 0.006679534912109375|unsuper_loss: 0.0 +average reward score: 5.93359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.06%) |Training time=0.48s (22.31%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4542|ppo_ep: 1|act_loss: -0.0191192626953125|cri_loss: -0.0088653564453125|unsuper_loss: 0.0 +average reward score: 5.3515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.99%) |Training time=0.48s (22.17%) |Others=0.10 (4.84%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4543|ppo_ep: 1|act_loss: -0.011566162109375|cri_loss: -0.005367279052734375|unsuper_loss: 0.0 +average reward score: 5.796875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.13%) |Training time=0.48s (22.24%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4544|ppo_ep: 1|act_loss: 0.010528564453125|cri_loss: 0.00630950927734375|unsuper_loss: 0.0 +average reward score: 4.859375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.27%) |Training time=0.48s (22.11%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4545|ppo_ep: 1|act_loss: -0.0128631591796875|cri_loss: -0.005611419677734375|unsuper_loss: 0.0 +average reward score: 5.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.16%) |Training time=0.48s (22.21%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4546|ppo_ep: 1|act_loss: 0.047515869140625|cri_loss: 0.02423095703125|unsuper_loss: 0.0 +average reward score: 5.83203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.14%) |Training time=0.48s (22.23%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4547|ppo_ep: 1|act_loss: 0.0244140625|cri_loss: 0.01294708251953125|unsuper_loss: 0.0 +average reward score: 4.8515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.12%) |Training time=0.48s (22.26%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4548|ppo_ep: 1|act_loss: 0.0213470458984375|cri_loss: 0.0111083984375|unsuper_loss: 0.0 +average reward score: 4.62890625 +------------------------------------------------------------------------------------- +|E2E latency=2.84s |Gather latency=0.00s (0.00%) |Generate time=1.58s (55.63%) |Training time=0.48s (16.89%) |Others=0.78 (27.49%)|CurSamplesPerSec=11.25 |AvgSamplesPerSec=14.46 +[2023-04-14 11:35:17,159] [INFO] [logging.py:96:log_dist] [Rank 0] step=4550, skipped=59, lr=[4.248659391327812e-06, 4.248659391327812e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:35:17,177] [INFO] [timer.py:199:stop] epoch=0/micro_step=4550/global_step=4550, RunningAvgSamplesPerSec=105.13486238929258, CurrSamplesPerSec=100.88547102857268, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:35:17,270] [INFO] [logging.py:96:log_dist] [Rank 0] step=4550, skipped=74, lr=[2.2157169220241807e-06, 2.2157169220241807e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4549|ppo_ep: 1|act_loss: -0.029541015625|cri_loss: -0.01416015625|unsuper_loss: 0.0 +average reward score: 4.4453125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.14%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4550|ppo_ep: 1|act_loss: 0.0008153915405273438|cri_loss: 0.000988006591796875|unsuper_loss: 0.0 +average reward score: 5.0234375 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.12%) |Training time=0.48s (20.57%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4551|ppo_ep: 1|act_loss: 0.0382080078125|cri_loss: 0.0198822021484375|unsuper_loss: 0.0 +average reward score: 4.80859375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.52%) |Training time=0.48s (21.87%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4552|ppo_ep: 1|act_loss: 0.005889892578125|cri_loss: 0.00328826904296875|unsuper_loss: 0.0 +average reward score: 5.94921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.40%) |Training time=0.48s (21.94%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4553|ppo_ep: 1|act_loss: 0.007633209228515625|cri_loss: 0.00445556640625|unsuper_loss: 0.0 +average reward score: 5.875 +------------------------------------------------------------------------------------- +|E2E latency=2.51s |Gather latency=0.00s (0.00%) |Generate time=1.60s (63.76%) |Training time=0.46s (18.25%) |Others=0.45 (17.99%)|CurSamplesPerSec=12.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4554|ppo_ep: 1|act_loss: -0.04290771484375|cri_loss: -0.01824951171875|unsuper_loss: 0.0 +average reward score: 4.640625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.18%) |Training time=0.48s (22.17%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4555|ppo_ep: 1|act_loss: -0.011505126953125|cri_loss: -0.00542449951171875|unsuper_loss: 0.0 +average reward score: 4.640625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.93%) |Training time=0.49s (22.49%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4556|ppo_ep: 1|act_loss: 0.023284912109375|cri_loss: 0.01212310791015625|unsuper_loss: 0.0 +average reward score: 4.88671875 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.62s (71.91%) |Training time=0.49s (21.71%) |Others=0.14 (6.38%)|CurSamplesPerSec=14.20 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4557|ppo_ep: 1|act_loss: 0.02960205078125|cri_loss: 0.016326904296875|unsuper_loss: 0.0 +average reward score: 5.91015625 +------------------------------------------------------------------------------------- +|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.21%) |Training time=0.50s (22.29%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.37 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4558|ppo_ep: 1|act_loss: 0.0092010498046875|cri_loss: 0.005279541015625|unsuper_loss: 0.0 +average reward score: 5.0625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.32%) |Training time=0.48s (22.02%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +[2023-04-14 11:35:39,518] [INFO] [logging.py:96:log_dist] [Rank 0] step=4560, skipped=59, lr=[4.230220500693783e-06, 4.230220500693783e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:35:39,838] [INFO] [timer.py:199:stop] epoch=0/micro_step=4560/global_step=4560, RunningAvgSamplesPerSec=105.10160046034554, CurrSamplesPerSec=51.63946257422192, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:35:39,931] [INFO] [logging.py:96:log_dist] [Rank 0] step=4560, skipped=74, lr=[2.2061565063554063e-06, 2.2061565063554063e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4559|ppo_ep: 1|act_loss: -0.020111083984375|cri_loss: -0.00907135009765625|unsuper_loss: 0.0 +average reward score: 4.53125 +------------------------------------------------------------------------------------- +|E2E latency=2.47s |Gather latency=0.00s (0.00%) |Generate time=1.59s (64.27%) |Training time=0.78s (31.69%) |Others=0.10 (4.04%)|CurSamplesPerSec=12.95 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4560|ppo_ep: 1|act_loss: -0.028472900390625|cri_loss: -0.01371002197265625|unsuper_loss: 0.0 +average reward score: 5.25 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.82%) |Training time=0.49s (22.56%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4561|ppo_ep: 1|act_loss: -0.017730712890625|cri_loss: -0.00830078125|unsuper_loss: 0.0 +average reward score: 4.1875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.17%) |Training time=0.48s (22.20%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4562|ppo_ep: 1|act_loss: 0.00510406494140625|cri_loss: 0.0028438568115234375|unsuper_loss: 0.0 +average reward score: 4.01171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.96%) |Training time=0.49s (22.42%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4563|ppo_ep: 1|act_loss: -0.00669097900390625|cri_loss: -0.003200531005859375|unsuper_loss: 0.0 +average reward score: 5.5625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.18%) |Training time=0.49s (22.25%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4564|ppo_ep: 1|act_loss: 0.0183563232421875|cri_loss: 0.009552001953125|unsuper_loss: 0.0 +average reward score: 4.5859375 +------------------------------------------------------------------------------------- +|E2E latency=2.62s |Gather latency=0.00s (0.00%) |Generate time=1.75s (66.91%) |Training time=0.51s (19.64%) |Others=0.35 (13.45%)|CurSamplesPerSec=12.22 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4565|ppo_ep: 1|act_loss: 0.029815673828125|cri_loss: 0.0155181884765625|unsuper_loss: 0.0 +average reward score: 6.4609375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.13%) |Training time=0.48s (22.24%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4566|ppo_ep: 1|act_loss: 0.0841064453125|cri_loss: 0.044769287109375|unsuper_loss: 0.0 +average reward score: 4.203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.97%) |Training time=0.49s (22.42%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4567|ppo_ep: 1|act_loss: 0.030364990234375|cri_loss: 0.01641845703125|unsuper_loss: 0.0 +average reward score: 5.890625 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.51%) |Training time=0.50s (22.93%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4568|ppo_ep: 1|act_loss: -0.068603515625|cri_loss: -0.032928466796875|unsuper_loss: 0.0 +average reward score: 4.90625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.60%) |Training time=0.50s (22.81%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.46 +[2023-04-14 11:36:02,033] [INFO] [logging.py:96:log_dist] [Rank 0] step=4570, skipped=59, lr=[4.2117904261267626e-06, 4.2117904261267626e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:36:02,051] [INFO] [timer.py:199:stop] epoch=0/micro_step=4570/global_step=4570, RunningAvgSamplesPerSec=105.08257705936522, CurrSamplesPerSec=98.051808641592, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:36:02,144] [INFO] [logging.py:96:log_dist] [Rank 0] step=4570, skipped=74, lr=[2.1966004461560667e-06, 2.1966004461560667e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4569|ppo_ep: 1|act_loss: -0.0516357421875|cri_loss: -0.0239410400390625|unsuper_loss: 0.0 +average reward score: 4.5859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.89%) |Training time=0.49s (22.52%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4570|ppo_ep: 1|act_loss: 0.005802154541015625|cri_loss: 0.0033092498779296875|unsuper_loss: 0.0 +average reward score: 6.0078125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.93%) |Training time=0.49s (22.46%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4571|ppo_ep: 1|act_loss: 0.0352783203125|cri_loss: 0.0202178955078125|unsuper_loss: 0.0 +average reward score: 4.08984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.21%) |Training time=0.48s (22.15%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4572|ppo_ep: 1|act_loss: 0.0212554931640625|cri_loss: 0.01184844970703125|unsuper_loss: 0.0 +average reward score: 4.58203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.10%) |Training time=0.48s (22.30%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4573|ppo_ep: 1|act_loss: 0.04534912109375|cri_loss: 0.0251312255859375|unsuper_loss: 0.0 +average reward score: 4.2265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.18%) |Training time=0.48s (22.20%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4574|ppo_ep: 1|act_loss: -0.0034160614013671875|cri_loss: -0.0014438629150390625|unsuper_loss: 0.0 +average reward score: 6.7734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.01%) |Training time=0.48s (22.35%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4575|ppo_ep: 1|act_loss: -0.0118560791015625|cri_loss: -0.002506256103515625|unsuper_loss: 0.0 +average reward score: 4.6484375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.02%) |Training time=0.49s (22.38%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4576|ppo_ep: 1|act_loss: -0.0187225341796875|cri_loss: -0.007053375244140625|unsuper_loss: 0.0 +average reward score: 5.078125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.75%) |Training time=0.48s (22.23%) |Others=0.11 (5.02%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4577|ppo_ep: 1|act_loss: 0.0162353515625|cri_loss: 0.0099639892578125|unsuper_loss: 0.0 +average reward score: 4.25 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.14%) |Training time=0.48s (22.24%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4578|ppo_ep: 1|act_loss: -0.002105712890625|cri_loss: -0.0005311965942382812|unsuper_loss: 0.0 +average reward score: 5.73046875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.26%) |Training time=0.48s (22.13%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +[2023-04-14 11:36:23,920] [INFO] [logging.py:96:log_dist] [Rank 0] step=4580, skipped=59, lr=[4.193369440804912e-06, 4.193369440804912e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:36:23,938] [INFO] [timer.py:199:stop] epoch=0/micro_step=4580/global_step=4580, RunningAvgSamplesPerSec=105.06683239626007, CurrSamplesPerSec=86.84876269801265, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:36:24,031] [INFO] [logging.py:96:log_dist] [Rank 0] step=4580, skipped=74, lr=[2.1870488830700263e-06, 2.1870488830700263e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4579|ppo_ep: 1|act_loss: 0.02239990234375|cri_loss: 0.011810302734375|unsuper_loss: 0.0 +average reward score: 5.25390625 +------------------------------------------------------------------------------------- +|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.72s (72.70%) |Training time=0.54s (23.00%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.55 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4580|ppo_ep: 1|act_loss: -0.0028533935546875|cri_loss: -0.0010128021240234375|unsuper_loss: 0.0 +average reward score: 5.0625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.18%) |Training time=0.48s (22.18%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4581|ppo_ep: 1|act_loss: -0.03582763671875|cri_loss: -0.017059326171875|unsuper_loss: 0.0 +average reward score: 4.87109375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.42%) |Training time=0.48s (21.94%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4582|ppo_ep: 1|act_loss: -0.0109100341796875|cri_loss: -0.0051116943359375|unsuper_loss: 0.0 +average reward score: 4.60546875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.93%) |Training time=0.49s (22.44%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4583|ppo_ep: 1|act_loss: -0.0345458984375|cri_loss: -0.015655517578125|unsuper_loss: 0.0 +average reward score: 5.1875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.24%) |Training time=0.46s (21.11%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4584|ppo_ep: 1|act_loss: 0.0039215087890625|cri_loss: 0.002899169921875|unsuper_loss: 0.0 +average reward score: 5.97265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.88%) |Training time=0.47s (21.47%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4585|ppo_ep: 1|act_loss: 0.016510009765625|cri_loss: 0.008544921875|unsuper_loss: 0.0 +average reward score: 5.85546875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.28%) |Training time=0.46s (21.07%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4586|ppo_ep: 1|act_loss: 0.052734375|cri_loss: 0.0289154052734375|unsuper_loss: 0.0 +average reward score: 5.7890625 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.90%) |Training time=0.47s (20.66%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.16 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4587|ppo_ep: 1|act_loss: 0.012481689453125|cri_loss: 0.006565093994140625|unsuper_loss: 0.0 +average reward score: 5.6953125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.69%) |Training time=0.47s (21.69%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4588|ppo_ep: 1|act_loss: 0.00595855712890625|cri_loss: 0.00421905517578125|unsuper_loss: 0.0 +average reward score: 5.51953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.59%) |Training time=0.47s (21.78%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46 +[2023-04-14 11:36:45,723] [INFO] [logging.py:96:log_dist] [Rank 0] step=4590, skipped=59, lr=[4.174957817771677e-06, 4.174957817771677e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:36:45,741] [INFO] [timer.py:199:stop] epoch=0/micro_step=4590/global_step=4590, RunningAvgSamplesPerSec=105.06351756370773, CurrSamplesPerSec=102.14213537144157, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:36:45,834] [INFO] [logging.py:96:log_dist] [Rank 0] step=4590, skipped=74, lr=[2.1775019586744924e-06, 2.1775019586744924e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4589|ppo_ep: 1|act_loss: -0.00743865966796875|cri_loss: -0.003204345703125|unsuper_loss: 0.0 +average reward score: 4.12890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.48s (21.98%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4590|ppo_ep: 1|act_loss: 0.003932952880859375|cri_loss: 0.0020923614501953125|unsuper_loss: 0.0 +average reward score: 5.2578125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.87%) |Training time=0.47s (21.52%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4591|ppo_ep: 1|act_loss: -0.0245361328125|cri_loss: -0.01166534423828125|unsuper_loss: 0.0 +average reward score: 6.515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.87%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4592|ppo_ep: 1|act_loss: -0.0102691650390625|cri_loss: -0.0049285888671875|unsuper_loss: 0.0 +average reward score: 4.7734375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.40%) |Training time=0.48s (21.97%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4593|ppo_ep: 1|act_loss: -0.0169525146484375|cri_loss: -0.0081939697265625|unsuper_loss: 0.0 +average reward score: 5.5 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.67%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4594|ppo_ep: 1|act_loss: -0.0111541748046875|cri_loss: -0.0048980712890625|unsuper_loss: 0.0 +average reward score: 5.796875 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.48%) |Training time=0.47s (20.22%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4595|ppo_ep: 1|act_loss: -0.0110931396484375|cri_loss: -0.00519561767578125|unsuper_loss: 0.0 +average reward score: 5.21875 +------------------------------------------------------------------------------------- +|E2E latency=2.45s |Gather latency=0.00s (0.00%) |Generate time=1.59s (65.04%) |Training time=0.47s (19.26%) |Others=0.38 (15.71%)|CurSamplesPerSec=13.08 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4596|ppo_ep: 1|act_loss: 0.0110931396484375|cri_loss: 0.005840301513671875|unsuper_loss: 0.0 +average reward score: 6.8671875 +------------------------------------------------------------------------------------- +|E2E latency=2.07s |Gather latency=0.00s (0.00%) |Generate time=1.59s (76.53%) |Training time=0.39s (18.65%) |Others=0.10 (4.82%)|CurSamplesPerSec=15.44 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4597|ppo_ep: 1|act_loss: 0.009765625|cri_loss: 0.005664825439453125|unsuper_loss: 0.0 +average reward score: 5.7421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.62%) |Training time=0.47s (21.73%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4598|ppo_ep: 1|act_loss: 0.00322723388671875|cri_loss: 0.0019207000732421875|unsuper_loss: 0.0 +average reward score: 5.421875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.90%) |Training time=0.47s (21.47%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46 +[2023-04-14 11:37:07,765] [INFO] [logging.py:96:log_dist] [Rank 0] step=4600, skipped=59, lr=[4.156555829931723e-06, 4.156555829931723e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:37:07,783] [INFO] [timer.py:199:stop] epoch=0/micro_step=4600/global_step=4600, RunningAvgSamplesPerSec=105.06512917590997, CurrSamplesPerSec=95.71679557963769, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:37:07,876] [INFO] [logging.py:96:log_dist] [Rank 0] step=4600, skipped=74, lr=[2.167959814477915e-06, 2.167959814477915e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4599|ppo_ep: 1|act_loss: 0.0211944580078125|cri_loss: 0.011505126953125|unsuper_loss: 0.0 +average reward score: 5.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.69%) |Training time=0.50s (22.75%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4600|ppo_ep: 1|act_loss: 0.0211334228515625|cri_loss: 0.010833740234375|unsuper_loss: 0.0 +average reward score: 5.8984375 +------------------------------------------------------------------------------------- +|E2E latency=2.96s |Gather latency=0.00s (0.00%) |Generate time=1.58s (53.31%) |Training time=0.50s (16.89%) |Others=0.88 (29.80%)|CurSamplesPerSec=10.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4601|ppo_ep: 1|act_loss: -0.0022735595703125|cri_loss: -0.0004930496215820312|unsuper_loss: 0.0 +average reward score: 5.046875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.67%) |Training time=0.49s (22.71%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4602|ppo_ep: 1|act_loss: 0.00644683837890625|cri_loss: 0.0035648345947265625|unsuper_loss: 0.0 +average reward score: 5.68359375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.65%) |Training time=0.49s (22.73%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4603|ppo_ep: 1|act_loss: 0.0165252685546875|cri_loss: 0.0088958740234375|unsuper_loss: 0.0 +average reward score: 5.25390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.97%) |Training time=0.49s (22.35%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4604|ppo_ep: 1|act_loss: -0.01346588134765625|cri_loss: -0.00623321533203125|unsuper_loss: 0.0 +average reward score: 5.3984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.75%) |Training time=0.49s (22.65%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4605|ppo_ep: 1|act_loss: 0.021270751953125|cri_loss: 0.01096343994140625|unsuper_loss: 0.0 +average reward score: 5.01171875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.56%) |Training time=0.50s (22.83%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4606|ppo_ep: 1|act_loss: -0.000988006591796875|cri_loss: -0.00018930435180664062|unsuper_loss: 0.0 +average reward score: 4.8046875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.19%) |Training time=0.48s (22.17%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4607|ppo_ep: 1|act_loss: -0.0102691650390625|cri_loss: -0.0048065185546875|unsuper_loss: 0.0 +average reward score: 6.5546875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.78%) |Training time=0.49s (22.63%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4608|ppo_ep: 1|act_loss: -0.01401519775390625|cri_loss: -0.00673675537109375|unsuper_loss: 0.0 +average reward score: 5.94921875 +------------------------------------------------------------------------------------- +|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.58s (69.12%) |Training time=0.52s (22.89%) |Others=0.18 (7.99%)|CurSamplesPerSec=13.99 |AvgSamplesPerSec=14.46 +[2023-04-14 11:37:30,415] [INFO] [logging.py:96:log_dist] [Rank 0] step=4610, skipped=59, lr=[4.138163750046903e-06, 4.138163750046903e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:37:30,433] [INFO] [timer.py:199:stop] epoch=0/micro_step=4610/global_step=4610, RunningAvgSamplesPerSec=105.04432744786926, CurrSamplesPerSec=99.1981883570604, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:37:30,526] [INFO] [logging.py:96:log_dist] [Rank 0] step=4610, skipped=74, lr=[2.158422591917891e-06, 2.158422591917891e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4609|ppo_ep: 1|act_loss: -0.00725555419921875|cri_loss: -0.0033588409423828125|unsuper_loss: 0.0 +average reward score: 5.1484375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.07%) |Training time=0.49s (22.31%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4610|ppo_ep: 1|act_loss: -0.04510498046875|cri_loss: -0.021087646484375|unsuper_loss: 0.0 +average reward score: 5.828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.76%) |Training time=0.49s (22.64%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4611|ppo_ep: 1|act_loss: 0.01213836669921875|cri_loss: 0.0068511962890625|unsuper_loss: 0.0 +average reward score: 6.0078125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.93%) |Training time=0.49s (22.43%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4612|ppo_ep: 1|act_loss: 0.027618408203125|cri_loss: 0.0142364501953125|unsuper_loss: 0.0 +average reward score: 4.88671875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.07%) |Training time=0.49s (22.30%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4613|ppo_ep: 1|act_loss: 0.021514892578125|cri_loss: 0.01171875|unsuper_loss: 0.0 +average reward score: 5.97265625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.97%) |Training time=0.49s (22.42%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46 +[2023-04-14 11:37:41,322] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 4614|ppo_ep: 1|act_loss: 0.03173828125|cri_loss: 0.0161895751953125|unsuper_loss: 0.0 +average reward score: 5.9921875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.21%) |Training time=0.46s (20.90%) |Others=0.11 (4.88%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.46 +[2023-04-14 11:37:43,569] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 4615|ppo_ep: 1|act_loss: -0.00678253173828125|cri_loss: -0.0028896331787109375|unsuper_loss: 0.0 +average reward score: 4.5859375 +------------------------------------------------------------------------------------- +|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.68s (75.08%) |Training time=0.46s (20.46%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.29 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4616|ppo_ep: 1|act_loss: -0.0020618438720703125|cri_loss: -0.0008721351623535156|unsuper_loss: 0.0 +average reward score: 5.4765625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.32%) |Training time=0.48s (22.07%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4617|ppo_ep: 1|act_loss: 0.01251220703125|cri_loss: 0.006763458251953125|unsuper_loss: 0.0 +average reward score: 5.57421875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.78%) |Training time=0.49s (22.64%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4618|ppo_ep: 1|act_loss: 0.0031261444091796875|cri_loss: 0.0018663406372070312|unsuper_loss: 0.0 +average reward score: 4.828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.48s (21.94%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +[2023-04-14 11:37:52,245] [INFO] [logging.py:96:log_dist] [Rank 0] step=4620, skipped=61, lr=[4.123457403069056e-06, 4.123457403069056e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:37:52,264] [INFO] [timer.py:199:stop] epoch=0/micro_step=4620/global_step=4620, RunningAvgSamplesPerSec=105.03470887489762, CurrSamplesPerSec=100.67064495708186, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:37:52,356] [INFO] [logging.py:96:log_dist] [Rank 0] step=4620, skipped=74, lr=[2.1488904323590644e-06, 2.1488904323590644e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4619|ppo_ep: 1|act_loss: -0.00931549072265625|cri_loss: -0.00424957275390625|unsuper_loss: 0.0 +average reward score: 5.91796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.28%) |Training time=0.48s (22.14%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4620|ppo_ep: 1|act_loss: -0.026458740234375|cri_loss: -0.011993408203125|unsuper_loss: 0.0 +average reward score: 5.3515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.18%) |Training time=0.48s (22.21%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4621|ppo_ep: 1|act_loss: -0.027191162109375|cri_loss: -0.01263427734375|unsuper_loss: 0.0 +average reward score: 5.39453125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.01%) |Training time=0.48s (22.38%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4622|ppo_ep: 1|act_loss: 0.015960693359375|cri_loss: 0.00853729248046875|unsuper_loss: 0.0 +average reward score: 5.46484375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.68%) |Training time=0.49s (22.72%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4623|ppo_ep: 1|act_loss: 0.032012939453125|cri_loss: 0.0174407958984375|unsuper_loss: 0.0 +average reward score: 6.78125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.01%) |Training time=0.48s (22.37%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4624|ppo_ep: 1|act_loss: 0.031524658203125|cri_loss: 0.0169219970703125|unsuper_loss: 0.0 +average reward score: 3.92578125 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.88%) |Training time=0.49s (20.82%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.68 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4625|ppo_ep: 1|act_loss: 0.0192718505859375|cri_loss: 0.0108184814453125|unsuper_loss: 0.0 +average reward score: 6.53125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.08%) |Training time=0.48s (22.30%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4626|ppo_ep: 1|act_loss: 0.014068603515625|cri_loss: 0.0085296630859375|unsuper_loss: 0.0 +average reward score: 6.578125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.86%) |Training time=0.49s (22.52%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4627|ppo_ep: 1|act_loss: 0.0254974365234375|cri_loss: 0.01367950439453125|unsuper_loss: 0.0 +average reward score: 5.01953125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.86%) |Training time=0.49s (22.53%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4628|ppo_ep: 1|act_loss: 0.06097412109375|cri_loss: 0.032196044921875|unsuper_loss: 0.0 +average reward score: 6.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.01%) |Training time=0.46s (21.36%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46 +[2023-04-14 11:38:14,122] [INFO] [logging.py:96:log_dist] [Rank 0] step=4630, skipped=61, lr=[4.1050838443919875e-06, 4.1050838443919875e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:38:14,140] [INFO] [timer.py:199:stop] epoch=0/micro_step=4630/global_step=4630, RunningAvgSamplesPerSec=105.02180375590667, CurrSamplesPerSec=99.11819206302553, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:38:14,233] [INFO] [logging.py:96:log_dist] [Rank 0] step=4630, skipped=74, lr=[2.1393634770910363e-06, 2.1393634770910363e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4629|ppo_ep: 1|act_loss: 0.0362548828125|cri_loss: 0.0199737548828125|unsuper_loss: 0.0 +average reward score: 4.56640625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.10%) |Training time=0.49s (22.32%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4630|ppo_ep: 1|act_loss: 0.004764556884765625|cri_loss: 0.003452301025390625|unsuper_loss: 0.0 +average reward score: 4.5546875 +------------------------------------------------------------------------------------- +|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.59s (75.43%) |Training time=0.42s (19.79%) |Others=0.10 (4.77%)|CurSamplesPerSec=15.19 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4631|ppo_ep: 1|act_loss: 0.002178192138671875|cri_loss: 0.0012903213500976562|unsuper_loss: 0.0 +average reward score: 5.15234375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.31%) |Training time=0.48s (22.08%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4632|ppo_ep: 1|act_loss: -0.0230865478515625|cri_loss: -0.01128387451171875|unsuper_loss: 0.0 +average reward score: 5.0703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.25%) |Training time=0.48s (22.14%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4633|ppo_ep: 1|act_loss: -0.037933349609375|cri_loss: -0.0178375244140625|unsuper_loss: 0.0 +average reward score: 4.98828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.42%) |Training time=0.48s (21.97%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4634|ppo_ep: 1|act_loss: 0.00516510009765625|cri_loss: 0.0031719207763671875|unsuper_loss: 0.0 +average reward score: 4.69140625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.52%) |Training time=0.47s (21.85%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4635|ppo_ep: 1|act_loss: -0.004116058349609375|cri_loss: -0.0011539459228515625|unsuper_loss: 0.0 +average reward score: 5.58984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.42%) |Training time=0.48s (21.97%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4636|ppo_ep: 1|act_loss: -0.0214996337890625|cri_loss: -0.01045989990234375|unsuper_loss: 0.0 +average reward score: 5.44140625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.19%) |Training time=0.48s (22.20%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4637|ppo_ep: 1|act_loss: -0.0006070137023925781|cri_loss: -5.626678466796875e-05|unsuper_loss: 0.0 +average reward score: 5.19921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.84%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4638|ppo_ep: 1|act_loss: -0.007564544677734375|cri_loss: -0.00339508056640625|unsuper_loss: 0.0 +average reward score: 6.296875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.34%) |Training time=0.48s (22.05%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +[2023-04-14 11:38:36,000] [INFO] [logging.py:96:log_dist] [Rank 0] step=4640, skipped=61, lr=[4.086720956609049e-06, 4.086720956609049e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:38:36,018] [INFO] [timer.py:199:stop] epoch=0/micro_step=4640/global_step=4640, RunningAvgSamplesPerSec=105.01773699493148, CurrSamplesPerSec=98.99807414433666, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:38:36,111] [INFO] [logging.py:96:log_dist] [Rank 0] step=4640, skipped=74, lr=[2.1298418673262655e-06, 2.1298418673262655e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4639|ppo_ep: 1|act_loss: 0.024658203125|cri_loss: 0.01282501220703125|unsuper_loss: 0.0 +average reward score: 4.68359375 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.98%) |Training time=0.49s (20.77%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.46 +[2023-04-14 11:38:38,285] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 4640|ppo_ep: 1|act_loss: 0.0004324913024902344|cri_loss: 0.0012264251708984375|unsuper_loss: 0.0 +average reward score: 4.83203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.39%) |Training time=0.49s (22.46%) |Others=0.09 (4.16%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46 +[2023-04-14 11:38:40,445] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 4641|ppo_ep: 1|act_loss: 0.0021190643310546875|cri_loss: 0.0013751983642578125|unsuper_loss: 0.0 +average reward score: 5.32421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.70%) |Training time=0.48s (22.16%) |Others=0.09 (4.14%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4642|ppo_ep: 1|act_loss: 0.04876708984375|cri_loss: 0.0262603759765625|unsuper_loss: 0.0 +average reward score: 5.46875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.14%) |Training time=0.46s (21.21%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4643|ppo_ep: 1|act_loss: 0.0234375|cri_loss: 0.011993408203125|unsuper_loss: 0.0 +average reward score: 6.7265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.67%) |Training time=0.43s (19.68%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4644|ppo_ep: 1|act_loss: -0.04034423828125|cri_loss: -0.018829345703125|unsuper_loss: 0.0 +average reward score: 6.83203125 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.79%) |Training time=0.47s (20.79%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.12 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4645|ppo_ep: 1|act_loss: -0.008087158203125|cri_loss: -0.0038051605224609375|unsuper_loss: 0.0 +average reward score: 4.734375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.84%) |Training time=0.47s (21.57%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4646|ppo_ep: 1|act_loss: 0.0191802978515625|cri_loss: 0.009796142578125|unsuper_loss: 0.0 +average reward score: 5.59375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.15%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4647|ppo_ep: 1|act_loss: 0.001789093017578125|cri_loss: 0.0009784698486328125|unsuper_loss: 0.0 +average reward score: 5.765625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.61%) |Training time=0.47s (21.75%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4648|ppo_ep: 1|act_loss: 0.019317626953125|cri_loss: 0.010040283203125|unsuper_loss: 0.0 +average reward score: 5.015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.47s (21.93%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +[2023-04-14 11:38:57,764] [INFO] [logging.py:96:log_dist] [Rank 0] step=4650, skipped=61, lr=[4.068369011902537e-06, 4.068369011902537e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:38:57,782] [INFO] [timer.py:199:stop] epoch=0/micro_step=4650/global_step=4650, RunningAvgSamplesPerSec=105.01619755698859, CurrSamplesPerSec=101.73921361882245, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:38:57,875] [INFO] [logging.py:96:log_dist] [Rank 0] step=4650, skipped=76, lr=[2.122228523121039e-06, 2.122228523121039e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4649|ppo_ep: 1|act_loss: 0.0022907257080078125|cri_loss: 0.00200653076171875|unsuper_loss: 0.0 +average reward score: 5.609375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.48s (22.02%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4650|ppo_ep: 1|act_loss: -0.032958984375|cri_loss: -0.0160064697265625|unsuper_loss: 0.0 +average reward score: 5.38671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.75%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4651|ppo_ep: 1|act_loss: -0.0117034912109375|cri_loss: -0.00518798828125|unsuper_loss: 0.0 +average reward score: 5.9296875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.47s (21.89%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4652|ppo_ep: 1|act_loss: 0.003528594970703125|cri_loss: 0.002811431884765625|unsuper_loss: 0.0 +average reward score: 5.703125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.33%) |Training time=0.48s (22.04%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4653|ppo_ep: 1|act_loss: -0.0008487701416015625|cri_loss: -0.0002982616424560547|unsuper_loss: 0.0 +average reward score: 5.48046875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.81%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4654|ppo_ep: 1|act_loss: 0.020538330078125|cri_loss: 0.010650634765625|unsuper_loss: 0.0 +average reward score: 4.80078125 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.59s (69.68%) |Training time=0.59s (25.94%) |Others=0.10 (4.38%)|CurSamplesPerSec=14.03 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4655|ppo_ep: 1|act_loss: 0.006198883056640625|cri_loss: 0.003208160400390625|unsuper_loss: 0.0 +average reward score: 5.26171875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.51%) |Training time=0.47s (21.86%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4656|ppo_ep: 1|act_loss: 0.007274627685546875|cri_loss: 0.003936767578125|unsuper_loss: 0.0 +average reward score: 4.94921875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.13%) |Training time=0.46s (21.21%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4657|ppo_ep: 1|act_loss: 0.002826690673828125|cri_loss: 0.0015611648559570312|unsuper_loss: 0.0 +average reward score: 5.0625 +------------------------------------------------------------------------------------- +|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.60s (67.47%) |Training time=0.47s (19.75%) |Others=0.30 (12.77%)|CurSamplesPerSec=13.54 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4658|ppo_ep: 1|act_loss: 0.003002166748046875|cri_loss: 0.0018138885498046875|unsuper_loss: 0.0 +average reward score: 5.171875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.78%) |Training time=0.44s (20.49%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46 +[2023-04-14 11:39:19,693] [INFO] [logging.py:96:log_dist] [Rank 0] step=4660, skipped=61, lr=[4.050028282292539e-06, 4.050028282292539e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:39:19,711] [INFO] [timer.py:199:stop] epoch=0/micro_step=4660/global_step=4660, RunningAvgSamplesPerSec=105.00860834725098, CurrSamplesPerSec=103.96174217485263, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:39:19,804] [INFO] [logging.py:96:log_dist] [Rank 0] step=4660, skipped=76, lr=[2.112716890863282e-06, 2.112716890863282e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4659|ppo_ep: 1|act_loss: 0.00225830078125|cri_loss: 0.001560211181640625|unsuper_loss: 0.0 +average reward score: 5.0859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.66%) |Training time=0.47s (21.72%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4660|ppo_ep: 1|act_loss: 0.019073486328125|cri_loss: 0.010650634765625|unsuper_loss: 0.0 +average reward score: 6.0 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.47s (21.56%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4661|ppo_ep: 1|act_loss: -0.0088043212890625|cri_loss: -0.004238128662109375|unsuper_loss: 0.0 +average reward score: 5.2421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.65%) |Training time=0.47s (21.73%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4662|ppo_ep: 1|act_loss: -0.019439697265625|cri_loss: -0.00907135009765625|unsuper_loss: 0.0 +average reward score: 5.453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.73%) |Training time=0.46s (21.46%) |Others=0.10 (4.81%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4663|ppo_ep: 1|act_loss: 0.0100860595703125|cri_loss: 0.0054473876953125|unsuper_loss: 0.0 +average reward score: 5.2265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.66%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4664|ppo_ep: 1|act_loss: -0.0088348388671875|cri_loss: -0.00360870361328125|unsuper_loss: 0.0 +average reward score: 4.9765625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.68%) |Training time=0.47s (21.69%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4665|ppo_ep: 1|act_loss: -0.02349853515625|cri_loss: -0.01110076904296875|unsuper_loss: 0.0 +average reward score: 6.4375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.66%) |Training time=0.47s (21.70%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4666|ppo_ep: 1|act_loss: 0.011749267578125|cri_loss: 0.0064697265625|unsuper_loss: 0.0 +average reward score: 5.3984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.54%) |Training time=0.47s (21.82%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4667|ppo_ep: 1|act_loss: 0.03271484375|cri_loss: 0.0168304443359375|unsuper_loss: 0.0 +average reward score: 4.91796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.61%) |Training time=0.47s (21.76%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4668|ppo_ep: 1|act_loss: 0.068115234375|cri_loss: 0.036285400390625|unsuper_loss: 0.0 +average reward score: 5.5859375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.88%) |Training time=0.46s (21.45%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46 +[2023-04-14 11:39:41,490] [INFO] [logging.py:96:log_dist] [Rank 0] step=4670, skipped=61, lr=[4.031699039632916e-06, 4.031699039632916e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:39:41,509] [INFO] [timer.py:199:stop] epoch=0/micro_step=4670/global_step=4670, RunningAvgSamplesPerSec=105.0078285540475, CurrSamplesPerSec=105.11653158130927, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:39:41,603] [INFO] [logging.py:96:log_dist] [Rank 0] step=4670, skipped=76, lr=[2.1032109990754734e-06, 2.1032109990754734e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4669|ppo_ep: 1|act_loss: 0.0181427001953125|cri_loss: 0.00955963134765625|unsuper_loss: 0.0 +average reward score: 4.52734375 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.52%) |Training time=0.47s (20.09%) |Others=0.10 (4.39%)|CurSamplesPerSec=13.74 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4670|ppo_ep: 1|act_loss: 0.024658203125|cri_loss: 0.0130615234375|unsuper_loss: 0.0 +average reward score: 6.765625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.32%) |Training time=0.48s (22.07%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4671|ppo_ep: 1|act_loss: -0.014862060546875|cri_loss: -0.006984710693359375|unsuper_loss: 0.0 +average reward score: 5.3203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.48s (21.93%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4672|ppo_ep: 1|act_loss: -0.0279693603515625|cri_loss: -0.01361083984375|unsuper_loss: 0.0 +average reward score: 5.20703125 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.96%) |Training time=0.48s (21.79%) |Others=0.12 (5.25%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4673|ppo_ep: 1|act_loss: -0.02764892578125|cri_loss: -0.01336669921875|unsuper_loss: 0.0 +average reward score: 4.77734375 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.64s (72.68%) |Training time=0.52s (22.83%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.18 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4674|ppo_ep: 1|act_loss: -0.03143310546875|cri_loss: -0.0149383544921875|unsuper_loss: 0.0 +average reward score: 5.03515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.65%) |Training time=0.47s (21.69%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4675|ppo_ep: 1|act_loss: -0.027587890625|cri_loss: -0.01340484619140625|unsuper_loss: 0.0 +average reward score: 5.640625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.46s (21.28%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4676|ppo_ep: 1|act_loss: -0.04046630859375|cri_loss: -0.01983642578125|unsuper_loss: 0.0 +average reward score: 4.7109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.84%) |Training time=0.46s (21.53%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4677|ppo_ep: 1|act_loss: -0.0153961181640625|cri_loss: -0.007396697998046875|unsuper_loss: 0.0 +average reward score: 5.234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.46s (21.49%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4678|ppo_ep: 1|act_loss: -0.02301025390625|cri_loss: -0.0111541748046875|unsuper_loss: 0.0 +average reward score: 6.125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.76%) |Training time=0.46s (21.59%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46 +[2023-04-14 11:40:03,192] [INFO] [logging.py:96:log_dist] [Rank 0] step=4680, skipped=61, lr=[4.01338155560726e-06, 4.01338155560726e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:40:03,210] [INFO] [timer.py:199:stop] epoch=0/micro_step=4680/global_step=4680, RunningAvgSamplesPerSec=105.00428085351663, CurrSamplesPerSec=107.38758437052944, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:40:03,303] [INFO] [logging.py:96:log_dist] [Rank 0] step=4680, skipped=76, lr=[2.093710988657859e-06, 2.093710988657859e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4679|ppo_ep: 1|act_loss: -0.0083160400390625|cri_loss: -0.003635406494140625|unsuper_loss: 0.0 +average reward score: 6.09375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.42%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4680|ppo_ep: 1|act_loss: -0.017486572265625|cri_loss: -0.00780487060546875|unsuper_loss: 0.0 +average reward score: 6.421875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.97%) |Training time=0.46s (21.37%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4681|ppo_ep: 1|act_loss: 0.015594482421875|cri_loss: 0.00809478759765625|unsuper_loss: 0.0 +average reward score: 5.171875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.94%) |Training time=0.46s (21.41%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4682|ppo_ep: 1|act_loss: 0.006862640380859375|cri_loss: 0.003787994384765625|unsuper_loss: 0.0 +average reward score: 4.71875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.07%) |Training time=0.46s (21.27%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4683|ppo_ep: 1|act_loss: 0.016693115234375|cri_loss: 0.00894927978515625|unsuper_loss: 0.0 +average reward score: 4.73046875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.91%) |Training time=0.46s (21.44%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4684|ppo_ep: 1|act_loss: 0.0419921875|cri_loss: 0.021636962890625|unsuper_loss: 0.0 +average reward score: 5.06640625 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.20%) |Training time=0.48s (20.51%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.68 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4685|ppo_ep: 1|act_loss: 0.02496337890625|cri_loss: 0.01329803466796875|unsuper_loss: 0.0 +average reward score: 5.35546875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.23%) |Training time=0.45s (21.10%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4686|ppo_ep: 1|act_loss: -0.03363037109375|cri_loss: -0.01654052734375|unsuper_loss: 0.0 +average reward score: 5.23828125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.99%) |Training time=0.46s (21.37%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4687|ppo_ep: 1|act_loss: -0.0013904571533203125|cri_loss: -0.0002593994140625|unsuper_loss: 0.0 +average reward score: 5.140625 +------------------------------------------------------------------------------------- +|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.61s (70.13%) |Training time=0.46s (20.03%) |Others=0.23 (9.84%)|CurSamplesPerSec=13.97 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4688|ppo_ep: 1|act_loss: -0.010955810546875|cri_loss: -0.0045623779296875|unsuper_loss: 0.0 +average reward score: 6.203125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.19%) |Training time=0.48s (22.17%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.46 +[2023-04-14 11:40:25,038] [INFO] [logging.py:96:log_dist] [Rank 0] step=4690, skipped=61, lr=[3.995076101724873e-06, 3.995076101724873e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:40:25,057] [INFO] [timer.py:199:stop] epoch=0/micro_step=4690/global_step=4690, RunningAvgSamplesPerSec=105.00716011355068, CurrSamplesPerSec=106.80988441863944, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:40:25,149] [INFO] [logging.py:96:log_dist] [Rank 0] step=4690, skipped=76, lr=[2.084217000423512e-06, 2.084217000423512e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4689|ppo_ep: 1|act_loss: -0.0246734619140625|cri_loss: -0.01197052001953125|unsuper_loss: 0.0 +average reward score: 6.93359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.89%) |Training time=0.46s (21.49%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4690|ppo_ep: 1|act_loss: -0.03106689453125|cri_loss: -0.014862060546875|unsuper_loss: 0.0 +average reward score: 5.22265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.22%) |Training time=0.45s (21.10%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4691|ppo_ep: 1|act_loss: 0.003841400146484375|cri_loss: 0.002197265625|unsuper_loss: 0.0 +average reward score: 6.0078125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.97%) |Training time=0.46s (21.39%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4692|ppo_ep: 1|act_loss: 0.023040771484375|cri_loss: 0.01206207275390625|unsuper_loss: 0.0 +average reward score: 6.5234375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.46s (21.31%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4693|ppo_ep: 1|act_loss: 0.033447265625|cri_loss: 0.0173492431640625|unsuper_loss: 0.0 +average reward score: 5.86328125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.46s (21.31%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4694|ppo_ep: 1|act_loss: 0.0143585205078125|cri_loss: 0.0074462890625|unsuper_loss: 0.0 +average reward score: 5.1484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.29%) |Training time=0.45s (21.02%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4695|ppo_ep: 1|act_loss: 0.026763916015625|cri_loss: 0.01410675048828125|unsuper_loss: 0.0 +average reward score: 4.625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.05%) |Training time=0.46s (21.30%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4696|ppo_ep: 1|act_loss: -0.000705718994140625|cri_loss: -0.0001049041748046875|unsuper_loss: 0.0 +average reward score: 5.35546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.95%) |Training time=0.46s (21.40%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4697|ppo_ep: 1|act_loss: -0.001068115234375|cri_loss: -0.0003914833068847656|unsuper_loss: 0.0 +average reward score: 5.453125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.12%) |Training time=0.46s (21.21%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4698|ppo_ep: 1|act_loss: 0.015380859375|cri_loss: 0.00798797607421875|unsuper_loss: 0.0 +average reward score: 4.73828125 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.99%) |Training time=0.46s (20.87%) |Others=0.16 (7.14%)|CurSamplesPerSec=14.49 |AvgSamplesPerSec=14.46 +[2023-04-14 11:40:46,698] [INFO] [logging.py:96:log_dist] [Rank 0] step=4700, skipped=61, lr=[3.97678294931674e-06, 3.97678294931674e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:40:46,716] [INFO] [timer.py:199:stop] epoch=0/micro_step=4700/global_step=4700, RunningAvgSamplesPerSec=105.01293390159111, CurrSamplesPerSec=100.83158391355171, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:40:46,810] [INFO] [logging.py:96:log_dist] [Rank 0] step=4700, skipped=76, lr=[2.07472917509624e-06, 2.07472917509624e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4699|ppo_ep: 1|act_loss: -0.00714874267578125|cri_loss: -0.0033435821533203125|unsuper_loss: 0.0 +average reward score: 4.5546875 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.68s (74.13%) |Training time=0.48s (21.13%) |Others=0.11 (4.73%)|CurSamplesPerSec=14.09 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4700|ppo_ep: 1|act_loss: -0.027618408203125|cri_loss: -0.01334381103515625|unsuper_loss: 0.0 +average reward score: 5.33984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.81%) |Training time=0.47s (21.58%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4701|ppo_ep: 1|act_loss: -0.0284271240234375|cri_loss: -0.0135498046875|unsuper_loss: 0.0 +average reward score: 5.75390625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.81%) |Training time=0.46s (21.52%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4702|ppo_ep: 1|act_loss: 0.0157012939453125|cri_loss: 0.00835418701171875|unsuper_loss: 0.0 +average reward score: 5.9296875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.70%) |Training time=0.47s (21.69%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4703|ppo_ep: 1|act_loss: 0.0203857421875|cri_loss: 0.010467529296875|unsuper_loss: 0.0 +average reward score: 5.60546875 +------------------------------------------------------------------------------------- +|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.69s (73.58%) |Training time=0.51s (22.04%) |Others=0.10 (4.38%)|CurSamplesPerSec=13.95 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4704|ppo_ep: 1|act_loss: -0.0079803466796875|cri_loss: -0.003173828125|unsuper_loss: 0.0 +average reward score: 7.0 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.47s (21.87%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4705|ppo_ep: 1|act_loss: 0.034271240234375|cri_loss: 0.0179901123046875|unsuper_loss: 0.0 +average reward score: 5.671875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.10%) |Training time=0.46s (21.24%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4706|ppo_ep: 1|act_loss: 0.0245819091796875|cri_loss: 0.01285552978515625|unsuper_loss: 0.0 +average reward score: 6.3359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.98%) |Training time=0.46s (21.37%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4707|ppo_ep: 1|act_loss: 0.0191192626953125|cri_loss: 0.010101318359375|unsuper_loss: 0.0 +average reward score: 6.9375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.20%) |Training time=0.45s (21.12%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4708|ppo_ep: 1|act_loss: -0.004428863525390625|cri_loss: -0.0017147064208984375|unsuper_loss: 0.0 +average reward score: 4.609375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.11%) |Training time=0.46s (21.23%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46 +[2023-04-14 11:41:08,383] [INFO] [logging.py:96:log_dist] [Rank 0] step=4710, skipped=61, lr=[3.9585023695315105e-06, 3.9585023695315105e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:41:08,402] [INFO] [timer.py:199:stop] epoch=0/micro_step=4710/global_step=4710, RunningAvgSamplesPerSec=105.01362133774731, CurrSamplesPerSec=109.07168687720736, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:41:08,494] [INFO] [logging.py:96:log_dist] [Rank 0] step=4710, skipped=76, lr=[2.0652476533085043e-06, 2.0652476533085043e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4709|ppo_ep: 1|act_loss: 0.0088348388671875|cri_loss: 0.00482940673828125|unsuper_loss: 0.0 +average reward score: 6.0703125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.09%) |Training time=0.46s (21.25%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4710|ppo_ep: 1|act_loss: -0.0226593017578125|cri_loss: -0.0111083984375|unsuper_loss: 0.0 +average reward score: 5.72265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.81%) |Training time=0.46s (21.53%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4711|ppo_ep: 1|act_loss: -0.02349853515625|cri_loss: -0.01152801513671875|unsuper_loss: 0.0 +average reward score: 5.12109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.79%) |Training time=0.46s (21.55%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4712|ppo_ep: 1|act_loss: 0.0088348388671875|cri_loss: 0.005279541015625|unsuper_loss: 0.0 +average reward score: 6.04296875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.98%) |Training time=0.46s (21.36%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4713|ppo_ep: 1|act_loss: -0.007274627685546875|cri_loss: -0.00310516357421875|unsuper_loss: 0.0 +average reward score: 4.375 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.71%) |Training time=0.49s (20.99%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.66 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4714|ppo_ep: 1|act_loss: 0.004566192626953125|cri_loss: 0.002834320068359375|unsuper_loss: 0.0 +average reward score: 4.8984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.07%) |Training time=0.46s (21.24%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4715|ppo_ep: 1|act_loss: 0.0113983154296875|cri_loss: 0.005950927734375|unsuper_loss: 0.0 +average reward score: 5.3203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.91%) |Training time=0.46s (21.44%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46 +[2023-04-14 11:41:23,609] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 4716|ppo_ep: 1|act_loss: 0.078857421875|cri_loss: 0.044708251953125|unsuper_loss: 0.0 +average reward score: 5.390625 +------------------------------------------------------------------------------------- +|E2E latency=2.92s |Gather latency=0.00s (0.00%) |Generate time=1.59s (54.54%) |Training time=1.23s (42.03%) |Others=0.10 (3.43%)|CurSamplesPerSec=10.98 |AvgSamplesPerSec=14.46 +[2023-04-14 11:41:26,520] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 4717|ppo_ep: 1|act_loss: 0.02716064453125|cri_loss: 0.01409912109375|unsuper_loss: 0.0 +average reward score: 5.77734375 +------------------------------------------------------------------------------------- +|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.59s (75.21%) |Training time=0.42s (20.05%) |Others=0.10 (4.74%)|CurSamplesPerSec=15.11 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4718|ppo_ep: 1|act_loss: 0.00347137451171875|cri_loss: 0.0022602081298828125|unsuper_loss: 0.0 +average reward score: 5.9296875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.47s (21.93%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +[2023-04-14 11:41:30,826] [INFO] [logging.py:96:log_dist] [Rank 0] step=4720, skipped=63, lr=[3.943887140084842e-06, 3.943887140084842e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:41:30,845] [INFO] [timer.py:199:stop] epoch=0/micro_step=4720/global_step=4720, RunningAvgSamplesPerSec=104.96272361672763, CurrSamplesPerSec=110.75861112853234, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:41:30,938] [INFO] [logging.py:96:log_dist] [Rank 0] step=4720, skipped=76, lr=[2.0557725755993283e-06, 2.0557725755993283e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4719|ppo_ep: 1|act_loss: -0.0146484375|cri_loss: -0.005100250244140625|unsuper_loss: 0.0 +average reward score: 5.58203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.46%) |Training time=0.45s (20.90%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4720|ppo_ep: 1|act_loss: 0.005779266357421875|cri_loss: 0.003086090087890625|unsuper_loss: 0.0 +average reward score: 5.26171875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.33%) |Training time=0.43s (19.98%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4721|ppo_ep: 1|act_loss: -0.016845703125|cri_loss: -0.00811004638671875|unsuper_loss: 0.0 +average reward score: 4.7265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.84%) |Training time=0.44s (20.49%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4722|ppo_ep: 1|act_loss: -0.03753662109375|cri_loss: -0.01436614990234375|unsuper_loss: 0.0 +average reward score: 5.58203125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.03%) |Training time=0.43s (20.29%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4723|ppo_ep: 1|act_loss: -0.008636474609375|cri_loss: -0.0040435791015625|unsuper_loss: 0.0 +average reward score: 5.83984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.61%) |Training time=0.45s (20.72%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4724|ppo_ep: 1|act_loss: 0.004852294921875|cri_loss: 0.002864837646484375|unsuper_loss: 0.0 +average reward score: 4.96875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.82%) |Training time=0.44s (20.51%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4725|ppo_ep: 1|act_loss: 0.0152740478515625|cri_loss: 0.008697509765625|unsuper_loss: 0.0 +average reward score: 4.87890625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.87%) |Training time=0.44s (20.43%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4726|ppo_ep: 1|act_loss: -0.005199432373046875|cri_loss: -0.000682830810546875|unsuper_loss: 0.0 +average reward score: 5.92578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.68%) |Training time=0.44s (20.65%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4727|ppo_ep: 1|act_loss: 0.023590087890625|cri_loss: 0.0131072998046875|unsuper_loss: 0.0 +average reward score: 4.58984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.56%) |Training time=0.45s (20.78%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4728|ppo_ep: 1|act_loss: 0.003345489501953125|cri_loss: 0.0030059814453125|unsuper_loss: 0.0 +average reward score: 5.6484375 +------------------------------------------------------------------------------------- +|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.79s (75.61%) |Training time=0.48s (20.16%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.52 |AvgSamplesPerSec=14.46 +[2023-04-14 11:41:52,512] [INFO] [logging.py:96:log_dist] [Rank 0] step=4730, skipped=63, lr=[3.925629873718018e-06, 3.925629873718018e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:41:52,531] [INFO] [timer.py:199:stop] epoch=0/micro_step=4730/global_step=4730, RunningAvgSamplesPerSec=104.9805842763996, CurrSamplesPerSec=115.75283565613357, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:41:52,623] [INFO] [logging.py:96:log_dist] [Rank 0] step=4730, skipped=76, lr=[2.046304082412223e-06, 2.046304082412223e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4729|ppo_ep: 1|act_loss: 0.015625|cri_loss: 0.0095672607421875|unsuper_loss: 0.0 +average reward score: 7.2734375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.89%) |Training time=0.44s (20.46%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4730|ppo_ep: 1|act_loss: 0.0770263671875|cri_loss: 0.041961669921875|unsuper_loss: 0.0 +average reward score: 6.16796875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.61%) |Training time=0.45s (20.75%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4731|ppo_ep: 1|act_loss: -0.009735107421875|cri_loss: -0.003570556640625|unsuper_loss: 0.0 +average reward score: 4.68359375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.58%) |Training time=0.45s (20.79%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4732|ppo_ep: 1|act_loss: -0.0060272216796875|cri_loss: -0.002765655517578125|unsuper_loss: 0.0 +average reward score: 5.0 +------------------------------------------------------------------------------------- +|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.64s (71.88%) |Training time=0.54s (23.74%) |Others=0.10 (4.38%)|CurSamplesPerSec=13.99 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4733|ppo_ep: 1|act_loss: -0.005260467529296875|cri_loss: -0.00251007080078125|unsuper_loss: 0.0 +average reward score: 6.58203125 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.28%) |Training time=0.49s (22.16%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4734|ppo_ep: 1|act_loss: 0.019622802734375|cri_loss: 0.0103607177734375|unsuper_loss: 0.0 +average reward score: 5.94140625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.71%) |Training time=0.44s (20.64%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4735|ppo_ep: 1|act_loss: -0.00572967529296875|cri_loss: -0.002567291259765625|unsuper_loss: 0.0 +average reward score: 5.63671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.85%) |Training time=0.47s (21.48%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4736|ppo_ep: 1|act_loss: -0.02288818359375|cri_loss: -0.010772705078125|unsuper_loss: 0.0 +average reward score: 6.9375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.48%) |Training time=0.45s (20.87%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4737|ppo_ep: 1|act_loss: 0.0115203857421875|cri_loss: 0.006496429443359375|unsuper_loss: 0.0 +average reward score: 6.45703125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.45%) |Training time=0.45s (20.90%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4738|ppo_ep: 1|act_loss: -0.00241851806640625|cri_loss: -0.0006160736083984375|unsuper_loss: 0.0 +average reward score: 6.13671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.18%) |Training time=0.46s (21.20%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.46 +[2023-04-14 11:42:14,294] [INFO] [logging.py:96:log_dist] [Rank 0] step=4740, skipped=63, lr=[3.907385938186079e-06, 3.907385938186079e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:42:14,312] [INFO] [timer.py:199:stop] epoch=0/micro_step=4740/global_step=4740, RunningAvgSamplesPerSec=104.98486849818003, CurrSamplesPerSec=111.83245012364935, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:42:14,405] [INFO] [logging.py:96:log_dist] [Rank 0] step=4740, skipped=76, lr=[2.0368423140930975e-06, 2.0368423140930975e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4739|ppo_ep: 1|act_loss: -0.01392364501953125|cri_loss: -0.006786346435546875|unsuper_loss: 0.0 +average reward score: 4.703125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.57%) |Training time=0.45s (20.80%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.46 +[2023-04-14 11:42:16,448] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384 +epoch: 0|step: 4740|ppo_ep: 1|act_loss: -0.0205078125|cri_loss: -0.00921630859375|unsuper_loss: 0.0 +average reward score: 5.4296875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.29%) |Training time=0.43s (20.02%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4741|ppo_ep: 1|act_loss: -0.02386474609375|cri_loss: -0.01117706298828125|unsuper_loss: 0.0 +average reward score: 5.06640625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.73%) |Training time=0.44s (20.62%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.47 +[2023-04-14 11:42:20,842] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 4742|ppo_ep: 1|act_loss: -0.0082855224609375|cri_loss: -0.004001617431640625|unsuper_loss: 0.0 +average reward score: 4.22265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.95%) |Training time=0.45s (20.86%) |Others=0.09 (4.19%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.47 +[2023-04-14 11:42:23,156] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 4743|ppo_ep: 1|act_loss: -0.0103607177734375|cri_loss: -0.0032501220703125|unsuper_loss: 0.0 +average reward score: 4.859375 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.93%) |Training time=0.44s (19.19%) |Others=0.09 (3.88%)|CurSamplesPerSec=13.83 |AvgSamplesPerSec=14.47 +[2023-04-14 11:42:25,190] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192 +epoch: 0|step: 4744|ppo_ep: 1|act_loss: -0.0399169921875|cri_loss: -0.0182647705078125|unsuper_loss: 0.0 +average reward score: 6.47265625 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.48%) |Training time=0.42s (19.83%) |Others=0.10 (4.69%)|CurSamplesPerSec=15.03 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4745|ppo_ep: 1|act_loss: -0.00701141357421875|cri_loss: -0.003337860107421875|unsuper_loss: 0.0 +average reward score: 5.31640625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.59%) |Training time=0.45s (20.78%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4746|ppo_ep: 1|act_loss: 0.006175994873046875|cri_loss: 0.00450897216796875|unsuper_loss: 0.0 +average reward score: 6.01953125 +------------------------------------------------------------------------------------- +|E2E latency=2.86s |Gather latency=0.00s (0.00%) |Generate time=1.61s (56.27%) |Training time=0.44s (15.55%) |Others=0.81 (28.18%)|CurSamplesPerSec=11.20 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4747|ppo_ep: 1|act_loss: -0.00533294677734375|cri_loss: -0.002384185791015625|unsuper_loss: 0.0 +average reward score: 4.984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.99%) |Training time=0.44s (20.32%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4748|ppo_ep: 1|act_loss: 0.005939483642578125|cri_loss: 0.003276824951171875|unsuper_loss: 0.0 +average reward score: 5.9140625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.11%) |Training time=0.46s (21.27%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.46 +[2023-04-14 11:42:36,660] [INFO] [logging.py:96:log_dist] [Rank 0] step=4750, skipped=65, lr=[3.892800569690045e-06, 3.892800569690045e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:42:36,679] [INFO] [timer.py:199:stop] epoch=0/micro_step=4750/global_step=4750, RunningAvgSamplesPerSec=105.00306105783682, CurrSamplesPerSec=112.30368528329925, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:42:36,771] [INFO] [logging.py:96:log_dist] [Rank 0] step=4750, skipped=78, lr=[2.0292778355916135e-06, 2.0292778355916135e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4749|ppo_ep: 1|act_loss: -0.00264739990234375|cri_loss: -0.0008673667907714844|unsuper_loss: 0.0 +average reward score: 6.3046875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.62%) |Training time=0.45s (20.73%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4750|ppo_ep: 1|act_loss: 0.000370025634765625|cri_loss: 0.0034427642822265625|unsuper_loss: 0.0 +average reward score: 5.9921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.83%) |Training time=0.44s (20.48%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4751|ppo_ep: 1|act_loss: 0.01898193359375|cri_loss: 0.00974273681640625|unsuper_loss: 0.0 +average reward score: 6.25 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.66%) |Training time=0.44s (20.67%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4752|ppo_ep: 1|act_loss: -0.0025463104248046875|cri_loss: -0.0009636878967285156|unsuper_loss: 0.0 +average reward score: 5.4453125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.69%) |Training time=0.44s (20.63%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.46 +epoch: 0|step: 4753|ppo_ep: 1|act_loss: 0.02203369140625|cri_loss: 0.011383056640625|unsuper_loss: 0.0 +average reward score: 5.05859375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.61%) |Training time=0.45s (20.72%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4754|ppo_ep: 1|act_loss: 0.0150146484375|cri_loss: 0.00759124755859375|unsuper_loss: 0.0 +average reward score: 6.05078125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.87%) |Training time=0.44s (20.46%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4755|ppo_ep: 1|act_loss: 0.00328826904296875|cri_loss: 0.002582550048828125|unsuper_loss: 0.0 +average reward score: 5.7890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.83%) |Training time=0.44s (20.54%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4756|ppo_ep: 1|act_loss: -0.01629638671875|cri_loss: -0.00687408447265625|unsuper_loss: 0.0 +average reward score: 6.4765625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.61%) |Training time=0.45s (20.73%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4757|ppo_ep: 1|act_loss: -0.0019683837890625|cri_loss: -0.0006279945373535156|unsuper_loss: 0.0 +average reward score: 4.71484375 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.31%) |Training time=0.45s (19.34%) |Others=0.10 (4.35%)|CurSamplesPerSec=13.78 |AvgSamplesPerSec=14.47 +[2023-04-14 11:42:56,174] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, reducing to 4096 +epoch: 0|step: 4758|ppo_ep: 1|act_loss: -0.021881103515625|cri_loss: -0.009552001953125|unsuper_loss: 0.0 +average reward score: 4.94921875 +------------------------------------------------------------------------------------- +|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.61%) |Training time=0.42s (19.67%) |Others=0.10 (4.72%)|CurSamplesPerSec=15.09 |AvgSamplesPerSec=14.47 +[2023-04-14 11:42:58,305] [INFO] [logging.py:96:log_dist] [Rank 0] step=4760, skipped=66, lr=[3.876402607369461e-06, 3.876402607369461e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:42:58,323] [INFO] [timer.py:199:stop] epoch=0/micro_step=4760/global_step=4760, RunningAvgSamplesPerSec=105.02179852561878, CurrSamplesPerSec=113.14903772794369, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:42:58,416] [INFO] [logging.py:96:log_dist] [Rank 0] step=4760, skipped=78, lr=[2.0198285253870464e-06, 2.0198285253870464e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4759|ppo_ep: 1|act_loss: -0.00461578369140625|cri_loss: -0.002063751220703125|unsuper_loss: 0.0 +average reward score: 4.8203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.63%) |Training time=0.45s (20.73%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4760|ppo_ep: 1|act_loss: 0.00736236572265625|cri_loss: 0.003910064697265625|unsuper_loss: 0.0 +average reward score: 5.21875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.52%) |Training time=0.45s (20.83%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4761|ppo_ep: 1|act_loss: -0.047882080078125|cri_loss: -0.023040771484375|unsuper_loss: 0.0 +average reward score: 6.04296875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.20%) |Training time=0.44s (20.21%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4762|ppo_ep: 1|act_loss: -0.001644134521484375|cri_loss: -0.0004086494445800781|unsuper_loss: 0.0 +average reward score: 4.51171875 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.71s (76.02%) |Training time=0.44s (19.52%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.22 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4763|ppo_ep: 1|act_loss: 0.0257568359375|cri_loss: 0.01320648193359375|unsuper_loss: 0.0 +average reward score: 5.1015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.28%) |Training time=0.45s (21.05%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4764|ppo_ep: 1|act_loss: -0.0240020751953125|cri_loss: -0.0116119384765625|unsuper_loss: 0.0 +average reward score: 4.6171875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.62%) |Training time=0.45s (20.70%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4765|ppo_ep: 1|act_loss: 0.0045318603515625|cri_loss: 0.002452850341796875|unsuper_loss: 0.0 +average reward score: 5.484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.65%) |Training time=0.44s (20.68%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4766|ppo_ep: 1|act_loss: -0.0030364990234375|cri_loss: -0.0010528564453125|unsuper_loss: 0.0 +average reward score: 6.16796875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.95%) |Training time=0.44s (20.35%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4767|ppo_ep: 1|act_loss: 0.06121826171875|cri_loss: 0.03350830078125|unsuper_loss: 0.0 +average reward score: 5.7421875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.81%) |Training time=0.44s (20.49%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4768|ppo_ep: 1|act_loss: -0.02301025390625|cri_loss: -0.0109710693359375|unsuper_loss: 0.0 +average reward score: 4.9375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.02%) |Training time=0.43s (20.25%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.47 +[2023-04-14 11:43:19,934] [INFO] [logging.py:96:log_dist] [Rank 0] step=4770, skipped=66, lr=[3.8581960152626685e-06, 3.8581960152626685e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:43:19,953] [INFO] [timer.py:199:stop] epoch=0/micro_step=4770/global_step=4770, RunningAvgSamplesPerSec=105.03973265983572, CurrSamplesPerSec=115.99823346444069, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:43:20,046] [INFO] [logging.py:96:log_dist] [Rank 0] step=4770, skipped=78, lr=[2.010386332482083e-06, 2.010386332482083e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4769|ppo_ep: 1|act_loss: -0.02618408203125|cri_loss: -0.0127410888671875|unsuper_loss: 0.0 +average reward score: 5.09375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.86%) |Training time=0.44s (20.46%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4770|ppo_ep: 1|act_loss: -0.01104736328125|cri_loss: -0.00519561767578125|unsuper_loss: 0.0 +average reward score: 4.78515625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.92%) |Training time=0.44s (20.38%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4771|ppo_ep: 1|act_loss: 0.024688720703125|cri_loss: 0.01568603515625|unsuper_loss: 0.0 +average reward score: 5.25390625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.57%) |Training time=0.45s (20.77%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4772|ppo_ep: 1|act_loss: -0.039794921875|cri_loss: -0.0181884765625|unsuper_loss: 0.0 +average reward score: 5.48046875 +------------------------------------------------------------------------------------- +|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.60s (71.94%) |Training time=0.46s (20.59%) |Others=0.17 (7.47%)|CurSamplesPerSec=14.35 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4773|ppo_ep: 1|act_loss: -0.0227508544921875|cri_loss: -0.01081085205078125|unsuper_loss: 0.0 +average reward score: 5.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.54%) |Training time=0.43s (19.84%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4774|ppo_ep: 1|act_loss: -0.018585205078125|cri_loss: -0.00902557373046875|unsuper_loss: 0.0 +average reward score: 6.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.70%) |Training time=0.44s (20.64%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4775|ppo_ep: 1|act_loss: -0.01849365234375|cri_loss: -0.008697509765625|unsuper_loss: 0.0 +average reward score: 5.1171875 +------------------------------------------------------------------------------------- +|E2E latency=2.96s |Gather latency=0.00s (0.00%) |Generate time=1.61s (54.33%) |Training time=0.44s (14.90%) |Others=0.91 (30.77%)|CurSamplesPerSec=10.83 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4776|ppo_ep: 1|act_loss: -0.00054931640625|cri_loss: -2.384185791015625e-05|unsuper_loss: 0.0 +average reward score: 4.90234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.81%) |Training time=0.44s (20.50%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4777|ppo_ep: 1|act_loss: 0.01239013671875|cri_loss: 0.006465911865234375|unsuper_loss: 0.0 +average reward score: 6.9921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.88%) |Training time=0.44s (20.43%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4778|ppo_ep: 1|act_loss: 0.029632568359375|cri_loss: 0.0154571533203125|unsuper_loss: 0.0 +average reward score: 4.79296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.75%) |Training time=0.44s (20.56%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.47 +[2023-04-14 11:43:42,352] [INFO] [logging.py:96:log_dist] [Rank 0] step=4780, skipped=66, lr=[3.840003753523217e-06, 3.840003753523217e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:43:42,371] [INFO] [timer.py:199:stop] epoch=0/micro_step=4780/global_step=4780, RunningAvgSamplesPerSec=105.0575480387665, CurrSamplesPerSec=113.63751879597393, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:43:42,463] [INFO] [logging.py:96:log_dist] [Rank 0] step=4780, skipped=78, lr=[2.000951396832801e-06, 2.000951396832801e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4779|ppo_ep: 1|act_loss: -0.0135650634765625|cri_loss: -0.005779266357421875|unsuper_loss: 0.0 +average reward score: 5.31640625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.73%) |Training time=0.45s (20.62%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4780|ppo_ep: 1|act_loss: 0.04937744140625|cri_loss: 0.0261383056640625|unsuper_loss: 0.0 +average reward score: 5.17578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.55%) |Training time=0.45s (20.79%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4781|ppo_ep: 1|act_loss: 0.011749267578125|cri_loss: 0.007293701171875|unsuper_loss: 0.0 +average reward score: 4.75 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.86%) |Training time=0.44s (20.47%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4782|ppo_ep: 1|act_loss: 0.03631591796875|cri_loss: 0.019195556640625|unsuper_loss: 0.0 +average reward score: 4.640625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.88%) |Training time=0.44s (20.44%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4783|ppo_ep: 1|act_loss: 0.034027099609375|cri_loss: 0.01751708984375|unsuper_loss: 0.0 +average reward score: 5.6171875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.16%) |Training time=0.44s (20.18%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4784|ppo_ep: 1|act_loss: 0.0131072998046875|cri_loss: 0.007442474365234375|unsuper_loss: 0.0 +average reward score: 5.18359375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.83%) |Training time=0.44s (20.48%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4785|ppo_ep: 1|act_loss: 0.00457000732421875|cri_loss: 0.002666473388671875|unsuper_loss: 0.0 +average reward score: 4.93359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.70%) |Training time=0.44s (20.63%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4786|ppo_ep: 1|act_loss: -0.018707275390625|cri_loss: -0.0084686279296875|unsuper_loss: 0.0 +average reward score: 5.0390625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.93%) |Training time=0.44s (20.39%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4787|ppo_ep: 1|act_loss: -0.018280029296875|cri_loss: -0.00768280029296875|unsuper_loss: 0.0 +average reward score: 5.20703125 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.48%) |Training time=0.44s (19.18%) |Others=0.10 (4.33%)|CurSamplesPerSec=13.86 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4788|ppo_ep: 1|act_loss: 0.0078887939453125|cri_loss: 0.0048980712890625|unsuper_loss: 0.0 +average reward score: 5.4921875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.85%) |Training time=0.44s (20.47%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.47 +[2023-04-14 11:44:03,971] [INFO] [logging.py:96:log_dist] [Rank 0] step=4790, skipped=66, lr=[3.821826091804312e-06, 3.821826091804312e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:44:03,990] [INFO] [timer.py:199:stop] epoch=0/micro_step=4790/global_step=4790, RunningAvgSamplesPerSec=105.07718399184161, CurrSamplesPerSec=115.61762668881086, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:44:04,082] [INFO] [logging.py:96:log_dist] [Rank 0] step=4790, skipped=78, lr=[1.9915238582877077e-06, 1.9915238582877077e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4789|ppo_ep: 1|act_loss: -0.032135009765625|cri_loss: -0.006683349609375|unsuper_loss: 0.0 +average reward score: 5.58984375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.82%) |Training time=0.44s (20.50%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.47 +[2023-04-14 11:44:06,141] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 4096, reducing to 2048 +epoch: 0|step: 4790|ppo_ep: 1|act_loss: -0.0193939208984375|cri_loss: -0.0087890625|unsuper_loss: 0.0 +average reward score: 5.7890625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.49%) |Training time=0.43s (19.85%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4791|ppo_ep: 1|act_loss: -0.03594970703125|cri_loss: -0.017120361328125|unsuper_loss: 0.0 +average reward score: 4.75390625 +------------------------------------------------------------------------------------- +|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.98%) |Training time=0.44s (19.55%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.28 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4792|ppo_ep: 1|act_loss: -0.00423431396484375|cri_loss: -0.0019321441650390625|unsuper_loss: 0.0 +average reward score: 5.50390625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.77%) |Training time=0.44s (20.55%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4793|ppo_ep: 1|act_loss: -0.0269622802734375|cri_loss: -0.0128021240234375|unsuper_loss: 0.0 +average reward score: 5.71875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.58%) |Training time=0.45s (20.73%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4794|ppo_ep: 1|act_loss: 0.04144287109375|cri_loss: 0.023712158203125|unsuper_loss: 0.0 +average reward score: 3.994140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.43%) |Training time=0.45s (20.90%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4795|ppo_ep: 1|act_loss: 0.0204315185546875|cri_loss: 0.0131378173828125|unsuper_loss: 0.0 +average reward score: 5.4921875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.75%) |Training time=0.44s (20.57%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4796|ppo_ep: 1|act_loss: 0.0016412734985351562|cri_loss: 0.000988006591796875|unsuper_loss: 0.0 +average reward score: 4.734375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.02%) |Training time=0.44s (20.29%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4797|ppo_ep: 1|act_loss: 0.01119232177734375|cri_loss: 0.0069122314453125|unsuper_loss: 0.0 +average reward score: 5.45703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.59%) |Training time=0.45s (20.76%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4798|ppo_ep: 1|act_loss: -0.0167083740234375|cri_loss: -0.00778961181640625|unsuper_loss: 0.0 +average reward score: 5.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.75%) |Training time=0.44s (20.58%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.47 +[2023-04-14 11:44:25,543] [INFO] [logging.py:96:log_dist] [Rank 0] step=4800, skipped=67, lr=[3.8054789019689153e-06, 3.8054789019689153e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:44:25,562] [INFO] [timer.py:199:stop] epoch=0/micro_step=4800/global_step=4800, RunningAvgSamplesPerSec=105.09690332358787, CurrSamplesPerSec=114.88991285099068, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:44:25,655] [INFO] [logging.py:96:log_dist] [Rank 0] step=4800, skipped=78, lr=[1.9821038565856653e-06, 1.9821038565856653e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4799|ppo_ep: 1|act_loss: -0.030059814453125|cri_loss: -0.01418304443359375|unsuper_loss: 0.0 +average reward score: 7.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.81%) |Training time=0.44s (20.54%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4800|ppo_ep: 1|act_loss: 0.0014495849609375|cri_loss: 0.0008115768432617188|unsuper_loss: 0.0 +average reward score: 6.1640625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.13%) |Training time=0.43s (20.16%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4801|ppo_ep: 1|act_loss: 0.007587432861328125|cri_loss: 0.004497528076171875|unsuper_loss: 0.0 +average reward score: 5.3203125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.86%) |Training time=0.44s (20.48%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4802|ppo_ep: 1|act_loss: 0.025054931640625|cri_loss: 0.017242431640625|unsuper_loss: 0.0 +average reward score: 4.765625 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.44%) |Training time=0.45s (19.22%) |Others=0.10 (4.34%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4803|ppo_ep: 1|act_loss: 0.03131103515625|cri_loss: 0.016448974609375|unsuper_loss: 0.0 +average reward score: 4.046875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.67%) |Training time=0.44s (20.67%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4804|ppo_ep: 1|act_loss: 0.006938934326171875|cri_loss: 0.00620269775390625|unsuper_loss: 0.0 +average reward score: 5.4609375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.56%) |Training time=0.45s (20.79%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4805|ppo_ep: 1|act_loss: -0.022186279296875|cri_loss: -0.00853729248046875|unsuper_loss: 0.0 +average reward score: 5.29296875 +------------------------------------------------------------------------------------- +|E2E latency=2.54s |Gather latency=0.00s (0.00%) |Generate time=1.60s (63.07%) |Training time=0.44s (17.42%) |Others=0.50 (19.51%)|CurSamplesPerSec=12.59 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4806|ppo_ep: 1|act_loss: -0.0245361328125|cri_loss: -0.01068115234375|unsuper_loss: 0.0 +average reward score: 5.84375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.79%) |Training time=0.44s (20.51%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4807|ppo_ep: 1|act_loss: -0.003475189208984375|cri_loss: 0.0003528594970703125|unsuper_loss: 0.0 +average reward score: 4.68359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.62%) |Training time=0.45s (20.72%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4808|ppo_ep: 1|act_loss: -0.023895263671875|cri_loss: -0.011199951171875|unsuper_loss: 0.0 +average reward score: 5.62109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.68%) |Training time=0.44s (20.63%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.47 +[2023-04-14 11:44:47,592] [INFO] [logging.py:96:log_dist] [Rank 0] step=4810, skipped=67, lr=[3.7873297224053474e-06, 3.7873297224053474e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:44:47,610] [INFO] [timer.py:199:stop] epoch=0/micro_step=4810/global_step=4810, RunningAvgSamplesPerSec=105.11458737215244, CurrSamplesPerSec=111.97529193575703, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:44:47,702] [INFO] [logging.py:96:log_dist] [Rank 0] step=4810, skipped=78, lr=[1.972691531353826e-06, 1.972691531353826e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4809|ppo_ep: 1|act_loss: -0.017120361328125|cri_loss: -0.008392333984375|unsuper_loss: 0.0 +average reward score: 5.8984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.55%) |Training time=0.45s (20.80%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4810|ppo_ep: 1|act_loss: -0.052734375|cri_loss: -0.025604248046875|unsuper_loss: 0.0 +average reward score: 4.78515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.85%) |Training time=0.44s (20.53%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4811|ppo_ep: 1|act_loss: -0.004974365234375|cri_loss: -0.0013408660888671875|unsuper_loss: 0.0 +average reward score: 4.6953125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.79%) |Training time=0.44s (20.53%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4812|ppo_ep: 1|act_loss: 0.007965087890625|cri_loss: 0.0068359375|unsuper_loss: 0.0 +average reward score: 5.62890625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.59%) |Training time=0.45s (20.74%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4813|ppo_ep: 1|act_loss: 0.013336181640625|cri_loss: 0.007358551025390625|unsuper_loss: 0.0 +average reward score: 6.703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.60%) |Training time=0.45s (20.73%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4814|ppo_ep: 1|act_loss: 0.03265380859375|cri_loss: 0.0167083740234375|unsuper_loss: 0.0 +average reward score: 5.16015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.54%) |Training time=0.45s (20.73%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4815|ppo_ep: 1|act_loss: 0.0244293212890625|cri_loss: 0.01416015625|unsuper_loss: 0.0 +average reward score: 4.9375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.67%) |Training time=0.44s (20.65%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4816|ppo_ep: 1|act_loss: 0.01470947265625|cri_loss: 0.0077667236328125|unsuper_loss: 0.0 +average reward score: 7.00390625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.76%) |Training time=0.44s (20.58%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4817|ppo_ep: 1|act_loss: 0.013702392578125|cri_loss: 0.007625579833984375|unsuper_loss: 0.0 +average reward score: 5.1015625 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.60s (71.99%) |Training time=0.46s (20.71%) |Others=0.16 (7.30%)|CurSamplesPerSec=14.40 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4818|ppo_ep: 1|act_loss: 0.027618408203125|cri_loss: 0.01415252685546875|unsuper_loss: 0.0 +average reward score: 5.9140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.79%) |Training time=0.44s (20.42%) |Others=0.10 (4.79%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.47 +[2023-04-14 11:45:09,187] [INFO] [logging.py:96:log_dist] [Rank 0] step=4820, skipped=67, lr=[3.769195923618539e-06, 3.769195923618539e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:45:09,206] [INFO] [timer.py:199:stop] epoch=0/micro_step=4820/global_step=4820, RunningAvgSamplesPerSec=105.12987027841997, CurrSamplesPerSec=109.2328892433651, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:45:09,298] [INFO] [logging.py:96:log_dist] [Rank 0] step=4820, skipped=78, lr=[1.9632870221055535e-06, 1.9632870221055535e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4819|ppo_ep: 1|act_loss: 0.0164794921875|cri_loss: 0.00868988037109375|unsuper_loss: 0.0 +average reward score: 4.78125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.33%) |Training time=0.46s (21.06%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4820|ppo_ep: 1|act_loss: 0.02825927734375|cri_loss: 0.01436614990234375|unsuper_loss: 0.0 +average reward score: 4.9609375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.82%) |Training time=0.44s (20.27%) |Others=0.11 (4.91%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4821|ppo_ep: 1|act_loss: -0.001445770263671875|cri_loss: -0.0004239082336425781|unsuper_loss: 0.0 +average reward score: 5.0859375 +------------------------------------------------------------------------------------- +|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.69s (75.59%) |Training time=0.45s (19.93%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.30 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4822|ppo_ep: 1|act_loss: -0.024169921875|cri_loss: -0.01146697998046875|unsuper_loss: 0.0 +average reward score: 4.38671875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.60%) |Training time=0.45s (20.74%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4823|ppo_ep: 1|act_loss: -0.0151824951171875|cri_loss: -0.00701141357421875|unsuper_loss: 0.0 +average reward score: 4.98046875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.56%) |Training time=0.45s (20.77%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4824|ppo_ep: 1|act_loss: -0.048370361328125|cri_loss: -0.0208587646484375|unsuper_loss: 0.0 +average reward score: 5.0 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.33%) |Training time=0.45s (21.02%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4825|ppo_ep: 1|act_loss: -0.0100555419921875|cri_loss: -0.004581451416015625|unsuper_loss: 0.0 +average reward score: 5.5390625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.90%) |Training time=0.44s (20.44%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4826|ppo_ep: 1|act_loss: 0.054901123046875|cri_loss: 0.029449462890625|unsuper_loss: 0.0 +average reward score: 4.85546875 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.62%) |Training time=0.44s (20.06%) |Others=0.16 (7.32%)|CurSamplesPerSec=14.49 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4827|ppo_ep: 1|act_loss: -0.01763916015625|cri_loss: -0.008514404296875|unsuper_loss: 0.0 +average reward score: 4.390625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.68%) |Training time=0.44s (20.63%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4828|ppo_ep: 1|act_loss: 0.0816650390625|cri_loss: 0.044952392578125|unsuper_loss: 0.0 +average reward score: 4.6953125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.89%) |Training time=0.44s (20.43%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.47 +[2023-04-14 11:45:30,886] [INFO] [logging.py:96:log_dist] [Rank 0] step=4830, skipped=67, lr=[3.7510777743951295e-06, 3.7510777743951295e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:45:30,905] [INFO] [timer.py:199:stop] epoch=0/micro_step=4830/global_step=4830, RunningAvgSamplesPerSec=105.14553518088816, CurrSamplesPerSec=112.7394056175783, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:45:30,998] [INFO] [logging.py:96:log_dist] [Rank 0] step=4830, skipped=78, lr=[1.9538904682383637e-06, 1.9538904682383637e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4829|ppo_ep: 1|act_loss: 0.00909423828125|cri_loss: 0.0047149658203125|unsuper_loss: 0.0 +average reward score: 5.10546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.60%) |Training time=0.45s (20.77%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4830|ppo_ep: 1|act_loss: -0.000301361083984375|cri_loss: 0.00032329559326171875|unsuper_loss: 0.0 +average reward score: 6.6796875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.69%) |Training time=0.44s (20.64%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4831|ppo_ep: 1|act_loss: -0.0018453598022460938|cri_loss: -0.0007162094116210938|unsuper_loss: 0.0 +average reward score: 4.7421875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.69%) |Training time=0.44s (20.65%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4832|ppo_ep: 1|act_loss: 0.0035800933837890625|cri_loss: 0.00217437744140625|unsuper_loss: 0.0 +average reward score: 5.5703125 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.78s (78.49%) |Training time=0.39s (17.07%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.13 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4833|ppo_ep: 1|act_loss: 0.0008764266967773438|cri_loss: 0.0006198883056640625|unsuper_loss: 0.0 +average reward score: 3.99609375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.77%) |Training time=0.44s (20.58%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4834|ppo_ep: 1|act_loss: -0.00731658935546875|cri_loss: -0.0033473968505859375|unsuper_loss: 0.0 +average reward score: 4.3125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.56%) |Training time=0.45s (20.78%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4835|ppo_ep: 1|act_loss: -0.0066375732421875|cri_loss: -0.003192901611328125|unsuper_loss: 0.0 +average reward score: 4.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.96%) |Training time=0.44s (20.35%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4836|ppo_ep: 1|act_loss: 0.0096588134765625|cri_loss: 0.005191802978515625|unsuper_loss: 0.0 +average reward score: 5.4296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.68%) |Training time=0.44s (20.67%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4837|ppo_ep: 1|act_loss: -0.018707275390625|cri_loss: -0.00823974609375|unsuper_loss: 0.0 +average reward score: 5.78125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.53%) |Training time=0.45s (20.80%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4838|ppo_ep: 1|act_loss: 0.005157470703125|cri_loss: 0.005462646484375|unsuper_loss: 0.0 +average reward score: 5.0390625 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.62s (69.41%) |Training time=0.44s (18.96%) |Others=0.27 (11.63%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.47 +[2023-04-14 11:45:52,873] [INFO] [logging.py:96:log_dist] [Rank 0] step=4840, skipped=67, lr=[3.7329755432898005e-06, 3.7329755432898005e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:45:52,891] [INFO] [timer.py:199:stop] epoch=0/micro_step=4840/global_step=4840, RunningAvgSamplesPerSec=105.16561029558453, CurrSamplesPerSec=110.12817981835353, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:45:52,984] [INFO] [logging.py:96:log_dist] [Rank 0] step=4840, skipped=78, lr=[1.944502009031851e-06, 1.944502009031851e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4839|ppo_ep: 1|act_loss: 0.04376220703125|cri_loss: 0.02435302734375|unsuper_loss: 0.0 +average reward score: 5.25 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.36%) |Training time=0.45s (21.01%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4840|ppo_ep: 1|act_loss: 0.02081298828125|cri_loss: 0.01064300537109375|unsuper_loss: 0.0 +average reward score: 5.03515625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.56%) |Training time=0.45s (20.79%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4841|ppo_ep: 1|act_loss: -0.0098876953125|cri_loss: -0.0048065185546875|unsuper_loss: 0.0 +average reward score: 5.546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.52%) |Training time=0.45s (20.81%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4842|ppo_ep: 1|act_loss: 0.0190887451171875|cri_loss: 0.0098876953125|unsuper_loss: 0.0 +average reward score: 6.19140625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.65%) |Training time=0.44s (20.69%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.47 +[2023-04-14 11:46:01,459] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 2048, reducing to 1024 +epoch: 0|step: 4843|ppo_ep: 1|act_loss: -0.08251953125|cri_loss: 0.013916015625|unsuper_loss: 0.0 +average reward score: 5.1640625 +------------------------------------------------------------------------------------- +|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.57%) |Training time=0.42s (19.72%) |Others=0.10 (4.71%)|CurSamplesPerSec=15.13 |AvgSamplesPerSec=14.47 +[2023-04-14 11:46:03,690] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 4844|ppo_ep: 1|act_loss: -0.0209197998046875|cri_loss: -0.01010894775390625|unsuper_loss: 0.0 +average reward score: 5.5703125 +------------------------------------------------------------------------------------- +|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.60s (69.56%) |Training time=0.45s (19.41%) |Others=0.25 (11.03%)|CurSamplesPerSec=13.90 |AvgSamplesPerSec=14.47 +[2023-04-14 11:46:05,989] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 4845|ppo_ep: 1|act_loss: -0.024261474609375|cri_loss: -0.0117340087890625|unsuper_loss: 0.0 +average reward score: 5.56640625 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.92%) |Training time=0.45s (20.90%) |Others=0.09 (4.17%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4846|ppo_ep: 1|act_loss: 0.01186370849609375|cri_loss: 0.00653839111328125|unsuper_loss: 0.0 +average reward score: 4.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.66%) |Training time=0.44s (20.62%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4847|ppo_ep: 1|act_loss: -0.0236663818359375|cri_loss: -0.0113372802734375|unsuper_loss: 0.0 +average reward score: 4.375 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.49%) |Training time=0.44s (19.16%) |Others=0.10 (4.35%)|CurSamplesPerSec=13.86 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4848|ppo_ep: 1|act_loss: -0.0061187744140625|cri_loss: -0.00244903564453125|unsuper_loss: 0.0 +average reward score: 4.9765625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.57%) |Training time=0.44s (20.54%) |Others=0.11 (4.89%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.47 +[2023-04-14 11:46:14,645] [INFO] [logging.py:96:log_dist] [Rank 0] step=4850, skipped=68, lr=[3.7166973670563355e-06, 3.7166973670563355e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:46:14,664] [INFO] [timer.py:199:stop] epoch=0/micro_step=4850/global_step=4850, RunningAvgSamplesPerSec=105.18298878559892, CurrSamplesPerSec=112.45244484735046, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:46:14,757] [INFO] [logging.py:96:log_dist] [Rank 0] step=4850, skipped=80, lr=[1.9369971633416747e-06, 1.9369971633416747e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4849|ppo_ep: 1|act_loss: 0.0143890380859375|cri_loss: 0.008087158203125|unsuper_loss: 0.0 +average reward score: 5.74609375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.77%) |Training time=0.45s (20.64%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4850|ppo_ep: 1|act_loss: -0.0021076202392578125|cri_loss: -0.0008134841918945312|unsuper_loss: 0.0 +average reward score: 5.41015625 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.78%) |Training time=0.44s (19.76%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.25 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4851|ppo_ep: 1|act_loss: -0.00928497314453125|cri_loss: -0.004505157470703125|unsuper_loss: 0.0 +average reward score: 5.62109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.40%) |Training time=0.45s (20.76%) |Others=0.10 (4.83%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4852|ppo_ep: 1|act_loss: 0.008697509765625|cri_loss: 0.00479888916015625|unsuper_loss: 0.0 +average reward score: 5.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.75%) |Training time=0.44s (20.56%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4853|ppo_ep: 1|act_loss: 0.0016317367553710938|cri_loss: 0.00292205810546875|unsuper_loss: 0.0 +average reward score: 6.00390625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.69%) |Training time=0.44s (20.63%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4854|ppo_ep: 1|act_loss: -0.00138092041015625|cri_loss: -0.00043582916259765625|unsuper_loss: 0.0 +average reward score: 5.734375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.57%) |Training time=0.45s (20.77%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4855|ppo_ep: 1|act_loss: 0.1221923828125|cri_loss: 0.06793212890625|unsuper_loss: 0.0 +average reward score: 4.80078125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.97%) |Training time=0.44s (20.36%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4856|ppo_ep: 1|act_loss: -0.00634002685546875|cri_loss: -0.002719879150390625|unsuper_loss: 0.0 +average reward score: 5.53515625 +------------------------------------------------------------------------------------- +|E2E latency=2.87s |Gather latency=0.00s (0.00%) |Generate time=1.61s (56.00%) |Training time=0.44s (15.37%) |Others=0.82 (28.63%)|CurSamplesPerSec=11.16 |AvgSamplesPerSec=14.47 +[2023-04-14 11:46:32,648] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 1024, reducing to 512 +[2023-04-14 11:46:32,734] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384 +epoch: 0|step: 4857|ppo_ep: 1|act_loss: -0.033935546875|cri_loss: 0.1702880859375|unsuper_loss: 0.0 +average reward score: 4.53125 +------------------------------------------------------------------------------------- +|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.84%) |Training time=0.42s (19.84%) |Others=0.09 (4.32%)|CurSamplesPerSec=15.14 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4858|ppo_ep: 1|act_loss: 0.02703857421875|cri_loss: 0.016845703125|unsuper_loss: 0.0 +average reward score: 4.40625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.84%) |Training time=0.44s (20.47%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.47 +[2023-04-14 11:46:36,918] [INFO] [logging.py:96:log_dist] [Rank 0] step=4860, skipped=69, lr=[3.700432497278104e-06, 3.700432497278104e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:46:36,937] [INFO] [timer.py:199:stop] epoch=0/micro_step=4860/global_step=4860, RunningAvgSamplesPerSec=105.2016877522836, CurrSamplesPerSec=113.58462629500683, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:46:37,029] [INFO] [logging.py:96:log_dist] [Rank 0] step=4860, skipped=81, lr=[1.9285605994579322e-06, 1.9285605994579322e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4859|ppo_ep: 1|act_loss: 0.02972412109375|cri_loss: 0.0167388916015625|unsuper_loss: 0.0 +average reward score: 5.84375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.66%) |Training time=0.44s (20.69%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4860|ppo_ep: 1|act_loss: 0.006023406982421875|cri_loss: 0.003147125244140625|unsuper_loss: 0.0 +average reward score: 5.91015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.73%) |Training time=0.44s (20.60%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4861|ppo_ep: 1|act_loss: -0.0489501953125|cri_loss: -0.0228729248046875|unsuper_loss: 0.0 +average reward score: 5.640625 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.27%) |Training time=0.46s (21.01%) |Others=0.13 (5.72%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4862|ppo_ep: 1|act_loss: -0.029449462890625|cri_loss: -0.01416778564453125|unsuper_loss: 0.0 +average reward score: 4.23046875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.80%) |Training time=0.44s (20.23%) |Others=0.11 (4.96%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4863|ppo_ep: 1|act_loss: 0.02264404296875|cri_loss: 0.01192474365234375|unsuper_loss: 0.0 +average reward score: 5.73828125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.60%) |Training time=0.45s (20.72%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4864|ppo_ep: 1|act_loss: 0.0120697021484375|cri_loss: 0.006404876708984375|unsuper_loss: 0.0 +average reward score: 6.19921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.64%) |Training time=0.44s (20.68%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4865|ppo_ep: 1|act_loss: 0.007213592529296875|cri_loss: 0.0038242340087890625|unsuper_loss: 0.0 +average reward score: 4.9921875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.85%) |Training time=0.44s (20.49%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4866|ppo_ep: 1|act_loss: 0.0120849609375|cri_loss: 0.006259918212890625|unsuper_loss: 0.0 +average reward score: 6.2265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.67%) |Training time=0.44s (20.66%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4867|ppo_ep: 1|act_loss: 0.024078369140625|cri_loss: 0.012603759765625|unsuper_loss: 0.0 +average reward score: 4.25 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.70%) |Training time=0.44s (20.63%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.47 +[2023-04-14 11:46:56,311] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 512, reducing to 256 +[2023-04-14 11:46:56,397] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192 +epoch: 0|step: 4868|ppo_ep: 1|act_loss: 0.000885009765625|cri_loss: 0.0006785392761230469|unsuper_loss: 0.0 +average reward score: 7.36328125 +------------------------------------------------------------------------------------- +|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.60s (76.11%) |Training time=0.41s (19.55%) |Others=0.09 (4.34%)|CurSamplesPerSec=15.19 |AvgSamplesPerSec=14.47 +[2023-04-14 11:46:58,434] [INFO] [logging.py:96:log_dist] [Rank 0] step=4870, skipped=70, lr=[3.6841811292336835e-06, 3.6841811292336835e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:46:58,453] [INFO] [timer.py:199:stop] epoch=0/micro_step=4870/global_step=4870, RunningAvgSamplesPerSec=105.2193610408015, CurrSamplesPerSec=112.97113388004588, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:46:58,545] [INFO] [logging.py:96:log_dist] [Rank 0] step=4870, skipped=82, lr=[1.9201308963652553e-06, 1.9201308963652553e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4869|ppo_ep: 1|act_loss: 0.039154052734375|cri_loss: 0.0216522216796875|unsuper_loss: 0.0 +average reward score: 5.81640625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.59%) |Training time=0.45s (20.76%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4870|ppo_ep: 1|act_loss: 0.03594970703125|cri_loss: 0.01934814453125|unsuper_loss: 0.0 +average reward score: 5.26953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.36%) |Training time=0.45s (20.99%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4871|ppo_ep: 1|act_loss: -0.0149078369140625|cri_loss: -0.00624847412109375|unsuper_loss: 0.0 +average reward score: 5.1484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.65%) |Training time=0.44s (20.67%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4872|ppo_ep: 1|act_loss: -0.0032520294189453125|cri_loss: -0.0011539459228515625|unsuper_loss: 0.0 +average reward score: 5.171875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.66%) |Training time=0.44s (20.68%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4873|ppo_ep: 1|act_loss: -0.00739288330078125|cri_loss: -0.00341796875|unsuper_loss: 0.0 +average reward score: 4.87890625 +------------------------------------------------------------------------------------- +|E2E latency=2.51s |Gather latency=0.00s (0.00%) |Generate time=1.60s (63.72%) |Training time=0.44s (17.57%) |Others=0.47 (18.71%)|CurSamplesPerSec=12.74 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4874|ppo_ep: 1|act_loss: 0.0113525390625|cri_loss: 0.005992889404296875|unsuper_loss: 0.0 +average reward score: 6.44140625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.33%) |Training time=0.45s (21.00%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4875|ppo_ep: 1|act_loss: -0.00506591796875|cri_loss: -0.001888275146484375|unsuper_loss: 0.0 +average reward score: 6.421875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.54%) |Training time=0.45s (20.81%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4876|ppo_ep: 1|act_loss: -0.01038360595703125|cri_loss: -0.0050811767578125|unsuper_loss: 0.0 +average reward score: 4.63671875 +------------------------------------------------------------------------------------- +|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.60s (71.42%) |Training time=0.47s (20.88%) |Others=0.17 (7.70%)|CurSamplesPerSec=14.29 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4877|ppo_ep: 1|act_loss: -0.023468017578125|cri_loss: -0.01141357421875|unsuper_loss: 0.0 +average reward score: 5.8828125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.72%) |Training time=0.44s (20.41%) |Others=0.10 (4.87%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4878|ppo_ep: 1|act_loss: 0.00634002685546875|cri_loss: 0.003414154052734375|unsuper_loss: 0.0 +average reward score: 5.0390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.27%) |Training time=0.45s (20.81%) |Others=0.11 (4.93%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.47 +[2023-04-14 11:47:20,508] [INFO] [logging.py:96:log_dist] [Rank 0] step=4880, skipped=70, lr=[3.6661401263059195e-06, 3.6661401263059195e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:47:20,682] [INFO] [timer.py:199:stop] epoch=0/micro_step=4880/global_step=4880, RunningAvgSamplesPerSec=105.21670595000673, CurrSamplesPerSec=63.53029762995129, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:47:20,777] [INFO] [logging.py:96:log_dist] [Rank 0] step=4880, skipped=82, lr=[1.9107727292970774e-06, 1.9107727292970774e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4879|ppo_ep: 1|act_loss: 0.030609130859375|cri_loss: 0.0157623291015625|unsuper_loss: 0.0 +average reward score: 5.63671875 +------------------------------------------------------------------------------------- +|E2E latency=2.41s |Gather latency=0.00s (0.00%) |Generate time=1.63s (67.36%) |Training time=0.69s (28.47%) |Others=0.10 (4.16%)|CurSamplesPerSec=13.26 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4880|ppo_ep: 1|act_loss: -0.0046234130859375|cri_loss: -0.0019855499267578125|unsuper_loss: 0.0 +average reward score: 6.265625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.67s (76.48%) |Training time=0.41s (18.75%) |Others=0.10 (4.76%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4881|ppo_ep: 1|act_loss: 0.01279449462890625|cri_loss: 0.0069427490234375|unsuper_loss: 0.0 +average reward score: 5.1015625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.03%) |Training time=0.47s (21.30%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4882|ppo_ep: 1|act_loss: -0.03192138671875|cri_loss: -0.0152435302734375|unsuper_loss: 0.0 +average reward score: 4.90625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.37%) |Training time=0.46s (20.94%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4883|ppo_ep: 1|act_loss: -0.0025081634521484375|cri_loss: -0.00106048583984375|unsuper_loss: 0.0 +average reward score: 4.70703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.17%) |Training time=0.46s (21.15%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4884|ppo_ep: 1|act_loss: 0.020965576171875|cri_loss: 0.01074981689453125|unsuper_loss: 0.0 +average reward score: 6.15625 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.62%) |Training time=0.48s (21.72%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4885|ppo_ep: 1|act_loss: -0.0005536079406738281|cri_loss: 0.00018978118896484375|unsuper_loss: 0.0 +average reward score: 5.95703125 +------------------------------------------------------------------------------------- +|E2E latency=2.65s |Gather latency=0.00s (0.00%) |Generate time=1.63s (61.59%) |Training time=0.46s (17.21%) |Others=0.56 (21.21%)|CurSamplesPerSec=12.07 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4886|ppo_ep: 1|act_loss: 0.026092529296875|cri_loss: 0.01351165771484375|unsuper_loss: 0.0 +average reward score: 5.44140625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.18%) |Training time=0.46s (21.20%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4887|ppo_ep: 1|act_loss: 0.021514892578125|cri_loss: 0.01136016845703125|unsuper_loss: 0.0 +average reward score: 4.515625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.42%) |Training time=0.48s (21.97%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4888|ppo_ep: 1|act_loss: 0.00982666015625|cri_loss: 0.005466461181640625|unsuper_loss: 0.0 +average reward score: 5.39453125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.69%) |Training time=0.47s (21.70%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.47 +[2023-04-14 11:47:42,965] [INFO] [logging.py:96:log_dist] [Rank 0] step=4890, skipped=70, lr=[3.6481163004770804e-06, 3.6481163004770804e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:47:42,983] [INFO] [timer.py:199:stop] epoch=0/micro_step=4890/global_step=4890, RunningAvgSamplesPerSec=105.22021353634193, CurrSamplesPerSec=99.59072874905486, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:47:43,076] [INFO] [logging.py:96:log_dist] [Rank 0] step=4890, skipped=82, lr=[1.9014232959984846e-06, 1.9014232959984846e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4889|ppo_ep: 1|act_loss: 0.009552001953125|cri_loss: 0.00537109375|unsuper_loss: 0.0 +average reward score: 6.1640625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.29%) |Training time=0.48s (22.12%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4890|ppo_ep: 1|act_loss: 0.017578125|cri_loss: 0.009979248046875|unsuper_loss: 0.0 +average reward score: 6.390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.47s (21.56%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4891|ppo_ep: 1|act_loss: 0.0318603515625|cri_loss: 0.016632080078125|unsuper_loss: 0.0 +average reward score: 5.734375 +------------------------------------------------------------------------------------- +|E2E latency=2.59s |Gather latency=0.00s (0.00%) |Generate time=1.77s (68.22%) |Training time=0.48s (18.37%) |Others=0.35 (13.41%)|CurSamplesPerSec=12.34 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4892|ppo_ep: 1|act_loss: 0.033935546875|cri_loss: 0.0182037353515625|unsuper_loss: 0.0 +average reward score: 5.05859375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.21%) |Training time=0.48s (22.17%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4893|ppo_ep: 1|act_loss: 0.0110015869140625|cri_loss: 0.006687164306640625|unsuper_loss: 0.0 +average reward score: 4.71484375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.53%) |Training time=0.48s (21.84%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4894|ppo_ep: 1|act_loss: -0.01580810546875|cri_loss: -0.00707244873046875|unsuper_loss: 0.0 +average reward score: 4.953125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.97%) |Training time=0.47s (21.41%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4895|ppo_ep: 1|act_loss: -0.0055999755859375|cri_loss: -0.0019741058349609375|unsuper_loss: 0.0 +average reward score: 5.48046875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.00%) |Training time=0.46s (21.38%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4896|ppo_ep: 1|act_loss: -0.039031982421875|cri_loss: -0.0185546875|unsuper_loss: 0.0 +average reward score: 5.296875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.00%) |Training time=0.46s (21.33%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4897|ppo_ep: 1|act_loss: 0.0193634033203125|cri_loss: 0.0103302001953125|unsuper_loss: 0.0 +average reward score: 5.5859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.47%) |Training time=0.48s (21.91%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4898|ppo_ep: 1|act_loss: 0.00010347366333007812|cri_loss: 0.0005092620849609375|unsuper_loss: 0.0 +average reward score: 4.90625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.55%) |Training time=0.47s (21.82%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.47 +[2023-04-14 11:48:05,138] [INFO] [logging.py:96:log_dist] [Rank 0] step=4900, skipped=70, lr=[3.6301099189037464e-06, 3.6301099189037464e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:48:05,156] [INFO] [timer.py:199:stop] epoch=0/micro_step=4900/global_step=4900, RunningAvgSamplesPerSec=105.21540054820468, CurrSamplesPerSec=98.67209316003058, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:48:05,251] [INFO] [logging.py:96:log_dist] [Rank 0] step=4900, skipped=82, lr=[1.892082735050632e-06, 1.892082735050632e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4899|ppo_ep: 1|act_loss: -0.01531982421875|cri_loss: -0.00739288330078125|unsuper_loss: 0.0 +average reward score: 5.91796875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.09%) |Training time=0.49s (22.24%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4900|ppo_ep: 1|act_loss: -0.0083160400390625|cri_loss: -0.0040283203125|unsuper_loss: 0.0 +average reward score: 5.30078125 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.43%) |Training time=0.51s (23.02%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.49 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4901|ppo_ep: 1|act_loss: -0.04620361328125|cri_loss: -0.0215606689453125|unsuper_loss: 0.0 +average reward score: 5.1484375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.58%) |Training time=0.47s (21.74%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4902|ppo_ep: 1|act_loss: 0.06402587890625|cri_loss: 0.034637451171875|unsuper_loss: 0.0 +average reward score: 4.89453125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.45%) |Training time=0.45s (20.77%) |Others=0.10 (4.78%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4903|ppo_ep: 1|act_loss: 0.04351806640625|cri_loss: 0.0232696533203125|unsuper_loss: 0.0 +average reward score: 4.90234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.85%) |Training time=0.42s (19.47%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4904|ppo_ep: 1|act_loss: -0.001407623291015625|cri_loss: -0.00046324729919433594|unsuper_loss: 0.0 +average reward score: 4.47265625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.50%) |Training time=0.48s (21.84%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4905|ppo_ep: 1|act_loss: 0.0149078369140625|cri_loss: 0.007965087890625|unsuper_loss: 0.0 +average reward score: 5.1484375 +------------------------------------------------------------------------------------- +|E2E latency=2.41s |Gather latency=0.00s (0.00%) |Generate time=1.61s (66.96%) |Training time=0.47s (19.60%) |Others=0.32 (13.44%)|CurSamplesPerSec=13.28 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4906|ppo_ep: 1|act_loss: 0.036285400390625|cri_loss: 0.02142333984375|unsuper_loss: 0.0 +average reward score: 5.6484375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.25%) |Training time=0.47s (21.13%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4907|ppo_ep: 1|act_loss: -0.0006389617919921875|cri_loss: 0.000934600830078125|unsuper_loss: 0.0 +average reward score: 5.359375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.27%) |Training time=0.46s (21.10%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4908|ppo_ep: 1|act_loss: -0.0016384124755859375|cri_loss: -0.0007228851318359375|unsuper_loss: 0.0 +average reward score: 4.84375 +------------------------------------------------------------------------------------- +|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.74s (75.90%) |Training time=0.45s (19.69%) |Others=0.10 (4.42%)|CurSamplesPerSec=13.96 |AvgSamplesPerSec=14.47 +[2023-04-14 11:48:27,335] [INFO] [logging.py:96:log_dist] [Rank 0] step=4910, skipped=70, lr=[3.6121212484839295e-06, 3.6121212484839295e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:48:27,353] [INFO] [timer.py:199:stop] epoch=0/micro_step=4910/global_step=4910, RunningAvgSamplesPerSec=105.21634977782396, CurrSamplesPerSec=102.24609105099212, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:48:27,446] [INFO] [logging.py:96:log_dist] [Rank 0] step=4910, skipped=82, lr=[1.882751184903167e-06, 1.882751184903167e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4909|ppo_ep: 1|act_loss: -0.01042938232421875|cri_loss: -0.00495147705078125|unsuper_loss: 0.0 +average reward score: 4.76953125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.62%) |Training time=0.48s (21.78%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4910|ppo_ep: 1|act_loss: -0.005237579345703125|cri_loss: -0.0025177001953125|unsuper_loss: 0.0 +average reward score: 5.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.34%) |Training time=0.48s (22.05%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4911|ppo_ep: 1|act_loss: -0.02166748046875|cri_loss: -0.010040283203125|unsuper_loss: 0.0 +average reward score: 4.90625 +------------------------------------------------------------------------------------- +|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.60s (71.55%) |Training time=0.47s (21.12%) |Others=0.16 (7.32%)|CurSamplesPerSec=14.28 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4912|ppo_ep: 1|act_loss: -0.04510498046875|cri_loss: -0.02032470703125|unsuper_loss: 0.0 +average reward score: 4.21484375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.35%) |Training time=0.48s (22.03%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4913|ppo_ep: 1|act_loss: -0.019134521484375|cri_loss: -0.00905609130859375|unsuper_loss: 0.0 +average reward score: 5.00390625 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.64s (73.88%) |Training time=0.48s (21.47%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.46 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4914|ppo_ep: 1|act_loss: 0.01371002197265625|cri_loss: 0.006977081298828125|unsuper_loss: 0.0 +average reward score: 5.13671875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.21%) |Training time=0.46s (21.14%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4915|ppo_ep: 1|act_loss: 0.01544189453125|cri_loss: 0.00787353515625|unsuper_loss: 0.0 +average reward score: 5.33984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.30%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4916|ppo_ep: 1|act_loss: 0.017669677734375|cri_loss: 0.00927734375|unsuper_loss: 0.0 +average reward score: 5.33984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.75%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4917|ppo_ep: 1|act_loss: 0.04022216796875|cri_loss: 0.021209716796875|unsuper_loss: 0.0 +average reward score: 4.89453125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.60%) |Training time=0.47s (21.76%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4918|ppo_ep: 1|act_loss: 0.020233154296875|cri_loss: 0.0102386474609375|unsuper_loss: 0.0 +average reward score: 5.22265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.69%) |Training time=0.47s (21.66%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.47 +[2023-04-14 11:48:49,170] [INFO] [logging.py:96:log_dist] [Rank 0] step=4920, skipped=70, lr=[3.594150555853121e-06, 3.594150555853121e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:48:49,187] [INFO] [timer.py:199:stop] epoch=0/micro_step=4920/global_step=4920, RunningAvgSamplesPerSec=105.21233766377085, CurrSamplesPerSec=101.24344625791944, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:48:49,280] [INFO] [logging.py:96:log_dist] [Rank 0] step=4920, skipped=82, lr=[1.8734287838721742e-06, 1.8734287838721742e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4919|ppo_ep: 1|act_loss: 0.045318603515625|cri_loss: 0.024871826171875|unsuper_loss: 0.0 +average reward score: 5.671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.29%) |Training time=0.48s (22.08%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4920|ppo_ep: 1|act_loss: 0.04046630859375|cri_loss: 0.02252197265625|unsuper_loss: 0.0 +average reward score: 5.15625 +------------------------------------------------------------------------------------- +|E2E latency=2.38s |Gather latency=0.00s (0.00%) |Generate time=1.69s (70.85%) |Training time=0.59s (24.92%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.44 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4921|ppo_ep: 1|act_loss: -0.029449462890625|cri_loss: -0.01418304443359375|unsuper_loss: 0.0 +average reward score: 4.5234375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.30%) |Training time=0.48s (22.05%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4922|ppo_ep: 1|act_loss: -0.01837158203125|cri_loss: -0.0085601806640625|unsuper_loss: 0.0 +average reward score: 5.328125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.47s (21.93%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4923|ppo_ep: 1|act_loss: 0.047027587890625|cri_loss: 0.0258941650390625|unsuper_loss: 0.0 +average reward score: 4.68359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.74%) |Training time=0.47s (21.63%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4924|ppo_ep: 1|act_loss: 0.0205841064453125|cri_loss: 0.010498046875|unsuper_loss: 0.0 +average reward score: 4.75390625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.47s (21.94%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4925|ppo_ep: 1|act_loss: 0.00734710693359375|cri_loss: 0.003772735595703125|unsuper_loss: 0.0 +average reward score: 4.86328125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.37%) |Training time=0.48s (22.00%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4926|ppo_ep: 1|act_loss: -0.010284423828125|cri_loss: -0.004993438720703125|unsuper_loss: 0.0 +average reward score: 4.35546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.78%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4927|ppo_ep: 1|act_loss: -0.03948974609375|cri_loss: -0.0186767578125|unsuper_loss: 0.0 +average reward score: 5.28515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.31%) |Training time=0.48s (22.07%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4928|ppo_ep: 1|act_loss: -0.00012969970703125|cri_loss: 0.0004787445068359375|unsuper_loss: 0.0 +average reward score: 4.94140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.53%) |Training time=0.47s (21.84%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.47 +[2023-04-14 11:49:11,006] [INFO] [logging.py:96:log_dist] [Rank 0] step=4930, skipped=70, lr=[3.576198107380337e-06, 3.576198107380337e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:49:11,024] [INFO] [timer.py:199:stop] epoch=0/micro_step=4930/global_step=4930, RunningAvgSamplesPerSec=105.20099548291466, CurrSamplesPerSec=102.2384583684684, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:49:11,117] [INFO] [logging.py:96:log_dist] [Rank 0] step=4930, skipped=82, lr=[1.8641156701381243e-06, 1.8641156701381243e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4929|ppo_ep: 1|act_loss: 0.003734588623046875|cri_loss: 0.003139495849609375|unsuper_loss: 0.0 +average reward score: 4.70703125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.48s (21.97%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4930|ppo_ep: 1|act_loss: 0.0060272216796875|cri_loss: 0.0032634735107421875|unsuper_loss: 0.0 +average reward score: 4.55859375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.78%) |Training time=0.49s (22.61%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4931|ppo_ep: 1|act_loss: 0.07098388671875|cri_loss: 0.03961181640625|unsuper_loss: 0.0 +average reward score: 4.91015625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.06%) |Training time=0.49s (22.34%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4932|ppo_ep: 1|act_loss: 0.013092041015625|cri_loss: 0.0085296630859375|unsuper_loss: 0.0 +average reward score: 5.203125 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.59s (69.88%) |Training time=0.49s (21.32%) |Others=0.20 (8.80%)|CurSamplesPerSec=14.06 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4933|ppo_ep: 1|act_loss: -0.0150146484375|cri_loss: -0.007354736328125|unsuper_loss: 0.0 +average reward score: 5.41796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.49%) |Training time=0.47s (21.85%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4934|ppo_ep: 1|act_loss: -0.023773193359375|cri_loss: -0.0106201171875|unsuper_loss: 0.0 +average reward score: 4.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.26%) |Training time=0.48s (22.13%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4935|ppo_ep: 1|act_loss: -0.062286376953125|cri_loss: -0.0286865234375|unsuper_loss: 0.0 +average reward score: 4.9921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.57%) |Training time=0.47s (21.81%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4936|ppo_ep: 1|act_loss: -0.036224365234375|cri_loss: -0.01629638671875|unsuper_loss: 0.0 +average reward score: 6.27734375 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.50%) |Training time=0.47s (20.21%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4937|ppo_ep: 1|act_loss: -0.03900146484375|cri_loss: -0.0185546875|unsuper_loss: 0.0 +average reward score: 5.05859375 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.62s (71.80%) |Training time=0.49s (21.56%) |Others=0.15 (6.64%)|CurSamplesPerSec=14.18 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4938|ppo_ep: 1|act_loss: 0.005672454833984375|cri_loss: 0.00292205810546875|unsuper_loss: 0.0 +average reward score: 5.17578125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.64%) |Training time=0.47s (21.75%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.47 +[2023-04-14 11:49:33,102] [INFO] [logging.py:96:log_dist] [Rank 0] step=4940, skipped=70, lr=[3.5582641691641734e-06, 3.5582641691641734e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:49:33,121] [INFO] [timer.py:199:stop] epoch=0/micro_step=4940/global_step=4940, RunningAvgSamplesPerSec=105.19055610924273, CurrSamplesPerSec=97.9630665250696, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:49:33,213] [INFO] [logging.py:96:log_dist] [Rank 0] step=4940, skipped=82, lr=[1.8548119817438313e-06, 1.8548119817438313e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4939|ppo_ep: 1|act_loss: -0.02252197265625|cri_loss: -0.01056671142578125|unsuper_loss: 0.0 +average reward score: 5.61328125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.97%) |Training time=0.49s (22.45%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4940|ppo_ep: 1|act_loss: 0.033447265625|cri_loss: 0.0177001953125|unsuper_loss: 0.0 +average reward score: 5.4609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.72%) |Training time=0.47s (21.62%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4941|ppo_ep: 1|act_loss: 0.01522064208984375|cri_loss: 0.00787353515625|unsuper_loss: 0.0 +average reward score: 4.125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.80%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4942|ppo_ep: 1|act_loss: 0.037078857421875|cri_loss: 0.0198516845703125|unsuper_loss: 0.0 +average reward score: 4.9296875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.68%) |Training time=0.47s (21.68%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4943|ppo_ep: 1|act_loss: 0.009552001953125|cri_loss: 0.00495147705078125|unsuper_loss: 0.0 +average reward score: 5.87890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.70%) |Training time=0.47s (21.68%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4944|ppo_ep: 1|act_loss: -0.0019121170043945312|cri_loss: -0.0006513595581054688|unsuper_loss: 0.0 +average reward score: 4.28515625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.36%) |Training time=0.48s (22.00%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4945|ppo_ep: 1|act_loss: 0.001495361328125|cri_loss: 0.0010557174682617188|unsuper_loss: 0.0 +average reward score: 5.1015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.73%) |Training time=0.47s (21.65%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4946|ppo_ep: 1|act_loss: 0.0030002593994140625|cri_loss: 0.001708984375|unsuper_loss: 0.0 +average reward score: 4.83203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.48s (21.98%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4947|ppo_ep: 1|act_loss: 0.00046539306640625|cri_loss: 0.0007371902465820312|unsuper_loss: 0.0 +average reward score: 5.08203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.91%) |Training time=0.49s (22.48%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4948|ppo_ep: 1|act_loss: -0.00258636474609375|cri_loss: -0.0011606216430664062|unsuper_loss: 0.0 +average reward score: 5.5625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.06%) |Training time=0.48s (22.31%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.47 +[2023-04-14 11:49:54,750] [INFO] [logging.py:96:log_dist] [Rank 0] step=4950, skipped=70, lr=[3.5403490070288552e-06, 3.5403490070288552e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:49:54,769] [INFO] [timer.py:199:stop] epoch=0/micro_step=4950/global_step=4950, RunningAvgSamplesPerSec=105.18537741541601, CurrSamplesPerSec=103.79347362934539, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:49:54,861] [INFO] [logging.py:96:log_dist] [Rank 0] step=4950, skipped=82, lr=[1.8455178565923993e-06, 1.8455178565923993e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4949|ppo_ep: 1|act_loss: -0.017669677734375|cri_loss: -0.00864410400390625|unsuper_loss: 0.0 +average reward score: 4.9296875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.55%) |Training time=0.47s (21.83%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4950|ppo_ep: 1|act_loss: -0.01171875|cri_loss: -0.0056610107421875|unsuper_loss: 0.0 +average reward score: 5.71875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.08%) |Training time=0.48s (22.32%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4951|ppo_ep: 1|act_loss: -0.0146484375|cri_loss: -0.007099151611328125|unsuper_loss: 0.0 +average reward score: 4.75 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.97%) |Training time=0.49s (22.41%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4952|ppo_ep: 1|act_loss: -0.01824951171875|cri_loss: -0.0089569091796875|unsuper_loss: 0.0 +average reward score: 5.54296875 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.72s (73.83%) |Training time=0.50s (21.57%) |Others=0.11 (4.60%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4953|ppo_ep: 1|act_loss: 0.00946044921875|cri_loss: 0.005218505859375|unsuper_loss: 0.0 +average reward score: 5.58203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.10%) |Training time=0.48s (22.30%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4954|ppo_ep: 1|act_loss: -0.01007080078125|cri_loss: -0.0048370361328125|unsuper_loss: 0.0 +average reward score: 5.49609375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.89%) |Training time=0.49s (22.52%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4955|ppo_ep: 1|act_loss: -0.00392913818359375|cri_loss: -0.0012559890747070312|unsuper_loss: 0.0 +average reward score: 4.26171875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.31%) |Training time=0.48s (22.06%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4956|ppo_ep: 1|act_loss: -0.0238494873046875|cri_loss: -0.0116119384765625|unsuper_loss: 0.0 +average reward score: 5.6640625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.09%) |Training time=0.48s (22.31%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4957|ppo_ep: 1|act_loss: -0.0015230178833007812|cri_loss: -3.814697265625e-05|unsuper_loss: 0.0 +average reward score: 4.828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.48s (22.18%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4958|ppo_ep: 1|act_loss: -0.002532958984375|cri_loss: -0.0006256103515625|unsuper_loss: 0.0 +average reward score: 4.70703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.48s (21.98%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.47 +[2023-04-14 11:50:16,589] [INFO] [logging.py:96:log_dist] [Rank 0] step=4960, skipped=70, lr=[3.5224528865203054e-06, 3.5224528865203054e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:50:16,607] [INFO] [timer.py:199:stop] epoch=0/micro_step=4960/global_step=4960, RunningAvgSamplesPerSec=105.17390656645385, CurrSamplesPerSec=102.20575977508565, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:50:16,700] [INFO] [logging.py:96:log_dist] [Rank 0] step=4960, skipped=82, lr=[1.8362334324451853e-06, 1.8362334324451853e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4959|ppo_ep: 1|act_loss: -0.0043487548828125|cri_loss: -0.002040863037109375|unsuper_loss: 0.0 +average reward score: 4.890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.48s (21.94%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4960|ppo_ep: 1|act_loss: -0.0313720703125|cri_loss: -0.0146636962890625|unsuper_loss: 0.0 +average reward score: 5.5 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.30%) |Training time=0.48s (21.95%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4961|ppo_ep: 1|act_loss: -0.01314544677734375|cri_loss: -0.00518798828125|unsuper_loss: 0.0 +average reward score: 5.328125 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.72%) |Training time=0.50s (22.71%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4962|ppo_ep: 1|act_loss: -0.002475738525390625|cri_loss: -0.001026153564453125|unsuper_loss: 0.0 +average reward score: 5.55859375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.03%) |Training time=0.49s (22.35%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4963|ppo_ep: 1|act_loss: 0.06939697265625|cri_loss: 0.03741455078125|unsuper_loss: 0.0 +average reward score: 6.1875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.13%) |Training time=0.48s (22.27%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4964|ppo_ep: 1|act_loss: 0.00904083251953125|cri_loss: 0.00525665283203125|unsuper_loss: 0.0 +average reward score: 5.7578125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.79%) |Training time=0.49s (22.60%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4965|ppo_ep: 1|act_loss: 0.002620697021484375|cri_loss: 0.0014257431030273438|unsuper_loss: 0.0 +average reward score: 4.8515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.12%) |Training time=0.48s (21.96%) |Others=0.11 (4.92%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4966|ppo_ep: 1|act_loss: 0.003570556640625|cri_loss: 0.0019779205322265625|unsuper_loss: 0.0 +average reward score: 4.97265625 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.60s (70.65%) |Training time=0.51s (22.61%) |Others=0.15 (6.74%)|CurSamplesPerSec=14.11 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4967|ppo_ep: 1|act_loss: 0.01971435546875|cri_loss: 0.01021575927734375|unsuper_loss: 0.0 +average reward score: 5.37109375 +------------------------------------------------------------------------------------- +|E2E latency=2.43s |Gather latency=0.00s (0.00%) |Generate time=1.80s (73.83%) |Training time=0.54s (22.05%) |Others=0.10 (4.12%)|CurSamplesPerSec=13.14 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4968|ppo_ep: 1|act_loss: 0.004993438720703125|cri_loss: 0.0026988983154296875|unsuper_loss: 0.0 +average reward score: 7.1328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.05%) |Training time=0.48s (22.33%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.47 +[2023-04-14 11:50:38,698] [INFO] [logging.py:96:log_dist] [Rank 0] step=4970, skipped=70, lr=[3.5045760729022005e-06, 3.5045760729022005e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:50:38,716] [INFO] [timer.py:199:stop] epoch=0/micro_step=4970/global_step=4970, RunningAvgSamplesPerSec=105.15504260283433, CurrSamplesPerSec=98.29984817535782, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:50:38,809] [INFO] [logging.py:96:log_dist] [Rank 0] step=4970, skipped=82, lr=[1.8269588469197518e-06, 1.8269588469197518e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4969|ppo_ep: 1|act_loss: 0.00392913818359375|cri_loss: 0.00235748291015625|unsuper_loss: 0.0 +average reward score: 5.50390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.90%) |Training time=0.49s (22.51%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4970|ppo_ep: 1|act_loss: 0.01861572265625|cri_loss: 0.00971221923828125|unsuper_loss: 0.0 +average reward score: 5.15234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.92%) |Training time=0.49s (22.45%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4971|ppo_ep: 1|act_loss: 0.010284423828125|cri_loss: 0.005462646484375|unsuper_loss: 0.0 +average reward score: 5.0 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.94%) |Training time=0.49s (22.44%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4972|ppo_ep: 1|act_loss: 0.0051422119140625|cri_loss: 0.002857208251953125|unsuper_loss: 0.0 +average reward score: 5.84765625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.81%) |Training time=0.49s (22.56%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4973|ppo_ep: 1|act_loss: -0.009521484375|cri_loss: -0.0037899017333984375|unsuper_loss: 0.0 +average reward score: 5.8125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.59%) |Training time=0.50s (22.80%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4974|ppo_ep: 1|act_loss: -0.014678955078125|cri_loss: -0.00605010986328125|unsuper_loss: 0.0 +average reward score: 4.63671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.68%) |Training time=0.47s (21.69%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4975|ppo_ep: 1|act_loss: 0.0279693603515625|cri_loss: 0.01470947265625|unsuper_loss: 0.0 +average reward score: 5.94140625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.75%) |Training time=0.47s (21.66%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4976|ppo_ep: 1|act_loss: 0.014923095703125|cri_loss: 0.00783538818359375|unsuper_loss: 0.0 +average reward score: 4.3984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.15%) |Training time=0.48s (22.23%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4977|ppo_ep: 1|act_loss: 4.2438507080078125e-05|cri_loss: 0.00018358230590820312|unsuper_loss: 0.0 +average reward score: 5.58203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.47%) |Training time=0.47s (21.89%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4978|ppo_ep: 1|act_loss: 0.0005669593811035156|cri_loss: 0.0005650520324707031|unsuper_loss: 0.0 +average reward score: 5.31640625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.08%) |Training time=0.48s (22.32%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.47 +[2023-04-14 11:51:00,406] [INFO] [logging.py:96:log_dist] [Rank 0] step=4980, skipped=70, lr=[3.4867188311520448e-06, 3.4867188311520448e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:51:00,424] [INFO] [timer.py:199:stop] epoch=0/micro_step=4980/global_step=4980, RunningAvgSamplesPerSec=105.14420331571479, CurrSamplesPerSec=101.33762187033065, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:51:00,517] [INFO] [logging.py:96:log_dist] [Rank 0] step=4980, skipped=82, lr=[1.817694237487831e-06, 1.817694237487831e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4979|ppo_ep: 1|act_loss: -0.0105438232421875|cri_loss: -0.004886627197265625|unsuper_loss: 0.0 +average reward score: 4.90234375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.25%) |Training time=0.48s (22.14%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4980|ppo_ep: 1|act_loss: -0.0079193115234375|cri_loss: -0.003604888916015625|unsuper_loss: 0.0 +average reward score: 4.36328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.18%) |Training time=0.48s (22.20%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4981|ppo_ep: 1|act_loss: 0.03680419921875|cri_loss: 0.0190582275390625|unsuper_loss: 0.0 +average reward score: 6.2421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.06%) |Training time=0.48s (22.27%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4982|ppo_ep: 1|act_loss: -0.0011386871337890625|cri_loss: -0.00029850006103515625|unsuper_loss: 0.0 +average reward score: 5.54296875 +------------------------------------------------------------------------------------- +|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.37%) |Training time=0.50s (21.38%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.60 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4983|ppo_ep: 1|act_loss: -0.024505615234375|cri_loss: -0.0117034912109375|unsuper_loss: 0.0 +average reward score: 4.6015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.59%) |Training time=0.47s (21.78%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4984|ppo_ep: 1|act_loss: -0.02398681640625|cri_loss: -0.011444091796875|unsuper_loss: 0.0 +average reward score: 4.4765625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.25%) |Training time=0.48s (22.12%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4985|ppo_ep: 1|act_loss: 0.014617919921875|cri_loss: 0.007610321044921875|unsuper_loss: 0.0 +average reward score: 4.84375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.07%) |Training time=0.48s (22.32%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4986|ppo_ep: 1|act_loss: 0.00223541259765625|cri_loss: 0.0012683868408203125|unsuper_loss: 0.0 +average reward score: 5.22265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.25%) |Training time=0.48s (22.15%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4987|ppo_ep: 1|act_loss: -0.008880615234375|cri_loss: -0.00402069091796875|unsuper_loss: 0.0 +average reward score: 5.1640625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.35%) |Training time=0.47s (21.90%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4988|ppo_ep: 1|act_loss: 0.018798828125|cri_loss: 0.01041412353515625|unsuper_loss: 0.0 +average reward score: 4.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.47s (21.94%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.47 +[2023-04-14 11:51:22,236] [INFO] [logging.py:96:log_dist] [Rank 0] step=4990, skipped=70, lr=[3.4688814259572385e-06, 3.4688814259572385e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:51:22,255] [INFO] [timer.py:199:stop] epoch=0/micro_step=4990/global_step=4990, RunningAvgSamplesPerSec=105.13524135430049, CurrSamplesPerSec=104.26232727521308, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:51:22,348] [INFO] [logging.py:96:log_dist] [Rank 0] step=4990, skipped=82, lr=[1.8084397414732826e-06, 1.8084397414732826e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4989|ppo_ep: 1|act_loss: 0.007671356201171875|cri_loss: 0.004161834716796875|unsuper_loss: 0.0 +average reward score: 6.22265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.59%) |Training time=0.47s (21.77%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4990|ppo_ep: 1|act_loss: 0.00853729248046875|cri_loss: 0.00528717041015625|unsuper_loss: 0.0 +average reward score: 4.984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.89%) |Training time=0.46s (21.44%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4991|ppo_ep: 1|act_loss: 0.004535675048828125|cri_loss: 0.0027008056640625|unsuper_loss: 0.0 +average reward score: 5.0625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.41%) |Training time=0.47s (21.95%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4992|ppo_ep: 1|act_loss: 0.0101776123046875|cri_loss: 0.005615234375|unsuper_loss: 0.0 +average reward score: 5.0703125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.35%) |Training time=0.45s (21.01%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4993|ppo_ep: 1|act_loss: 0.0082550048828125|cri_loss: 0.00426483154296875|unsuper_loss: 0.0 +average reward score: 5.05859375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.24%) |Training time=0.46s (21.11%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4994|ppo_ep: 1|act_loss: -0.0047760009765625|cri_loss: -0.002117156982421875|unsuper_loss: 0.0 +average reward score: 5.609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.22%) |Training time=0.46s (21.12%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4995|ppo_ep: 1|act_loss: 0.05169677734375|cri_loss: 0.0268707275390625|unsuper_loss: 0.0 +average reward score: 4.87890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.31%) |Training time=0.46s (21.08%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4996|ppo_ep: 1|act_loss: -0.016204833984375|cri_loss: -0.00792694091796875|unsuper_loss: 0.0 +average reward score: 4.71875 +------------------------------------------------------------------------------------- +|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.64s (73.51%) |Training time=0.45s (20.20%) |Others=0.14 (6.29%)|CurSamplesPerSec=14.37 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4997|ppo_ep: 1|act_loss: 0.01568603515625|cri_loss: 0.0084228515625|unsuper_loss: 0.0 +average reward score: 5.578125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.50%) |Training time=0.45s (20.89%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.47 +epoch: 0|step: 4998|ppo_ep: 1|act_loss: 0.03155517578125|cri_loss: 0.0169525146484375|unsuper_loss: 0.0 +average reward score: 5.49609375 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.23%) |Training time=0.45s (19.46%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.78 |AvgSamplesPerSec=14.47 +[2023-04-14 11:51:44,060] [INFO] [logging.py:96:log_dist] [Rank 0] step=5000, skipped=70, lr=[3.4510641217111588e-06, 3.4510641217111588e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:51:44,078] [INFO] [timer.py:199:stop] epoch=0/micro_step=5000/global_step=5000, RunningAvgSamplesPerSec=105.14249500141725, CurrSamplesPerSec=110.73403004604508, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:51:44,171] [INFO] [logging.py:96:log_dist] [Rank 0] step=5000, skipped=82, lr=[1.7991954960500646e-06, 1.7991954960500646e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 4999|ppo_ep: 1|act_loss: 0.030792236328125|cri_loss: 0.01605224609375|unsuper_loss: 0.0 +average reward score: 5.6640625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.36%) |Training time=0.45s (21.01%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.47 +epoch: 0|step: 5000|ppo_ep: 1|act_loss: 0.006744384765625|cri_loss: 0.003589630126953125|unsuper_loss: 0.0 +average reward score: 6.08203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.17%) |Training time=0.46s (21.17%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.47 +epoch: 0|step: 5001|ppo_ep: 1|act_loss: 0.0019435882568359375|cri_loss: 0.001239776611328125|unsuper_loss: 0.0 +average reward score: 5.2109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.11%) |Training time=0.46s (21.24%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.47 +epoch: 0|step: 5002|ppo_ep: 1|act_loss: -0.00066375732421875|cri_loss: -0.0001690387725830078|unsuper_loss: 0.0 +average reward score: 4.44140625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.70%) |Training time=0.46s (21.08%) |Others=0.11 (5.23%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.47 +epoch: 0|step: 5003|ppo_ep: 1|act_loss: -0.00406646728515625|cri_loss: -0.0018291473388671875|unsuper_loss: 0.0 +average reward score: 4.70703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.08%) |Training time=0.46s (21.26%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.47 +epoch: 0|step: 5004|ppo_ep: 1|act_loss: -0.012481689453125|cri_loss: -0.00572967529296875|unsuper_loss: 0.0 +average reward score: 5.55859375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.45s (21.06%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.47 +epoch: 0|step: 5005|ppo_ep: 1|act_loss: -0.0100860595703125|cri_loss: -0.00489044189453125|unsuper_loss: 0.0 +average reward score: 4.91796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.06%) |Training time=0.46s (21.27%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.47 +epoch: 0|step: 5006|ppo_ep: 1|act_loss: 0.0216827392578125|cri_loss: 0.01190185546875|unsuper_loss: 0.0 +average reward score: 5.67578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.19%) |Training time=0.46s (21.16%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.47 +epoch: 0|step: 5007|ppo_ep: 1|act_loss: -0.004222869873046875|cri_loss: -0.0015840530395507812|unsuper_loss: 0.0 +average reward score: 5.71484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.29%) |Training time=0.45s (21.07%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.47 +epoch: 0|step: 5008|ppo_ep: 1|act_loss: 0.00676727294921875|cri_loss: 0.003551483154296875|unsuper_loss: 0.0 +average reward score: 6.16015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.18%) |Training time=0.46s (21.18%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.47 +[2023-04-14 11:52:05,635] [INFO] [logging.py:96:log_dist] [Rank 0] step=5010, skipped=70, lr=[3.4332671825092336e-06, 3.4332671825092336e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:52:05,654] [INFO] [timer.py:199:stop] epoch=0/micro_step=5010/global_step=5010, RunningAvgSamplesPerSec=105.14970221940123, CurrSamplesPerSec=110.66372811649983, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:52:05,747] [INFO] [logging.py:96:log_dist] [Rank 0] step=5010, skipped=82, lr=[1.7899616382401935e-06, 1.7899616382401935e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5009|ppo_ep: 1|act_loss: 0.01934814453125|cri_loss: 0.0098876953125|unsuper_loss: 0.0 +average reward score: 5.3046875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.36%) |Training time=0.45s (20.99%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.47 +epoch: 0|step: 5010|ppo_ep: 1|act_loss: 0.0182952880859375|cri_loss: 0.0097503662109375|unsuper_loss: 0.0 +average reward score: 4.62109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.20%) |Training time=0.46s (21.16%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.47 +epoch: 0|step: 5011|ppo_ep: 1|act_loss: 0.02813720703125|cri_loss: 0.01523590087890625|unsuper_loss: 0.0 +average reward score: 6.0546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.16%) |Training time=0.46s (21.19%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.47 +epoch: 0|step: 5012|ppo_ep: 1|act_loss: 0.058380126953125|cri_loss: 0.0302886962890625|unsuper_loss: 0.0 +average reward score: 6.0703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.58%) |Training time=0.45s (20.86%) |Others=0.12 (5.56%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.47 +epoch: 0|step: 5013|ppo_ep: 1|act_loss: -0.017242431640625|cri_loss: -0.00830841064453125|unsuper_loss: 0.0 +average reward score: 6.109375 +------------------------------------------------------------------------------------- +|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.73s (75.64%) |Training time=0.46s (19.97%) |Others=0.10 (4.39%)|CurSamplesPerSec=13.98 |AvgSamplesPerSec=14.47 +epoch: 0|step: 5014|ppo_ep: 1|act_loss: -0.01389312744140625|cri_loss: -0.0064849853515625|unsuper_loss: 0.0 +average reward score: 4.3046875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.09%) |Training time=0.46s (21.27%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.47 +epoch: 0|step: 5015|ppo_ep: 1|act_loss: 0.0277099609375|cri_loss: 0.0140838623046875|unsuper_loss: 0.0 +average reward score: 5.85546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.17%) |Training time=0.46s (21.20%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.47 +epoch: 0|step: 5016|ppo_ep: 1|act_loss: 0.0049591064453125|cri_loss: 0.002826690673828125|unsuper_loss: 0.0 +average reward score: 4.80078125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.54%) |Training time=0.45s (20.81%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.47 +epoch: 0|step: 5017|ppo_ep: 1|act_loss: 0.002994537353515625|cri_loss: 0.0016851425170898438|unsuper_loss: 0.0 +average reward score: 5.60546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.38%) |Training time=0.45s (20.97%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.47 +epoch: 0|step: 5018|ppo_ep: 1|act_loss: 0.0037860870361328125|cri_loss: 0.0021533966064453125|unsuper_loss: 0.0 +average reward score: 5.61328125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.18%) |Training time=0.46s (21.17%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.47 +[2023-04-14 11:52:27,314] [INFO] [logging.py:96:log_dist] [Rank 0] step=5020, skipped=70, lr=[3.4154908721450375e-06, 3.4154908721450375e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:52:27,333] [INFO] [timer.py:199:stop] epoch=0/micro_step=5020/global_step=5020, RunningAvgSamplesPerSec=105.15842373077857, CurrSamplesPerSec=111.53061617927632, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:52:27,425] [INFO] [logging.py:96:log_dist] [Rank 0] step=5020, skipped=82, lr=[1.7807383049117185e-06, 1.7807383049117185e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5019|ppo_ep: 1|act_loss: -0.00528717041015625|cri_loss: -0.0023040771484375|unsuper_loss: 0.0 +average reward score: 5.6640625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.44%) |Training time=0.45s (20.91%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.47 +epoch: 0|step: 5020|ppo_ep: 1|act_loss: -0.02325439453125|cri_loss: -0.01102447509765625|unsuper_loss: 0.0 +average reward score: 5.89453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.74%) |Training time=0.45s (20.60%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.47 +epoch: 0|step: 5021|ppo_ep: 1|act_loss: -0.0023365020751953125|cri_loss: -0.0009179115295410156|unsuper_loss: 0.0 +average reward score: 4.0390625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.46%) |Training time=0.45s (20.89%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.47 +epoch: 0|step: 5022|ppo_ep: 1|act_loss: 0.02752685546875|cri_loss: 0.014404296875|unsuper_loss: 0.0 +average reward score: 4.6875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.24%) |Training time=0.46s (21.12%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.47 +epoch: 0|step: 5023|ppo_ep: 1|act_loss: -0.00722503662109375|cri_loss: -0.0035037994384765625|unsuper_loss: 0.0 +average reward score: 5.44140625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.14%) |Training time=0.46s (21.23%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.47 +epoch: 0|step: 5024|ppo_ep: 1|act_loss: -0.030426025390625|cri_loss: -0.01476287841796875|unsuper_loss: 0.0 +average reward score: 6.5 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.39%) |Training time=0.45s (20.97%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.47 +epoch: 0|step: 5025|ppo_ep: 1|act_loss: -0.03466796875|cri_loss: -0.0169677734375|unsuper_loss: 0.0 +average reward score: 5.98828125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.48%) |Training time=0.45s (20.80%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.47 +epoch: 0|step: 5026|ppo_ep: 1|act_loss: 0.00052642822265625|cri_loss: 0.0003490447998046875|unsuper_loss: 0.0 +average reward score: 4.234375 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.56%) |Training time=0.45s (19.97%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.20 |AvgSamplesPerSec=14.47 +epoch: 0|step: 5027|ppo_ep: 1|act_loss: 0.012939453125|cri_loss: 0.0068511962890625|unsuper_loss: 0.0 +average reward score: 6.3203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.03%) |Training time=0.46s (21.34%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.47 +epoch: 0|step: 5028|ppo_ep: 1|act_loss: 0.01593017578125|cri_loss: 0.00855255126953125|unsuper_loss: 0.0 +average reward score: 5.2578125 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.34%) |Training time=0.45s (19.32%) |Others=0.10 (4.34%)|CurSamplesPerSec=13.80 |AvgSamplesPerSec=14.47 +[2023-04-14 11:52:49,163] [INFO] [logging.py:96:log_dist] [Rank 0] step=5030, skipped=70, lr=[3.397735454106371e-06, 3.397735454106371e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:52:49,181] [INFO] [timer.py:199:stop] epoch=0/micro_step=5030/global_step=5030, RunningAvgSamplesPerSec=105.16850634145916, CurrSamplesPerSec=111.15689603482346, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:52:49,274] [INFO] [logging.py:96:log_dist] [Rank 0] step=5030, skipped=82, lr=[1.7715256327766887e-06, 1.7715256327766887e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5029|ppo_ep: 1|act_loss: 0.00400543212890625|cri_loss: 0.002166748046875|unsuper_loss: 0.0 +average reward score: 5.8359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.40%) |Training time=0.45s (20.95%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.47 +epoch: 0|step: 5030|ppo_ep: 1|act_loss: 0.051605224609375|cri_loss: 0.027069091796875|unsuper_loss: 0.0 +average reward score: 5.23828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.30%) |Training time=0.46s (21.10%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.47 +epoch: 0|step: 5031|ppo_ep: 1|act_loss: 0.003597259521484375|cri_loss: 0.0019397735595703125|unsuper_loss: 0.0 +average reward score: 5.23046875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.34%) |Training time=0.45s (21.01%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.47 +epoch: 0|step: 5032|ppo_ep: 1|act_loss: 0.017059326171875|cri_loss: 0.0095672607421875|unsuper_loss: 0.0 +average reward score: 6.2265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.14%) |Training time=0.46s (21.22%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.47 +epoch: 0|step: 5033|ppo_ep: 1|act_loss: 0.00244903564453125|cri_loss: 0.00151824951171875|unsuper_loss: 0.0 +average reward score: 5.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.16%) |Training time=0.45s (20.97%) |Others=0.10 (4.87%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.47 +epoch: 0|step: 5034|ppo_ep: 1|act_loss: -0.0243072509765625|cri_loss: -0.01183319091796875|unsuper_loss: 0.0 +average reward score: 4.640625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.25%) |Training time=0.45s (21.07%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.47 +epoch: 0|step: 5035|ppo_ep: 1|act_loss: -0.00789642333984375|cri_loss: -0.0035724639892578125|unsuper_loss: 0.0 +average reward score: 5.9140625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.89%) |Training time=0.47s (21.48%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.47 +epoch: 0|step: 5036|ppo_ep: 1|act_loss: -0.0139312744140625|cri_loss: -0.006259918212890625|unsuper_loss: 0.0 +average reward score: 5.37890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.07%) |Training time=0.46s (21.23%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.47 +epoch: 0|step: 5037|ppo_ep: 1|act_loss: 0.03485107421875|cri_loss: 0.0181884765625|unsuper_loss: 0.0 +average reward score: 5.23046875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.86%) |Training time=0.44s (20.45%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.47 +epoch: 0|step: 5038|ppo_ep: 1|act_loss: -0.0124359130859375|cri_loss: -0.005916595458984375|unsuper_loss: 0.0 +average reward score: 4.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.56%) |Training time=0.44s (20.76%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.47 +[2023-04-14 11:53:10,712] [INFO] [logging.py:96:log_dist] [Rank 0] step=5040, skipped=70, lr=[3.380001191571363e-06, 3.380001191571363e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:53:10,731] [INFO] [timer.py:199:stop] epoch=0/micro_step=5040/global_step=5040, RunningAvgSamplesPerSec=105.17816044808174, CurrSamplesPerSec=113.32568471926469, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:53:10,823] [INFO] [logging.py:96:log_dist] [Rank 0] step=5040, skipped=82, lr=[1.7623237583891302e-06, 1.7623237583891302e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5039|ppo_ep: 1|act_loss: 0.00580596923828125|cri_loss: 0.0030307769775390625|unsuper_loss: 0.0 +average reward score: 6.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.57%) |Training time=0.44s (20.76%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.47 +epoch: 0|step: 5040|ppo_ep: 1|act_loss: 0.0025310516357421875|cri_loss: 0.0014247894287109375|unsuper_loss: 0.0 +average reward score: 5.55078125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.55%) |Training time=0.45s (20.79%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.47 +epoch: 0|step: 5041|ppo_ep: 1|act_loss: 0.0207977294921875|cri_loss: 0.0117645263671875|unsuper_loss: 0.0 +average reward score: 5.35546875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.51%) |Training time=0.45s (20.80%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.47 +epoch: 0|step: 5042|ppo_ep: 1|act_loss: -0.002902984619140625|cri_loss: -0.0012054443359375|unsuper_loss: 0.0 +average reward score: 6.1796875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.61%) |Training time=0.44s (20.71%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.47 +epoch: 0|step: 5043|ppo_ep: 1|act_loss: -0.0191192626953125|cri_loss: -0.0092010498046875|unsuper_loss: 0.0 +average reward score: 6.94140625 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.54%) |Training time=0.47s (20.16%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.47 +epoch: 0|step: 5044|ppo_ep: 1|act_loss: 0.036712646484375|cri_loss: 0.019195556640625|unsuper_loss: 0.0 +average reward score: 5.23046875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.28%) |Training time=0.45s (21.07%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.47 +epoch: 0|step: 5045|ppo_ep: 1|act_loss: -0.0135040283203125|cri_loss: -0.00640869140625|unsuper_loss: 0.0 +average reward score: 5.015625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.92%) |Training time=0.44s (20.39%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.47 +epoch: 0|step: 5046|ppo_ep: 1|act_loss: -0.01305389404296875|cri_loss: -0.005558013916015625|unsuper_loss: 0.0 +average reward score: 5.72265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.61%) |Training time=0.44s (20.73%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5047|ppo_ep: 1|act_loss: 0.01013946533203125|cri_loss: 0.005229949951171875|unsuper_loss: 0.0 +average reward score: 4.7109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.42%) |Training time=0.45s (20.92%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5048|ppo_ep: 1|act_loss: -0.0214691162109375|cri_loss: -0.010406494140625|unsuper_loss: 0.0 +average reward score: 5.8828125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.71%) |Training time=0.44s (20.62%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.48 +[2023-04-14 11:53:32,354] [INFO] [logging.py:96:log_dist] [Rank 0] step=5050, skipped=70, lr=[3.3622883474045655e-06, 3.3622883474045655e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:53:32,372] [INFO] [timer.py:199:stop] epoch=0/micro_step=5050/global_step=5050, RunningAvgSamplesPerSec=105.19147333167017, CurrSamplesPerSec=115.78938432684527, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:53:32,465] [INFO] [logging.py:96:log_dist] [Rank 0] step=5050, skipped=82, lr=[1.7531328181430188e-06, 1.7531328181430188e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5049|ppo_ep: 1|act_loss: -0.008087158203125|cri_loss: -0.0038394927978515625|unsuper_loss: 0.0 +average reward score: 5.66015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.85%) |Training time=0.44s (20.47%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5050|ppo_ep: 1|act_loss: -0.0283050537109375|cri_loss: -0.01366424560546875|unsuper_loss: 0.0 +average reward score: 4.7890625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.51%) |Training time=0.45s (20.83%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5051|ppo_ep: 1|act_loss: -0.0078582763671875|cri_loss: -0.0035495758056640625|unsuper_loss: 0.0 +average reward score: 3.90234375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.23%) |Training time=0.49s (22.18%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5052|ppo_ep: 1|act_loss: 0.010894775390625|cri_loss: 0.005992889404296875|unsuper_loss: 0.0 +average reward score: 6.34375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.18%) |Training time=0.46s (21.05%) |Others=0.10 (4.78%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5053|ppo_ep: 1|act_loss: -0.018035888671875|cri_loss: -0.00865936279296875|unsuper_loss: 0.0 +average reward score: 5.54296875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.06%) |Training time=0.44s (20.20%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5054|ppo_ep: 1|act_loss: 0.00310516357421875|cri_loss: 0.00226593017578125|unsuper_loss: 0.0 +average reward score: 6.3359375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.80%) |Training time=0.45s (20.50%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5055|ppo_ep: 1|act_loss: 0.0013713836669921875|cri_loss: 0.0008897781372070312|unsuper_loss: 0.0 +average reward score: 5.65625 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.66s (75.48%) |Training time=0.44s (19.84%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.51 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5056|ppo_ep: 1|act_loss: 0.0013103485107421875|cri_loss: 0.0010128021240234375|unsuper_loss: 0.0 +average reward score: 5.65625 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.73s (76.33%) |Training time=0.43s (19.13%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.13 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5057|ppo_ep: 1|act_loss: -0.0010585784912109375|cri_loss: -0.0004253387451171875|unsuper_loss: 0.0 +average reward score: 5.15234375 +------------------------------------------------------------------------------------- +|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.64s (73.10%) |Training time=0.46s (20.41%) |Others=0.15 (6.49%)|CurSamplesPerSec=14.27 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5058|ppo_ep: 1|act_loss: 0.0149688720703125|cri_loss: 0.00787353515625|unsuper_loss: 0.0 +average reward score: 5.3125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.67s (76.50%) |Training time=0.41s (18.78%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.48 +[2023-04-14 11:53:54,288] [INFO] [logging.py:96:log_dist] [Rank 0] step=5060, skipped=70, lr=[3.34459718415306e-06, 3.34459718415306e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:53:54,306] [INFO] [timer.py:199:stop] epoch=0/micro_step=5060/global_step=5060, RunningAvgSamplesPerSec=105.20452398487495, CurrSamplesPerSec=101.51252440100772, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:53:54,399] [INFO] [logging.py:96:log_dist] [Rank 0] step=5060, skipped=82, lr=[1.7439529482702621e-06, 1.7439529482702621e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5059|ppo_ep: 1|act_loss: 0.025421142578125|cri_loss: 0.01319122314453125|unsuper_loss: 0.0 +average reward score: 4.3359375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.25%) |Training time=0.48s (22.06%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5060|ppo_ep: 1|act_loss: -0.00881195068359375|cri_loss: -0.00376129150390625|unsuper_loss: 0.0 +average reward score: 5.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.52%) |Training time=0.47s (21.76%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5061|ppo_ep: 1|act_loss: -0.020477294921875|cri_loss: -0.0099945068359375|unsuper_loss: 0.0 +average reward score: 5.015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.77%) |Training time=0.46s (21.51%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5062|ppo_ep: 1|act_loss: -0.0039825439453125|cri_loss: -0.0017499923706054688|unsuper_loss: 0.0 +average reward score: 6.15234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.47s (21.88%) |Others=0.10 (4.76%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5063|ppo_ep: 1|act_loss: -0.0006098747253417969|cri_loss: -0.00013113021850585938|unsuper_loss: 0.0 +average reward score: 4.94140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.62%) |Training time=0.47s (21.66%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5064|ppo_ep: 1|act_loss: -0.018096923828125|cri_loss: -0.00875091552734375|unsuper_loss: 0.0 +average reward score: 5.40234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.49%) |Training time=0.47s (21.85%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5065|ppo_ep: 1|act_loss: -0.0052947998046875|cri_loss: -0.00177001953125|unsuper_loss: 0.0 +average reward score: 5.28125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.47s (21.51%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5066|ppo_ep: 1|act_loss: -0.033935546875|cri_loss: -0.0165252685546875|unsuper_loss: 0.0 +average reward score: 6.1953125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.06%) |Training time=0.49s (22.25%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5067|ppo_ep: 1|act_loss: -0.0284423828125|cri_loss: -0.012725830078125|unsuper_loss: 0.0 +average reward score: 4.7734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.47s (21.73%) |Others=0.10 (4.82%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5068|ppo_ep: 1|act_loss: 0.0450439453125|cri_loss: 0.0234527587890625|unsuper_loss: 0.0 +average reward score: 6.26171875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.47%) |Training time=0.47s (21.87%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.48 +[2023-04-14 11:54:15,945] [INFO] [logging.py:96:log_dist] [Rank 0] step=5070, skipped=70, lr=[3.326927964042562e-06, 3.326927964042562e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:54:15,963] [INFO] [timer.py:199:stop] epoch=0/micro_step=5070/global_step=5070, RunningAvgSamplesPerSec=105.20114928702438, CurrSamplesPerSec=102.08806252258077, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:54:16,056] [INFO] [logging.py:96:log_dist] [Rank 0] step=5070, skipped=82, lr=[1.734784284838676e-06, 1.734784284838676e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5069|ppo_ep: 1|act_loss: 0.03436279296875|cri_loss: 0.0176849365234375|unsuper_loss: 0.0 +average reward score: 4.53515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.40%) |Training time=0.48s (21.96%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5070|ppo_ep: 1|act_loss: 0.022369384765625|cri_loss: 0.0124053955078125|unsuper_loss: 0.0 +average reward score: 4.06640625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.92%) |Training time=0.49s (22.42%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5071|ppo_ep: 1|act_loss: 0.0203399658203125|cri_loss: 0.0111236572265625|unsuper_loss: 0.0 +average reward score: 6.125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.79%) |Training time=0.49s (22.55%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5072|ppo_ep: 1|act_loss: 0.00518798828125|cri_loss: 0.0029449462890625|unsuper_loss: 0.0 +average reward score: 4.8125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.17%) |Training time=0.48s (22.16%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5073|ppo_ep: 1|act_loss: -0.034881591796875|cri_loss: -0.01507568359375|unsuper_loss: 0.0 +average reward score: 5.96875 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.74s (74.68%) |Training time=0.49s (20.99%) |Others=0.10 (4.33%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5074|ppo_ep: 1|act_loss: 0.001644134521484375|cri_loss: 0.0011005401611328125|unsuper_loss: 0.0 +average reward score: 5.12109375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.73%) |Training time=0.49s (22.64%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5075|ppo_ep: 1|act_loss: 0.001613616943359375|cri_loss: 0.004695892333984375|unsuper_loss: 0.0 +average reward score: 4.4765625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.97%) |Training time=0.49s (22.41%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5076|ppo_ep: 1|act_loss: -0.02197265625|cri_loss: -0.01043701171875|unsuper_loss: 0.0 +average reward score: 4.91015625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.78%) |Training time=0.49s (22.57%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5077|ppo_ep: 1|act_loss: -0.00782012939453125|cri_loss: -0.0032196044921875|unsuper_loss: 0.0 +average reward score: 5.3984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.09%) |Training time=0.48s (22.26%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5078|ppo_ep: 1|act_loss: 0.00946044921875|cri_loss: 0.00511932373046875|unsuper_loss: 0.0 +average reward score: 5.64453125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.66%) |Training time=0.49s (22.74%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.48 +[2023-04-14 11:54:37,791] [INFO] [logging.py:96:log_dist] [Rank 0] step=5080, skipped=70, lr=[3.309280948973539e-06, 3.309280948973539e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:54:37,809] [INFO] [timer.py:199:stop] epoch=0/micro_step=5080/global_step=5080, RunningAvgSamplesPerSec=105.1869830044589, CurrSamplesPerSec=97.66836872434452, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:54:37,902] [INFO] [logging.py:96:log_dist] [Rank 0] step=5080, skipped=82, lr=[1.725626963749971e-06, 1.725626963749971e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5079|ppo_ep: 1|act_loss: 0.003009796142578125|cri_loss: 0.00188446044921875|unsuper_loss: 0.0 +average reward score: 5.93359375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.81%) |Training time=0.49s (22.58%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5080|ppo_ep: 1|act_loss: 0.009307861328125|cri_loss: 0.00501251220703125|unsuper_loss: 0.0 +average reward score: 4.3515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.95%) |Training time=0.49s (22.45%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5081|ppo_ep: 1|act_loss: 0.03912353515625|cri_loss: 0.0212554931640625|unsuper_loss: 0.0 +average reward score: 4.7578125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.16%) |Training time=0.46s (21.19%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5082|ppo_ep: 1|act_loss: -0.02783203125|cri_loss: -0.0126953125|unsuper_loss: 0.0 +average reward score: 5.19140625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.29%) |Training time=0.48s (22.09%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5083|ppo_ep: 1|act_loss: -0.0003559589385986328|cri_loss: 4.00543212890625e-05|unsuper_loss: 0.0 +average reward score: 5.48828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.48s (21.91%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5084|ppo_ep: 1|act_loss: -0.01495361328125|cri_loss: -0.007282257080078125|unsuper_loss: 0.0 +average reward score: 6.19140625 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.66s (75.45%) |Training time=0.44s (19.90%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5085|ppo_ep: 1|act_loss: -0.029693603515625|cri_loss: -0.01422119140625|unsuper_loss: 0.0 +average reward score: 6.265625 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.73s (73.78%) |Training time=0.50s (21.41%) |Others=0.11 (4.80%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5086|ppo_ep: 1|act_loss: -0.016754150390625|cri_loss: -0.0082244873046875|unsuper_loss: 0.0 +average reward score: 4.765625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.00%) |Training time=0.46s (21.31%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5087|ppo_ep: 1|act_loss: -0.0077972412109375|cri_loss: -0.003520965576171875|unsuper_loss: 0.0 +average reward score: 5.06640625 +------------------------------------------------------------------------------------- +|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.78s (75.05%) |Training time=0.49s (20.63%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.50 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5088|ppo_ep: 1|act_loss: -0.0093994140625|cri_loss: -0.004535675048828125|unsuper_loss: 0.0 +average reward score: 6.8359375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.20%) |Training time=0.46s (21.04%) |Others=0.10 (4.77%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.48 +[2023-04-14 11:54:59,905] [INFO] [logging.py:96:log_dist] [Rank 0] step=5090, skipped=70, lr=[3.291656400517325e-06, 3.291656400517325e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:54:59,923] [INFO] [timer.py:199:stop] epoch=0/micro_step=5090/global_step=5090, RunningAvgSamplesPerSec=105.1857576987737, CurrSamplesPerSec=112.81056883645637, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:55:00,016] [INFO] [logging.py:96:log_dist] [Rank 0] step=5090, skipped=82, lr=[1.7164811207377364e-06, 1.7164811207377364e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5089|ppo_ep: 1|act_loss: 0.03240966796875|cri_loss: 0.016815185546875|unsuper_loss: 0.0 +average reward score: 5.37890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.76%) |Training time=0.45s (20.55%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5090|ppo_ep: 1|act_loss: 0.01641845703125|cri_loss: 0.00855255126953125|unsuper_loss: 0.0 +average reward score: 4.61328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.59%) |Training time=0.45s (20.72%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5091|ppo_ep: 1|act_loss: 0.004547119140625|cri_loss: 0.002780914306640625|unsuper_loss: 0.0 +average reward score: 4.9609375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.77%) |Training time=0.47s (21.56%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5092|ppo_ep: 1|act_loss: 0.001316070556640625|cri_loss: 0.0007786750793457031|unsuper_loss: 0.0 +average reward score: 5.578125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.94%) |Training time=0.46s (21.36%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5093|ppo_ep: 1|act_loss: 0.020843505859375|cri_loss: 0.01065826416015625|unsuper_loss: 0.0 +average reward score: 3.98046875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.91%) |Training time=0.46s (21.38%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5094|ppo_ep: 1|act_loss: 0.0036144256591796875|cri_loss: 0.002353668212890625|unsuper_loss: 0.0 +average reward score: 4.765625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.63%) |Training time=0.47s (21.71%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5095|ppo_ep: 1|act_loss: -0.01236724853515625|cri_loss: -0.005863189697265625|unsuper_loss: 0.0 +average reward score: 5.55859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.80%) |Training time=0.47s (21.51%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5096|ppo_ep: 1|act_loss: -0.01849365234375|cri_loss: -0.00894927978515625|unsuper_loss: 0.0 +average reward score: 6.2265625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.37%) |Training time=0.48s (21.97%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5097|ppo_ep: 1|act_loss: 0.0105133056640625|cri_loss: 0.0054931640625|unsuper_loss: 0.0 +average reward score: 5.24609375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.27%) |Training time=0.46s (21.04%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5098|ppo_ep: 1|act_loss: -0.031341552734375|cri_loss: -0.01502227783203125|unsuper_loss: 0.0 +average reward score: 4.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.57%) |Training time=0.45s (20.76%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.48 +[2023-04-14 11:55:21,634] [INFO] [logging.py:96:log_dist] [Rank 0] step=5100, skipped=70, lr=[3.2740545799122467e-06, 3.2740545799122467e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:55:21,653] [INFO] [timer.py:199:stop] epoch=0/micro_step=5100/global_step=5100, RunningAvgSamplesPerSec=105.18997521642397, CurrSamplesPerSec=117.28935534330132, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:55:21,745] [INFO] [logging.py:96:log_dist] [Rank 0] step=5100, skipped=82, lr=[1.707346891365429e-06, 1.707346891365429e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5099|ppo_ep: 1|act_loss: -0.01549530029296875|cri_loss: -0.007568359375|unsuper_loss: 0.0 +average reward score: 4.921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.24%) |Training time=0.44s (20.11%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5100|ppo_ep: 1|act_loss: 0.0120697021484375|cri_loss: 0.006183624267578125|unsuper_loss: 0.0 +average reward score: 5.3984375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.84%) |Training time=0.44s (20.44%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5101|ppo_ep: 1|act_loss: 0.01096343994140625|cri_loss: 0.005687713623046875|unsuper_loss: 0.0 +average reward score: 4.8515625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.55%) |Training time=0.44s (20.76%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5102|ppo_ep: 1|act_loss: -0.017669677734375|cri_loss: -0.00824737548828125|unsuper_loss: 0.0 +average reward score: 5.90625 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.65s (73.06%) |Training time=0.51s (22.46%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.14 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5103|ppo_ep: 1|act_loss: -0.026336669921875|cri_loss: -0.01267242431640625|unsuper_loss: 0.0 +average reward score: 4.98046875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.46%) |Training time=0.45s (20.89%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5104|ppo_ep: 1|act_loss: 0.019775390625|cri_loss: 0.01068115234375|unsuper_loss: 0.0 +average reward score: 5.125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.58%) |Training time=0.44s (20.71%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5105|ppo_ep: 1|act_loss: 0.024261474609375|cri_loss: 0.0125885009765625|unsuper_loss: 0.0 +average reward score: 5.84375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.62%) |Training time=0.44s (20.69%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5106|ppo_ep: 1|act_loss: -0.05169677734375|cri_loss: -0.0247955322265625|unsuper_loss: 0.0 +average reward score: 4.359375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.44%) |Training time=0.45s (20.89%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5107|ppo_ep: 1|act_loss: -0.0169830322265625|cri_loss: -0.00818634033203125|unsuper_loss: 0.0 +average reward score: 5.28515625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.50%) |Training time=0.45s (20.79%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5108|ppo_ep: 1|act_loss: 0.0013885498046875|cri_loss: 0.0008983612060546875|unsuper_loss: 0.0 +average reward score: 4.78125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.33%) |Training time=0.45s (20.98%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.48 +[2023-04-14 11:55:43,186] [INFO] [logging.py:96:log_dist] [Rank 0] step=5110, skipped=70, lr=[3.256475748059745e-06, 3.256475748059745e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:55:43,204] [INFO] [timer.py:199:stop] epoch=0/micro_step=5110/global_step=5110, RunningAvgSamplesPerSec=105.20440380442844, CurrSamplesPerSec=112.62106255936578, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:55:43,297] [INFO] [logging.py:96:log_dist] [Rank 0] step=5110, skipped=82, lr=[1.6982244110243626e-06, 1.6982244110243626e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5109|ppo_ep: 1|act_loss: 0.00341033935546875|cri_loss: 0.001926422119140625|unsuper_loss: 0.0 +average reward score: 5.0625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.49%) |Training time=0.45s (20.83%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5110|ppo_ep: 1|act_loss: 0.006072998046875|cri_loss: 0.0032405853271484375|unsuper_loss: 0.0 +average reward score: 6.2265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.32%) |Training time=0.45s (21.02%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5111|ppo_ep: 1|act_loss: -0.02947998046875|cri_loss: -0.01439666748046875|unsuper_loss: 0.0 +average reward score: 7.17578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.19%) |Training time=0.45s (21.15%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5112|ppo_ep: 1|act_loss: -0.005096435546875|cri_loss: -0.00234222412109375|unsuper_loss: 0.0 +average reward score: 4.328125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.14%) |Training time=0.46s (21.19%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5113|ppo_ep: 1|act_loss: -0.017669677734375|cri_loss: -0.00860595703125|unsuper_loss: 0.0 +average reward score: 5.546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.03%) |Training time=0.46s (21.34%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5114|ppo_ep: 1|act_loss: -0.01715087890625|cri_loss: -0.0082550048828125|unsuper_loss: 0.0 +average reward score: 5.71875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.95%) |Training time=0.44s (20.41%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5115|ppo_ep: 1|act_loss: -0.0175323486328125|cri_loss: -0.00847625732421875|unsuper_loss: 0.0 +average reward score: 5.640625 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (75.06%) |Training time=0.46s (20.48%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.19 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5116|ppo_ep: 1|act_loss: 0.01445770263671875|cri_loss: 0.007415771484375|unsuper_loss: 0.0 +average reward score: 4.45703125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.78%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5117|ppo_ep: 1|act_loss: 0.035400390625|cri_loss: 0.018798828125|unsuper_loss: 0.0 +average reward score: 5.4296875 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.64s (71.04%) |Training time=0.55s (23.99%) |Others=0.11 (4.96%)|CurSamplesPerSec=13.84 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5118|ppo_ep: 1|act_loss: 0.00244140625|cri_loss: 0.001384735107421875|unsuper_loss: 0.0 +average reward score: 6.84375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.85%) |Training time=0.46s (21.50%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.48 +[2023-04-14 11:56:05,008] [INFO] [logging.py:96:log_dist] [Rank 0] step=5120, skipped=70, lr=[3.2389201655205164e-06, 3.2389201655205164e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:56:05,026] [INFO] [timer.py:199:stop] epoch=0/micro_step=5120/global_step=5120, RunningAvgSamplesPerSec=105.20632463741921, CurrSamplesPerSec=105.29688544396153, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:56:05,120] [INFO] [logging.py:96:log_dist] [Rank 0] step=5120, skipped=82, lr=[1.6891138149317038e-06, 1.6891138149317038e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5119|ppo_ep: 1|act_loss: 0.02301025390625|cri_loss: 0.011810302734375|unsuper_loss: 0.0 +average reward score: 5.53125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.65%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5120|ppo_ep: 1|act_loss: 0.00733184814453125|cri_loss: 0.00391387939453125|unsuper_loss: 0.0 +average reward score: 5.390625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.85%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5121|ppo_ep: 1|act_loss: 0.0180816650390625|cri_loss: 0.00943756103515625|unsuper_loss: 0.0 +average reward score: 5.26171875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.61%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5122|ppo_ep: 1|act_loss: 0.0029296875|cri_loss: 0.00183868408203125|unsuper_loss: 0.0 +average reward score: 4.53125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.77%) |Training time=0.47s (21.58%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5123|ppo_ep: 1|act_loss: 0.0094451904296875|cri_loss: 0.004840850830078125|unsuper_loss: 0.0 +average reward score: 5.57421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.60%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5124|ppo_ep: 1|act_loss: -0.0184326171875|cri_loss: -0.00870513916015625|unsuper_loss: 0.0 +average reward score: 5.76171875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.64%) |Training time=0.47s (21.73%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5125|ppo_ep: 1|act_loss: -0.023223876953125|cri_loss: -0.01103973388671875|unsuper_loss: 0.0 +average reward score: 5.48046875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.47s (21.90%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5126|ppo_ep: 1|act_loss: -0.01800537109375|cri_loss: -0.00847625732421875|unsuper_loss: 0.0 +average reward score: 5.42578125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.75%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5127|ppo_ep: 1|act_loss: -0.0146942138671875|cri_loss: -0.00698089599609375|unsuper_loss: 0.0 +average reward score: 5.24609375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.28%) |Training time=0.48s (22.12%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5128|ppo_ep: 1|act_loss: 0.027069091796875|cri_loss: 0.01407623291015625|unsuper_loss: 0.0 +average reward score: 5.0859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.13%) |Training time=0.48s (22.26%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.48 +[2023-04-14 11:56:26,608] [INFO] [logging.py:96:log_dist] [Rank 0] step=5130, skipped=70, lr=[3.221388092510643e-06, 3.221388092510643e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:56:26,627] [INFO] [timer.py:199:stop] epoch=0/micro_step=5130/global_step=5130, RunningAvgSamplesPerSec=105.2030162387455, CurrSamplesPerSec=103.4807402444506, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:56:26,719] [INFO] [logging.py:96:log_dist] [Rank 0] step=5130, skipped=82, lr=[1.6800152381284634e-06, 1.6800152381284634e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5129|ppo_ep: 1|act_loss: 0.0220489501953125|cri_loss: 0.0125732421875|unsuper_loss: 0.0 +average reward score: 5.390625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.90%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5130|ppo_ep: 1|act_loss: -0.0015277862548828125|cri_loss: -0.000614166259765625|unsuper_loss: 0.0 +average reward score: 5.55078125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.43%) |Training time=0.47s (21.81%) |Others=0.10 (4.76%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5131|ppo_ep: 1|act_loss: 5.0961971282958984e-05|cri_loss: 0.00015354156494140625|unsuper_loss: 0.0 +average reward score: 5.3515625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.73%) |Training time=0.47s (21.63%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5132|ppo_ep: 1|act_loss: 0.022308349609375|cri_loss: 0.0114288330078125|unsuper_loss: 0.0 +average reward score: 5.109375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.24%) |Training time=0.51s (23.17%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5133|ppo_ep: 1|act_loss: -0.03240966796875|cri_loss: -0.01531219482421875|unsuper_loss: 0.0 +average reward score: 5.75390625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.34%) |Training time=0.45s (21.02%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5134|ppo_ep: 1|act_loss: -0.001499176025390625|cri_loss: -0.0005617141723632812|unsuper_loss: 0.0 +average reward score: 4.89453125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.60%) |Training time=0.47s (21.77%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5135|ppo_ep: 1|act_loss: -0.013702392578125|cri_loss: -0.00635528564453125|unsuper_loss: 0.0 +average reward score: 5.03515625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.73%) |Training time=0.50s (22.69%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5136|ppo_ep: 1|act_loss: -0.02093505859375|cri_loss: -0.01007080078125|unsuper_loss: 0.0 +average reward score: 6.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.02%) |Training time=0.49s (22.37%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5137|ppo_ep: 1|act_loss: -0.005767822265625|cri_loss: -0.0024394989013671875|unsuper_loss: 0.0 +average reward score: 4.74609375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.88%) |Training time=0.49s (22.55%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5138|ppo_ep: 1|act_loss: -0.01102447509765625|cri_loss: -0.00304412841796875|unsuper_loss: 0.0 +average reward score: 5.5234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.22%) |Training time=0.48s (22.15%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.48 +[2023-04-14 11:56:48,341] [INFO] [logging.py:96:log_dist] [Rank 0] step=5140, skipped=70, lr=[3.203879788897743e-06, 3.203879788897743e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:56:48,359] [INFO] [timer.py:199:stop] epoch=0/micro_step=5140/global_step=5140, RunningAvgSamplesPerSec=105.19522715135726, CurrSamplesPerSec=101.88409671789803, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:56:48,452] [INFO] [logging.py:96:log_dist] [Rank 0] step=5140, skipped=82, lr=[1.6709288154775e-06, 1.6709288154775e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5139|ppo_ep: 1|act_loss: 0.0029087066650390625|cri_loss: 0.0019779205322265625|unsuper_loss: 0.0 +average reward score: 5.828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.48s (21.96%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5140|ppo_ep: 1|act_loss: -0.0158843994140625|cri_loss: -0.007659912109375|unsuper_loss: 0.0 +average reward score: 5.62890625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.00%) |Training time=0.49s (22.41%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5141|ppo_ep: 1|act_loss: 0.006500244140625|cri_loss: 0.0035247802734375|unsuper_loss: 0.0 +average reward score: 4.609375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.65%) |Training time=0.47s (21.71%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5142|ppo_ep: 1|act_loss: -0.00617218017578125|cri_loss: -0.0020294189453125|unsuper_loss: 0.0 +average reward score: 5.5703125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.31%) |Training time=0.46s (21.09%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5143|ppo_ep: 1|act_loss: 0.007320404052734375|cri_loss: 0.003997802734375|unsuper_loss: 0.0 +average reward score: 4.8203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.68%) |Training time=0.47s (21.43%) |Others=0.11 (4.89%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5144|ppo_ep: 1|act_loss: 0.0160675048828125|cri_loss: 0.00841522216796875|unsuper_loss: 0.0 +average reward score: 4.828125 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.62s (71.75%) |Training time=0.54s (23.79%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.21 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5145|ppo_ep: 1|act_loss: 0.04425048828125|cri_loss: 0.0236968994140625|unsuper_loss: 0.0 +average reward score: 5.625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.46s (21.37%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5146|ppo_ep: 1|act_loss: 0.02178955078125|cri_loss: 0.01132965087890625|unsuper_loss: 0.0 +average reward score: 4.28515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.46s (21.56%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5147|ppo_ep: 1|act_loss: 0.03814697265625|cri_loss: 0.019622802734375|unsuper_loss: 0.0 +average reward score: 5.2265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.73%) |Training time=0.47s (21.64%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5148|ppo_ep: 1|act_loss: 0.012054443359375|cri_loss: 0.0062408447265625|unsuper_loss: 0.0 +average reward score: 4.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.19%) |Training time=0.51s (23.24%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.48 +[2023-04-14 11:57:10,149] [INFO] [logging.py:96:log_dist] [Rank 0] step=5150, skipped=70, lr=[3.186395514197109e-06, 3.186395514197109e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:57:10,168] [INFO] [timer.py:199:stop] epoch=0/micro_step=5150/global_step=5150, RunningAvgSamplesPerSec=105.18848918908238, CurrSamplesPerSec=107.31580260338376, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:57:10,261] [INFO] [logging.py:96:log_dist] [Rank 0] step=5150, skipped=82, lr=[1.6618546816615162e-06, 1.6618546816615162e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5149|ppo_ep: 1|act_loss: 0.004528045654296875|cri_loss: 0.002605438232421875|unsuper_loss: 0.0 +average reward score: 4.08984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.79%) |Training time=0.47s (21.59%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5150|ppo_ep: 1|act_loss: -0.020782470703125|cri_loss: -0.01001739501953125|unsuper_loss: 0.0 +average reward score: 4.7421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.82%) |Training time=0.47s (21.55%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5151|ppo_ep: 1|act_loss: 0.0063323974609375|cri_loss: 0.0055999755859375|unsuper_loss: 0.0 +average reward score: 4.18359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.10%) |Training time=0.46s (21.23%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5152|ppo_ep: 1|act_loss: -0.0015277862548828125|cri_loss: -0.0006008148193359375|unsuper_loss: 0.0 +average reward score: 4.44140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.73%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5153|ppo_ep: 1|act_loss: 0.0142669677734375|cri_loss: 0.0084686279296875|unsuper_loss: 0.0 +average reward score: 5.046875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.42%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5154|ppo_ep: 1|act_loss: -0.004833221435546875|cri_loss: -0.002147674560546875|unsuper_loss: 0.0 +average reward score: 5.59375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.47s (21.58%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5155|ppo_ep: 1|act_loss: -0.0033206939697265625|cri_loss: -0.0005054473876953125|unsuper_loss: 0.0 +average reward score: 5.3125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.65%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5156|ppo_ep: 1|act_loss: -0.02227783203125|cri_loss: -0.01050567626953125|unsuper_loss: 0.0 +average reward score: 5.1875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.32%) |Training time=0.45s (21.00%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5157|ppo_ep: 1|act_loss: -0.030426025390625|cri_loss: -0.01453399658203125|unsuper_loss: 0.0 +average reward score: 5.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.50%) |Training time=0.45s (20.74%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5158|ppo_ep: 1|act_loss: 0.0006160736083984375|cri_loss: 0.0019016265869140625|unsuper_loss: 0.0 +average reward score: 5.59375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.50%) |Training time=0.45s (20.83%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.48 +[2023-04-14 11:57:31,697] [INFO] [logging.py:96:log_dist] [Rank 0] step=5160, skipped=70, lr=[3.1689355275678734e-06, 3.1689355275678734e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:57:31,715] [INFO] [timer.py:199:stop] epoch=0/micro_step=5160/global_step=5160, RunningAvgSamplesPerSec=105.19364901967187, CurrSamplesPerSec=107.49044406429995, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:57:31,809] [INFO] [logging.py:96:log_dist] [Rank 0] step=5160, skipped=82, lr=[1.652792971181065e-06, 1.652792971181065e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5159|ppo_ep: 1|act_loss: -0.00487518310546875|cri_loss: -0.0021419525146484375|unsuper_loss: 0.0 +average reward score: 6.12890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.01%) |Training time=0.46s (21.36%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5160|ppo_ep: 1|act_loss: 0.019805908203125|cri_loss: 0.011138916015625|unsuper_loss: 0.0 +average reward score: 5.14453125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.45s (21.07%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5161|ppo_ep: 1|act_loss: 0.0233612060546875|cri_loss: 0.012054443359375|unsuper_loss: 0.0 +average reward score: 5.2734375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.43%) |Training time=0.45s (20.91%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5162|ppo_ep: 1|act_loss: 0.00848388671875|cri_loss: 0.004398345947265625|unsuper_loss: 0.0 +average reward score: 5.92578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.96%) |Training time=0.46s (21.42%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5163|ppo_ep: 1|act_loss: 0.00827789306640625|cri_loss: 0.004608154296875|unsuper_loss: 0.0 +average reward score: 5.4765625 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.19%) |Training time=0.48s (20.50%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5164|ppo_ep: 1|act_loss: 0.013702392578125|cri_loss: 0.00714874267578125|unsuper_loss: 0.0 +average reward score: 4.94140625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.23%) |Training time=0.45s (21.11%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5165|ppo_ep: 1|act_loss: 0.0002040863037109375|cri_loss: 0.00017511844635009766|unsuper_loss: 0.0 +average reward score: 4.8125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.33%) |Training time=0.45s (21.03%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5166|ppo_ep: 1|act_loss: -0.012786865234375|cri_loss: -0.006092071533203125|unsuper_loss: 0.0 +average reward score: 5.140625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.39%) |Training time=0.45s (20.95%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5167|ppo_ep: 1|act_loss: -0.0180206298828125|cri_loss: -0.0085296630859375|unsuper_loss: 0.0 +average reward score: 5.51171875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.22%) |Training time=0.46s (21.14%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5168|ppo_ep: 1|act_loss: 0.002552032470703125|cri_loss: 0.0014467239379882812|unsuper_loss: 0.0 +average reward score: 5.18359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.45s (21.09%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.48 +[2023-04-14 11:57:53,391] [INFO] [logging.py:96:log_dist] [Rank 0] step=5170, skipped=70, lr=[3.151500087809158e-06, 3.151500087809158e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:57:53,410] [INFO] [timer.py:199:stop] epoch=0/micro_step=5170/global_step=5170, RunningAvgSamplesPerSec=105.20113500419411, CurrSamplesPerSec=112.40855699709468, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:57:53,494] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +[2023-04-14 11:57:53,494] [INFO] [logging.py:96:log_dist] [Rank 0] step=5170, skipped=83, lr=[1.6446481647168905e-06, 1.6446481647168905e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5169|ppo_ep: 1|act_loss: -0.0045623779296875|cri_loss: -0.00177001953125|unsuper_loss: 0.0 +average reward score: 4.703125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.93%) |Training time=0.45s (20.91%) |Others=0.09 (4.16%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.48 +[2023-04-14 11:57:55,633] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 5170|ppo_ep: 1|act_loss: 0.01143646240234375|cri_loss: 0.00624847412109375|unsuper_loss: 0.0 +average reward score: 5.3203125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.79%) |Training time=0.45s (21.02%) |Others=0.09 (4.18%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5171|ppo_ep: 1|act_loss: 0.0195159912109375|cri_loss: 0.01044464111328125|unsuper_loss: 0.0 +average reward score: 5.30859375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.68%) |Training time=0.44s (20.66%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5172|ppo_ep: 1|act_loss: -0.0211944580078125|cri_loss: -0.0100250244140625|unsuper_loss: 0.0 +average reward score: 5.875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.90%) |Training time=0.44s (20.23%) |Others=0.11 (4.87%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5173|ppo_ep: 1|act_loss: -0.00702667236328125|cri_loss: -0.0029582977294921875|unsuper_loss: 0.0 +average reward score: 5.3984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.37%) |Training time=0.43s (20.02%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5174|ppo_ep: 1|act_loss: -0.0139312744140625|cri_loss: -0.006595611572265625|unsuper_loss: 0.0 +average reward score: 6.22265625 +------------------------------------------------------------------------------------- +|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.71s (76.28%) |Training time=0.43s (19.25%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.26 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5175|ppo_ep: 1|act_loss: 0.001678466796875|cri_loss: 0.00147247314453125|unsuper_loss: 0.0 +average reward score: 4.75390625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.45s (21.15%) |Others=0.10 (4.82%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5176|ppo_ep: 1|act_loss: -0.0050201416015625|cri_loss: -0.0023059844970703125|unsuper_loss: 0.0 +average reward score: 4.6171875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.21%) |Training time=0.45s (21.12%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5177|ppo_ep: 1|act_loss: 0.0244293212890625|cri_loss: 0.01271820068359375|unsuper_loss: 0.0 +average reward score: 5.6875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.86%) |Training time=0.46s (21.47%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5178|ppo_ep: 1|act_loss: 0.016876220703125|cri_loss: 0.008758544921875|unsuper_loss: 0.0 +average reward score: 6.0859375 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.76s (76.19%) |Training time=0.45s (19.46%) |Others=0.10 (4.35%)|CurSamplesPerSec=13.82 |AvgSamplesPerSec=14.48 +[2023-04-14 11:58:15,143] [INFO] [logging.py:96:log_dist] [Rank 0] step=5180, skipped=70, lr=[3.13408945335624e-06, 3.13408945335624e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:58:15,161] [INFO] [timer.py:199:stop] epoch=0/micro_step=5180/global_step=5180, RunningAvgSamplesPerSec=105.21499798949904, CurrSamplesPerSec=114.18577494421204, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:58:15,254] [INFO] [logging.py:96:log_dist] [Rank 0] step=5180, skipped=84, lr=[1.6365136277409998e-06, 1.6365136277409998e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5179|ppo_ep: 1|act_loss: -0.024688720703125|cri_loss: -0.0120391845703125|unsuper_loss: 0.0 +average reward score: 6.1796875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.67%) |Training time=0.44s (20.67%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5180|ppo_ep: 1|act_loss: 0.00354766845703125|cri_loss: 0.0020904541015625|unsuper_loss: 0.0 +average reward score: 4.97265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.01%) |Training time=0.43s (20.27%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5181|ppo_ep: 1|act_loss: 0.008453369140625|cri_loss: 0.0045013427734375|unsuper_loss: 0.0 +average reward score: 5.0234375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.71%) |Training time=0.44s (20.59%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5182|ppo_ep: 1|act_loss: -0.015106201171875|cri_loss: -0.006755828857421875|unsuper_loss: 0.0 +average reward score: 5.6953125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.63%) |Training time=0.44s (20.71%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5183|ppo_ep: 1|act_loss: 0.004367828369140625|cri_loss: 0.002498626708984375|unsuper_loss: 0.0 +average reward score: 5.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.82%) |Training time=0.44s (20.51%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5184|ppo_ep: 1|act_loss: -0.016021728515625|cri_loss: -0.00772857666015625|unsuper_loss: 0.0 +average reward score: 5.8359375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.69%) |Training time=0.44s (20.58%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5185|ppo_ep: 1|act_loss: 0.039581298828125|cri_loss: 0.0210113525390625|unsuper_loss: 0.0 +average reward score: 5.1953125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.73%) |Training time=0.44s (20.60%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5186|ppo_ep: 1|act_loss: 0.04901123046875|cri_loss: 0.0275726318359375|unsuper_loss: 0.0 +average reward score: 4.7890625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.72%) |Training time=0.44s (20.59%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5187|ppo_ep: 1|act_loss: 0.002056121826171875|cri_loss: 0.0012464523315429688|unsuper_loss: 0.0 +average reward score: 5.76171875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.83%) |Training time=0.44s (20.49%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5188|ppo_ep: 1|act_loss: 0.0018367767333984375|cri_loss: 0.001033782958984375|unsuper_loss: 0.0 +average reward score: 5.02734375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.95%) |Training time=0.44s (20.40%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.48 +[2023-04-14 11:58:36,588] [INFO] [logging.py:96:log_dist] [Rank 0] step=5190, skipped=70, lr=[3.116703882276723e-06, 3.116703882276723e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:58:36,607] [INFO] [timer.py:199:stop] epoch=0/micro_step=5190/global_step=5190, RunningAvgSamplesPerSec=105.23254817548194, CurrSamplesPerSec=114.20268043044129, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:58:36,699] [INFO] [logging.py:96:log_dist] [Rank 0] step=5190, skipped=84, lr=[1.6274874165679296e-06, 1.6274874165679296e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5189|ppo_ep: 1|act_loss: -0.01806640625|cri_loss: -0.008819580078125|unsuper_loss: 0.0 +average reward score: 4.8125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.66%) |Training time=0.44s (20.70%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5190|ppo_ep: 1|act_loss: -0.032012939453125|cri_loss: -0.015045166015625|unsuper_loss: 0.0 +average reward score: 4.8671875 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.81%) |Training time=0.44s (20.50%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5191|ppo_ep: 1|act_loss: -0.02008056640625|cri_loss: -0.0090179443359375|unsuper_loss: 0.0 +average reward score: 5.01953125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.64%) |Training time=0.44s (20.69%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5192|ppo_ep: 1|act_loss: -0.008148193359375|cri_loss: -0.003566741943359375|unsuper_loss: 0.0 +average reward score: 6.046875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.74%) |Training time=0.44s (20.58%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5193|ppo_ep: 1|act_loss: -0.00064849853515625|cri_loss: -0.000164031982421875|unsuper_loss: 0.0 +average reward score: 4.40234375 +------------------------------------------------------------------------------------- +|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.68%) |Training time=0.44s (18.98%) |Others=0.10 (4.34%)|CurSamplesPerSec=13.89 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5194|ppo_ep: 1|act_loss: -0.017242431640625|cri_loss: -0.0084228515625|unsuper_loss: 0.0 +average reward score: 5.2734375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.61%) |Training time=0.44s (20.71%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5195|ppo_ep: 1|act_loss: 0.015167236328125|cri_loss: 0.00780487060546875|unsuper_loss: 0.0 +average reward score: 4.328125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.60%) |Training time=0.44s (20.73%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5196|ppo_ep: 1|act_loss: 0.0142059326171875|cri_loss: 0.00827789306640625|unsuper_loss: 0.0 +average reward score: 4.98046875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.64%) |Training time=0.44s (20.69%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5197|ppo_ep: 1|act_loss: 0.00450897216796875|cri_loss: 0.0025196075439453125|unsuper_loss: 0.0 +average reward score: 4.5625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.35%) |Training time=0.45s (21.01%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5198|ppo_ep: 1|act_loss: -0.00450897216796875|cri_loss: -0.00199127197265625|unsuper_loss: 0.0 +average reward score: 5.0625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.76%) |Training time=0.44s (20.58%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.48 +[2023-04-14 11:58:58,181] [INFO] [logging.py:96:log_dist] [Rank 0] step=5200, skipped=70, lr=[3.0993436322667104e-06, 3.0993436322667104e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:58:58,199] [INFO] [timer.py:199:stop] epoch=0/micro_step=5200/global_step=5200, RunningAvgSamplesPerSec=105.24831529325668, CurrSamplesPerSec=111.86488639962661, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:58:58,292] [INFO] [logging.py:96:log_dist] [Rank 0] step=5200, skipped=84, lr=[1.6184741381361684e-06, 1.6184741381361684e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5199|ppo_ep: 1|act_loss: -0.01104736328125|cri_loss: -0.004955291748046875|unsuper_loss: 0.0 +average reward score: 4.76953125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.46%) |Training time=0.45s (20.90%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5200|ppo_ep: 1|act_loss: 0.0209808349609375|cri_loss: 0.0107574462890625|unsuper_loss: 0.0 +average reward score: 4.74609375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.99%) |Training time=0.44s (20.34%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5201|ppo_ep: 1|act_loss: 0.00376129150390625|cri_loss: 0.002201080322265625|unsuper_loss: 0.0 +average reward score: 6.21484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.92%) |Training time=0.44s (20.44%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5202|ppo_ep: 1|act_loss: -0.01300048828125|cri_loss: -0.0059051513671875|unsuper_loss: 0.0 +average reward score: 5.15625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.65%) |Training time=0.44s (20.67%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5203|ppo_ep: 1|act_loss: -0.0003268718719482422|cri_loss: -6.699562072753906e-05|unsuper_loss: 0.0 +average reward score: 5.2265625 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.65s (74.53%) |Training time=0.46s (20.87%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5204|ppo_ep: 1|act_loss: 0.033721923828125|cri_loss: 0.01727294921875|unsuper_loss: 0.0 +average reward score: 5.1171875 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.71s (76.03%) |Training time=0.44s (19.50%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.24 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5205|ppo_ep: 1|act_loss: 0.0024261474609375|cri_loss: 0.0014257431030273438|unsuper_loss: 0.0 +average reward score: 5.38671875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.59%) |Training time=0.44s (20.70%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5206|ppo_ep: 1|act_loss: -0.006072998046875|cri_loss: -0.0028514862060546875|unsuper_loss: 0.0 +average reward score: 4.96484375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.85%) |Training time=0.46s (21.48%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5207|ppo_ep: 1|act_loss: 0.021270751953125|cri_loss: 0.01160430908203125|unsuper_loss: 0.0 +average reward score: 6.6015625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.08%) |Training time=0.46s (21.23%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5208|ppo_ep: 1|act_loss: -0.006290435791015625|cri_loss: -0.0029544830322265625|unsuper_loss: 0.0 +average reward score: 4.88671875 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.67%) |Training time=0.46s (20.01%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.83 |AvgSamplesPerSec=14.48 +[2023-04-14 11:59:19,968] [INFO] [logging.py:96:log_dist] [Rank 0] step=5210, skipped=70, lr=[3.0820089606469873e-06, 3.0820089606469873e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:59:19,986] [INFO] [timer.py:199:stop] epoch=0/micro_step=5210/global_step=5210, RunningAvgSamplesPerSec=105.25921683045586, CurrSamplesPerSec=106.84806254005278, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:59:20,078] [INFO] [logging.py:96:log_dist] [Rank 0] step=5210, skipped=84, lr=[1.6094739260442448e-06, 1.6094739260442448e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5209|ppo_ep: 1|act_loss: -0.01486968994140625|cri_loss: -0.007076263427734375|unsuper_loss: 0.0 +average reward score: 4.4296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.82%) |Training time=0.46s (21.55%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5210|ppo_ep: 1|act_loss: -0.00803375244140625|cri_loss: -0.0034542083740234375|unsuper_loss: 0.0 +average reward score: 4.625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.94%) |Training time=0.46s (21.38%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5211|ppo_ep: 1|act_loss: 0.0084381103515625|cri_loss: 0.004375457763671875|unsuper_loss: 0.0 +average reward score: 5.1015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.67%) |Training time=0.47s (21.64%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5212|ppo_ep: 1|act_loss: 0.0023822784423828125|cri_loss: 0.0013141632080078125|unsuper_loss: 0.0 +average reward score: 5.84375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.41%) |Training time=0.45s (20.92%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5213|ppo_ep: 1|act_loss: 0.011505126953125|cri_loss: 0.00585174560546875|unsuper_loss: 0.0 +average reward score: 5.5703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.61%) |Training time=0.45s (20.73%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5214|ppo_ep: 1|act_loss: 0.05206298828125|cri_loss: 0.0285491943359375|unsuper_loss: 0.0 +average reward score: 4.56640625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.65%) |Training time=0.44s (20.68%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5215|ppo_ep: 1|act_loss: -0.00803375244140625|cri_loss: -0.00379180908203125|unsuper_loss: 0.0 +average reward score: 5.59765625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.54%) |Training time=0.45s (20.80%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5216|ppo_ep: 1|act_loss: -0.02471923828125|cri_loss: -0.0115203857421875|unsuper_loss: 0.0 +average reward score: 4.6796875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.38%) |Training time=0.45s (20.98%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5217|ppo_ep: 1|act_loss: -0.0132598876953125|cri_loss: -0.006458282470703125|unsuper_loss: 0.0 +average reward score: 5.859375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.56%) |Training time=0.45s (20.76%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5218|ppo_ep: 1|act_loss: 0.0123748779296875|cri_loss: 0.00687408447265625|unsuper_loss: 0.0 +average reward score: 5.87109375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.45%) |Training time=0.42s (19.84%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.48 +[2023-04-14 11:59:41,416] [INFO] [logging.py:96:log_dist] [Rank 0] step=5220, skipped=70, lr=[3.0647001243592035e-06, 3.0647001243592035e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 11:59:41,434] [INFO] [timer.py:199:stop] epoch=0/micro_step=5220/global_step=5220, RunningAvgSamplesPerSec=105.27236962780096, CurrSamplesPerSec=115.6615648442179, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 11:59:41,527] [INFO] [logging.py:96:log_dist] [Rank 0] step=5220, skipped=84, lr=[1.6004869136970152e-06, 1.6004869136970152e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5219|ppo_ep: 1|act_loss: -0.003662109375|cri_loss: -0.0016155242919921875|unsuper_loss: 0.0 +average reward score: 5.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.81%) |Training time=0.44s (20.53%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5220|ppo_ep: 1|act_loss: -0.03924560546875|cri_loss: -0.01666259765625|unsuper_loss: 0.0 +average reward score: 4.4921875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.43%) |Training time=0.45s (20.89%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5221|ppo_ep: 1|act_loss: -0.0004165172576904297|cri_loss: -7.677078247070312e-05|unsuper_loss: 0.0 +average reward score: 4.9765625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.15%) |Training time=0.45s (21.02%) |Others=0.10 (4.83%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5222|ppo_ep: 1|act_loss: -0.00646209716796875|cri_loss: -0.002994537353515625|unsuper_loss: 0.0 +average reward score: 5.828125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.34%) |Training time=0.45s (20.99%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5223|ppo_ep: 1|act_loss: 0.0120849609375|cri_loss: 0.006378173828125|unsuper_loss: 0.0 +average reward score: 4.95703125 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.76s (76.22%) |Training time=0.45s (19.42%) |Others=0.10 (4.36%)|CurSamplesPerSec=13.86 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5224|ppo_ep: 1|act_loss: 0.036102294921875|cri_loss: 0.0192718505859375|unsuper_loss: 0.0 +average reward score: 5.59375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.34%) |Training time=0.45s (21.00%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5225|ppo_ep: 1|act_loss: -0.00815582275390625|cri_loss: -0.0037250518798828125|unsuper_loss: 0.0 +average reward score: 5.4453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.51%) |Training time=0.45s (20.87%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5226|ppo_ep: 1|act_loss: -0.00539398193359375|cri_loss: -0.002559661865234375|unsuper_loss: 0.0 +average reward score: 4.5859375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.65%) |Training time=0.44s (20.66%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5227|ppo_ep: 1|act_loss: 0.01152801513671875|cri_loss: 0.006114959716796875|unsuper_loss: 0.0 +average reward score: 5.23046875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.39%) |Training time=0.45s (20.95%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5228|ppo_ep: 1|act_loss: -0.00246429443359375|cri_loss: -0.0006666183471679688|unsuper_loss: 0.0 +average reward score: 5.640625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.34%) |Training time=0.45s (21.01%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.48 +[2023-04-14 12:00:03,045] [INFO] [logging.py:96:log_dist] [Rank 0] step=5230, skipped=70, lr=[3.04741737996207e-06, 3.04741737996207e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:00:03,063] [INFO] [timer.py:199:stop] epoch=0/micro_step=5230/global_step=5230, RunningAvgSamplesPerSec=105.28411092329453, CurrSamplesPerSec=113.17489367012611, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:00:03,156] [INFO] [logging.py:96:log_dist] [Rank 0] step=5230, skipped=84, lr=[1.591513234303681e-06, 1.591513234303681e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5229|ppo_ep: 1|act_loss: 0.0034198760986328125|cri_loss: 0.0023326873779296875|unsuper_loss: 0.0 +average reward score: 5.30078125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.55%) |Training time=0.45s (20.78%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5230|ppo_ep: 1|act_loss: 0.002960205078125|cri_loss: 0.001796722412109375|unsuper_loss: 0.0 +average reward score: 5.58203125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.62%) |Training time=0.47s (21.72%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5231|ppo_ep: 1|act_loss: 0.0132293701171875|cri_loss: 0.00711822509765625|unsuper_loss: 0.0 +average reward score: 4.48828125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.59%) |Training time=0.47s (21.74%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5232|ppo_ep: 1|act_loss: -0.0017986297607421875|cri_loss: -0.0006337165832519531|unsuper_loss: 0.0 +average reward score: 5.44921875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.91%) |Training time=0.46s (21.44%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5233|ppo_ep: 1|act_loss: 0.0102081298828125|cri_loss: 0.00537109375|unsuper_loss: 0.0 +average reward score: 5.6015625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.53%) |Training time=0.45s (20.69%) |Others=0.10 (4.78%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5234|ppo_ep: 1|act_loss: -0.009002685546875|cri_loss: -0.004108428955078125|unsuper_loss: 0.0 +average reward score: 5.44921875 +------------------------------------------------------------------------------------- +|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.69s (75.68%) |Training time=0.44s (19.84%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.31 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5235|ppo_ep: 1|act_loss: -0.029693603515625|cri_loss: -0.014312744140625|unsuper_loss: 0.0 +average reward score: 5.37109375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.74%) |Training time=0.46s (21.58%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5236|ppo_ep: 1|act_loss: -0.00720977783203125|cri_loss: -0.0032634735107421875|unsuper_loss: 0.0 +average reward score: 5.20703125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.52%) |Training time=0.44s (20.78%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5237|ppo_ep: 1|act_loss: -0.004589080810546875|cri_loss: -0.0019083023071289062|unsuper_loss: 0.0 +average reward score: 5.37109375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.62%) |Training time=0.44s (20.72%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5238|ppo_ep: 1|act_loss: 0.00301361083984375|cri_loss: 0.00196075439453125|unsuper_loss: 0.0 +average reward score: 4.9375 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.70%) |Training time=0.44s (19.01%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.48 +[2023-04-14 12:00:24,816] [INFO] [logging.py:96:log_dist] [Rank 0] step=5240, skipped=70, lr=[3.0301609836275488e-06, 3.0301609836275488e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:00:24,834] [INFO] [timer.py:199:stop] epoch=0/micro_step=5240/global_step=5240, RunningAvgSamplesPerSec=105.29263773127231, CurrSamplesPerSec=111.4146109844281, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:00:24,926] [INFO] [logging.py:96:log_dist] [Rank 0] step=5240, skipped=84, lr=[1.58255302087582e-06, 1.58255302087582e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5239|ppo_ep: 1|act_loss: 0.002498626708984375|cri_loss: 0.0015401840209960938|unsuper_loss: 0.0 +average reward score: 4.515625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.36%) |Training time=0.45s (20.98%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5240|ppo_ep: 1|act_loss: 0.0056915283203125|cri_loss: 0.0032215118408203125|unsuper_loss: 0.0 +average reward score: 5.1484375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.39%) |Training time=0.45s (20.94%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5241|ppo_ep: 1|act_loss: 0.02130126953125|cri_loss: 0.01110076904296875|unsuper_loss: 0.0 +average reward score: 4.5234375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.42%) |Training time=0.45s (20.89%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5242|ppo_ep: 1|act_loss: 0.1400146484375|cri_loss: 0.08123779296875|unsuper_loss: 0.0 +average reward score: 5.4765625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.40%) |Training time=0.45s (20.93%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.48 +epoch: 0|step: 5243|ppo_ep: 1|act_loss: 0.0140228271484375|cri_loss: 0.00774383544921875|unsuper_loss: 0.0 +average reward score: 5.0859375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.50%) |Training time=0.45s (20.86%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5244|ppo_ep: 1|act_loss: 0.0035495758056640625|cri_loss: 0.00228118896484375|unsuper_loss: 0.0 +average reward score: 5.0703125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.48%) |Training time=0.45s (20.84%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5245|ppo_ep: 1|act_loss: 0.01265716552734375|cri_loss: 0.006671905517578125|unsuper_loss: 0.0 +average reward score: 4.2734375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.61%) |Training time=0.44s (20.70%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5246|ppo_ep: 1|act_loss: -0.0294189453125|cri_loss: -0.01203155517578125|unsuper_loss: 0.0 +average reward score: 4.44140625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.60%) |Training time=0.44s (20.74%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5247|ppo_ep: 1|act_loss: -0.04681396484375|cri_loss: -0.0164337158203125|unsuper_loss: 0.0 +average reward score: 5.25390625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.59%) |Training time=0.44s (20.71%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5248|ppo_ep: 1|act_loss: -0.00452423095703125|cri_loss: -0.0018215179443359375|unsuper_loss: 0.0 +average reward score: 4.640625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.49%) |Training time=0.45s (20.84%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.49 +[2023-04-14 12:00:46,261] [INFO] [logging.py:96:log_dist] [Rank 0] step=5250, skipped=70, lr=[3.012931191137065e-06, 3.012931191137065e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:00:46,279] [INFO] [timer.py:199:stop] epoch=0/micro_step=5250/global_step=5250, RunningAvgSamplesPerSec=105.3044718220239, CurrSamplesPerSec=103.76530800000619, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:00:46,372] [INFO] [logging.py:96:log_dist] [Rank 0] step=5250, skipped=84, lr=[1.5736064062254094e-06, 1.5736064062254094e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5249|ppo_ep: 1|act_loss: -0.00366973876953125|cri_loss: -0.0006198883056640625|unsuper_loss: 0.0 +average reward score: 4.3671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.70%) |Training time=0.47s (21.73%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5250|ppo_ep: 1|act_loss: 0.0237884521484375|cri_loss: 0.0128631591796875|unsuper_loss: 0.0 +average reward score: 4.3984375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.69%) |Training time=0.44s (20.61%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5251|ppo_ep: 1|act_loss: -0.011444091796875|cri_loss: -0.005619049072265625|unsuper_loss: 0.0 +average reward score: 4.7109375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.61%) |Training time=0.44s (20.71%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5252|ppo_ep: 1|act_loss: -0.0035037994384765625|cri_loss: -0.0016393661499023438|unsuper_loss: 0.0 +average reward score: 4.2109375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.52%) |Training time=0.45s (20.83%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5253|ppo_ep: 1|act_loss: -0.0030460357666015625|cri_loss: -0.0012302398681640625|unsuper_loss: 0.0 +average reward score: 5.62109375 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.53%) |Training time=0.44s (19.12%) |Others=0.10 (4.35%)|CurSamplesPerSec=13.80 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5254|ppo_ep: 1|act_loss: 0.006267547607421875|cri_loss: 0.0032138824462890625|unsuper_loss: 0.0 +average reward score: 5.19921875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.44%) |Training time=0.45s (20.88%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5255|ppo_ep: 1|act_loss: 0.012847900390625|cri_loss: 0.006938934326171875|unsuper_loss: 0.0 +average reward score: 5.234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.45s (21.09%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5256|ppo_ep: 1|act_loss: -0.0014257431030273438|cri_loss: -0.0003261566162109375|unsuper_loss: 0.0 +average reward score: 5.234375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.54%) |Training time=0.45s (20.79%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5257|ppo_ep: 1|act_loss: 0.0113677978515625|cri_loss: 0.005828857421875|unsuper_loss: 0.0 +average reward score: 5.19921875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.49%) |Training time=0.45s (20.83%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5258|ppo_ep: 1|act_loss: -0.03814697265625|cri_loss: -0.0180816650390625|unsuper_loss: 0.0 +average reward score: 4.875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.84%) |Training time=0.44s (20.47%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.49 +[2023-04-14 12:01:07,849] [INFO] [logging.py:96:log_dist] [Rank 0] step=5260, skipped=70, lr=[2.9957282578777047e-06, 2.9957282578777047e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:01:07,867] [INFO] [timer.py:199:stop] epoch=0/micro_step=5260/global_step=5260, RunningAvgSamplesPerSec=105.31875428619172, CurrSamplesPerSec=111.46577904454388, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:01:07,960] [INFO] [logging.py:96:log_dist] [Rank 0] step=5260, skipped=84, lr=[1.5646735229628619e-06, 1.5646735229628619e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5259|ppo_ep: 1|act_loss: 0.012237548828125|cri_loss: 0.00710296630859375|unsuper_loss: 0.0 +average reward score: 4.65625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.40%) |Training time=0.45s (20.96%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5260|ppo_ep: 1|act_loss: -0.004589080810546875|cri_loss: -0.0017518997192382812|unsuper_loss: 0.0 +average reward score: 4.87890625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.58%) |Training time=0.44s (20.75%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5261|ppo_ep: 1|act_loss: -0.028533935546875|cri_loss: -0.0138702392578125|unsuper_loss: 0.0 +average reward score: 4.8359375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.50%) |Training time=0.45s (20.83%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5262|ppo_ep: 1|act_loss: -0.03631591796875|cri_loss: -0.017333984375|unsuper_loss: 0.0 +average reward score: 4.24609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.52%) |Training time=0.45s (20.83%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5263|ppo_ep: 1|act_loss: -0.007282257080078125|cri_loss: -0.003139495849609375|unsuper_loss: 0.0 +average reward score: 5.2265625 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.21%) |Training time=0.49s (22.26%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.44 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5264|ppo_ep: 1|act_loss: -0.0225830078125|cri_loss: -0.0083465576171875|unsuper_loss: 0.0 +average reward score: 5.3125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.81%) |Training time=0.44s (20.50%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5265|ppo_ep: 1|act_loss: -0.0156707763671875|cri_loss: -0.00771331787109375|unsuper_loss: 0.0 +average reward score: 5.08203125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.73%) |Training time=0.44s (20.57%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5266|ppo_ep: 1|act_loss: 0.018646240234375|cri_loss: 0.01043701171875|unsuper_loss: 0.0 +average reward score: 4.8046875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.37%) |Training time=0.45s (20.97%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5267|ppo_ep: 1|act_loss: -0.0035762786865234375|cri_loss: -0.0013942718505859375|unsuper_loss: 0.0 +average reward score: 5.6328125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.50%) |Training time=0.45s (20.80%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5268|ppo_ep: 1|act_loss: 0.0124969482421875|cri_loss: 0.00662994384765625|unsuper_loss: 0.0 +average reward score: 4.65234375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.14%) |Training time=0.48s (22.14%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.49 +[2023-04-14 12:01:29,408] [INFO] [logging.py:96:log_dist] [Rank 0] step=5270, skipped=70, lr=[2.978552438838442e-06, 2.978552438838442e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:01:29,426] [INFO] [timer.py:199:stop] epoch=0/micro_step=5270/global_step=5270, RunningAvgSamplesPerSec=105.33081301411826, CurrSamplesPerSec=112.13020758057145, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:01:29,519] [INFO] [logging.py:96:log_dist] [Rank 0] step=5270, skipped=84, lr=[1.5557545034950558e-06, 1.5557545034950558e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5269|ppo_ep: 1|act_loss: 0.025146484375|cri_loss: 0.01287841796875|unsuper_loss: 0.0 +average reward score: 4.71875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.46%) |Training time=0.45s (20.89%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5270|ppo_ep: 1|act_loss: 0.0148773193359375|cri_loss: 0.007671356201171875|unsuper_loss: 0.0 +average reward score: 5.5859375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.57%) |Training time=0.44s (20.75%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.49 +[2023-04-14 12:01:33,792] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 5271|ppo_ep: 1|act_loss: 0.027099609375|cri_loss: 0.013824462890625|unsuper_loss: 0.0 +average reward score: 5.21484375 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.05%) |Training time=0.44s (20.73%) |Others=0.09 (4.22%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.49 +[2023-04-14 12:01:35,924] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 5272|ppo_ep: 1|act_loss: 0.023406982421875|cri_loss: 0.0120086669921875|unsuper_loss: 0.0 +average reward score: 4.51953125 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.87%) |Training time=0.45s (20.94%) |Others=0.09 (4.19%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5273|ppo_ep: 1|act_loss: 0.009521484375|cri_loss: 0.005828857421875|unsuper_loss: 0.0 +average reward score: 4.5546875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.51%) |Training time=0.45s (20.81%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5274|ppo_ep: 1|act_loss: 0.0012540817260742188|cri_loss: 0.0006866455078125|unsuper_loss: 0.0 +average reward score: 5.36328125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.61%) |Training time=0.45s (20.72%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5275|ppo_ep: 1|act_loss: 0.0109710693359375|cri_loss: 0.005619049072265625|unsuper_loss: 0.0 +average reward score: 5.0390625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.55%) |Training time=0.44s (20.79%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5276|ppo_ep: 1|act_loss: -0.01338958740234375|cri_loss: -0.006500244140625|unsuper_loss: 0.0 +average reward score: 4.58203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.38%) |Training time=0.45s (20.92%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5277|ppo_ep: 1|act_loss: -0.017333984375|cri_loss: -0.008331298828125|unsuper_loss: 0.0 +average reward score: 5.984375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.64%) |Training time=0.44s (20.66%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5278|ppo_ep: 1|act_loss: -0.021484375|cri_loss: -0.00748443603515625|unsuper_loss: 0.0 +average reward score: 5.3671875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.53%) |Training time=0.44s (20.81%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.49 +[2023-04-14 12:01:50,806] [INFO] [logging.py:96:log_dist] [Rank 0] step=5280, skipped=70, lr=[2.9614039886063483e-06, 2.9614039886063483e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:01:50,824] [INFO] [timer.py:199:stop] epoch=0/micro_step=5280/global_step=5280, RunningAvgSamplesPerSec=105.34468671241142, CurrSamplesPerSec=112.25146359250908, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:01:50,917] [INFO] [logging.py:96:log_dist] [Rank 0] step=5280, skipped=86, lr=[1.5486293586993107e-06, 1.5486293586993107e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5279|ppo_ep: 1|act_loss: -0.0190277099609375|cri_loss: -0.0081939697265625|unsuper_loss: 0.0 +average reward score: 5.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.48%) |Training time=0.45s (20.88%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5280|ppo_ep: 1|act_loss: -0.025604248046875|cri_loss: -0.01247406005859375|unsuper_loss: 0.0 +average reward score: 5.6875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.99%) |Training time=0.46s (21.38%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5281|ppo_ep: 1|act_loss: -0.0189208984375|cri_loss: -0.0078582763671875|unsuper_loss: 0.0 +average reward score: 5.2109375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.22%) |Training time=0.46s (21.17%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5282|ppo_ep: 1|act_loss: 0.01389312744140625|cri_loss: 0.00717926025390625|unsuper_loss: 0.0 +average reward score: 4.76171875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.47%) |Training time=0.45s (20.88%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5283|ppo_ep: 1|act_loss: 0.03839111328125|cri_loss: 0.02044677734375|unsuper_loss: 0.0 +average reward score: 4.734375 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.64s (72.98%) |Training time=0.50s (22.23%) |Others=0.11 (4.79%)|CurSamplesPerSec=14.24 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5284|ppo_ep: 1|act_loss: 0.01202392578125|cri_loss: 0.0062408447265625|unsuper_loss: 0.0 +average reward score: 5.13671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.95%) |Training time=0.46s (21.40%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5285|ppo_ep: 1|act_loss: 0.0244903564453125|cri_loss: 0.01262664794921875|unsuper_loss: 0.0 +average reward score: 4.0703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.11%) |Training time=0.46s (21.25%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5286|ppo_ep: 1|act_loss: 0.04376220703125|cri_loss: 0.0239715576171875|unsuper_loss: 0.0 +average reward score: 5.5859375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.98%) |Training time=0.46s (21.35%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5287|ppo_ep: 1|act_loss: -0.0039215087890625|cri_loss: -0.0018444061279296875|unsuper_loss: 0.0 +average reward score: 4.80078125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.38%) |Training time=0.45s (20.96%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5288|ppo_ep: 1|act_loss: -0.0125885009765625|cri_loss: -0.004970550537109375|unsuper_loss: 0.0 +average reward score: 5.9296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.46s (21.32%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.49 +[2023-04-14 12:02:12,452] [INFO] [logging.py:96:log_dist] [Rank 0] step=5290, skipped=70, lr=[2.9442831613628225e-06, 2.9442831613628225e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:02:12,471] [INFO] [timer.py:199:stop] epoch=0/micro_step=5290/global_step=5290, RunningAvgSamplesPerSec=105.35216406875476, CurrSamplesPerSec=109.4159719697328, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:02:12,563] [INFO] [logging.py:96:log_dist] [Rank 0] step=5290, skipped=86, lr=[1.5397356270701858e-06, 1.5397356270701858e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5289|ppo_ep: 1|act_loss: 0.0186767578125|cri_loss: 0.009857177734375|unsuper_loss: 0.0 +average reward score: 5.7578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.18%) |Training time=0.46s (21.19%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5290|ppo_ep: 1|act_loss: -0.0070648193359375|cri_loss: -0.00283050537109375|unsuper_loss: 0.0 +average reward score: 4.8828125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.46s (21.30%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5291|ppo_ep: 1|act_loss: 0.008026123046875|cri_loss: 0.00466156005859375|unsuper_loss: 0.0 +average reward score: 4.828125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.05%) |Training time=0.46s (21.33%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5292|ppo_ep: 1|act_loss: 0.007476806640625|cri_loss: 0.00417327880859375|unsuper_loss: 0.0 +average reward score: 4.8515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.34%) |Training time=0.45s (21.00%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5293|ppo_ep: 1|act_loss: 0.0211029052734375|cri_loss: 0.0108184814453125|unsuper_loss: 0.0 +average reward score: 4.484375 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.72s (75.49%) |Training time=0.46s (20.11%) |Others=0.10 (4.40%)|CurSamplesPerSec=14.06 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5294|ppo_ep: 1|act_loss: 0.002414703369140625|cri_loss: 0.001720428466796875|unsuper_loss: 0.0 +average reward score: 5.109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.36%) |Training time=0.45s (20.99%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5295|ppo_ep: 1|act_loss: 0.03570556640625|cri_loss: 0.0189056396484375|unsuper_loss: 0.0 +average reward score: 5.8125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.67%) |Training time=0.45s (20.66%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5296|ppo_ep: 1|act_loss: 0.05523681640625|cri_loss: 0.029052734375|unsuper_loss: 0.0 +average reward score: 4.55078125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.60%) |Training time=0.45s (20.72%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5297|ppo_ep: 1|act_loss: 0.0104522705078125|cri_loss: 0.00646209716796875|unsuper_loss: 0.0 +average reward score: 4.38671875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.56%) |Training time=0.45s (20.79%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5298|ppo_ep: 1|act_loss: -0.01441192626953125|cri_loss: -0.0060272216796875|unsuper_loss: 0.0 +average reward score: 4.640625 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.59s (70.73%) |Training time=0.48s (21.46%) |Others=0.18 (7.80%)|CurSamplesPerSec=14.20 |AvgSamplesPerSec=14.49 +[2023-04-14 12:02:34,208] [INFO] [logging.py:96:log_dist] [Rank 0] step=5300, skipped=70, lr=[2.927190210879829e-06, 2.927190210879829e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:02:34,226] [INFO] [timer.py:199:stop] epoch=0/micro_step=5300/global_step=5300, RunningAvgSamplesPerSec=105.36035663269071, CurrSamplesPerSec=112.99862095906488, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:02:34,319] [INFO] [logging.py:96:log_dist] [Rank 0] step=5300, skipped=86, lr=[1.5308561288755794e-06, 1.5308561288755794e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5299|ppo_ep: 1|act_loss: -0.004306793212890625|cri_loss: -0.001880645751953125|unsuper_loss: 0.0 +average reward score: 4.703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.52%) |Training time=0.45s (20.77%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5300|ppo_ep: 1|act_loss: -0.01861572265625|cri_loss: -0.008331298828125|unsuper_loss: 0.0 +average reward score: 5.01171875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.20%) |Training time=0.45s (21.15%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5301|ppo_ep: 1|act_loss: 0.006916046142578125|cri_loss: 0.0038356781005859375|unsuper_loss: 0.0 +average reward score: 4.9296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.20%) |Training time=0.45s (21.13%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5302|ppo_ep: 1|act_loss: -0.03961181640625|cri_loss: -0.018798828125|unsuper_loss: 0.0 +average reward score: 4.8984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.45s (21.06%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5303|ppo_ep: 1|act_loss: 0.013214111328125|cri_loss: 0.008453369140625|unsuper_loss: 0.0 +average reward score: 4.42578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.05%) |Training time=0.46s (21.31%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5304|ppo_ep: 1|act_loss: -0.0126495361328125|cri_loss: -0.0062103271484375|unsuper_loss: 0.0 +average reward score: 4.890625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.09%) |Training time=0.46s (21.25%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5305|ppo_ep: 1|act_loss: 0.01092529296875|cri_loss: 0.005596160888671875|unsuper_loss: 0.0 +average reward score: 6.02734375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.23%) |Training time=0.45s (21.11%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5306|ppo_ep: 1|act_loss: -0.0031070709228515625|cri_loss: -0.000335693359375|unsuper_loss: 0.0 +average reward score: 4.3125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.00%) |Training time=0.46s (21.37%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5307|ppo_ep: 1|act_loss: -0.013458251953125|cri_loss: -0.00453948974609375|unsuper_loss: 0.0 +average reward score: 5.23828125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.05%) |Training time=0.46s (21.27%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5308|ppo_ep: 1|act_loss: 0.03436279296875|cri_loss: 0.0176849365234375|unsuper_loss: 0.0 +average reward score: 5.0625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.10%) |Training time=0.46s (21.26%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.49 +[2023-04-14 12:02:55,722] [INFO] [logging.py:96:log_dist] [Rank 0] step=5310, skipped=70, lr=[2.910125390516126e-06, 2.910125390516126e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:02:55,740] [INFO] [timer.py:199:stop] epoch=0/micro_step=5310/global_step=5310, RunningAvgSamplesPerSec=105.3658343095268, CurrSamplesPerSec=102.50064569932177, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:02:55,833] [INFO] [logging.py:96:log_dist] [Rank 0] step=5310, skipped=86, lr=[1.521990995731075e-06, 1.521990995731075e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5309|ppo_ep: 1|act_loss: 0.00766754150390625|cri_loss: 0.00418853759765625|unsuper_loss: 0.0 +average reward score: 5.671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.90%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5310|ppo_ep: 1|act_loss: 0.01290130615234375|cri_loss: 0.007411956787109375|unsuper_loss: 0.0 +average reward score: 5.0234375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.11%) |Training time=0.46s (21.28%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5311|ppo_ep: 1|act_loss: 0.0115509033203125|cri_loss: 0.00615692138671875|unsuper_loss: 0.0 +average reward score: 5.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.70%) |Training time=0.44s (20.63%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5312|ppo_ep: 1|act_loss: 0.018096923828125|cri_loss: 0.01111602783203125|unsuper_loss: 0.0 +average reward score: 4.6015625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.32%) |Training time=0.45s (21.01%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5313|ppo_ep: 1|act_loss: -0.019775390625|cri_loss: -0.00923919677734375|unsuper_loss: 0.0 +average reward score: 4.77734375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.16%) |Training time=0.45s (21.11%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5314|ppo_ep: 1|act_loss: 0.0172271728515625|cri_loss: 0.0090484619140625|unsuper_loss: 0.0 +average reward score: 5.8359375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.29%) |Training time=0.45s (21.04%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5315|ppo_ep: 1|act_loss: -0.00021028518676757812|cri_loss: 0.0001347064971923828|unsuper_loss: 0.0 +average reward score: 5.19140625 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.67%) |Training time=0.47s (20.04%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5316|ppo_ep: 1|act_loss: -0.00426483154296875|cri_loss: -0.000179290771484375|unsuper_loss: 0.0 +average reward score: 5.34375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.09%) |Training time=0.46s (21.28%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5317|ppo_ep: 1|act_loss: -0.0338134765625|cri_loss: -0.0160064697265625|unsuper_loss: 0.0 +average reward score: 5.75 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.36%) |Training time=0.47s (21.99%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5318|ppo_ep: 1|act_loss: -0.045074462890625|cri_loss: -0.022003173828125|unsuper_loss: 0.0 +average reward score: 4.75390625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.46%) |Training time=0.47s (21.87%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.49 +[2023-04-14 12:03:17,417] [INFO] [logging.py:96:log_dist] [Rank 0] step=5320, skipped=70, lr=[2.89308895321352e-06, 2.89308895321352e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:03:17,435] [INFO] [timer.py:199:stop] epoch=0/micro_step=5320/global_step=5320, RunningAvgSamplesPerSec=105.3698549311855, CurrSamplesPerSec=103.94926211193594, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:03:17,527] [INFO] [logging.py:96:log_dist] [Rank 0] step=5320, skipped=86, lr=[1.5131403590393323e-06, 1.5131403590393323e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5319|ppo_ep: 1|act_loss: 0.01412200927734375|cri_loss: 0.007457733154296875|unsuper_loss: 0.0 +average reward score: 5.53125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.48%) |Training time=0.47s (21.90%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5320|ppo_ep: 1|act_loss: 0.02178955078125|cri_loss: 0.01132965087890625|unsuper_loss: 0.0 +average reward score: 5.10546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.48s (22.15%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5321|ppo_ep: 1|act_loss: 0.0303497314453125|cri_loss: 0.0167083740234375|unsuper_loss: 0.0 +average reward score: 5.171875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.42%) |Training time=0.47s (21.92%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5322|ppo_ep: 1|act_loss: -0.023895263671875|cri_loss: -0.0081329345703125|unsuper_loss: 0.0 +average reward score: 4.63671875 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.54%) |Training time=0.50s (22.91%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5323|ppo_ep: 1|act_loss: -0.0027008056640625|cri_loss: -0.001178741455078125|unsuper_loss: 0.0 +average reward score: 5.265625 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.68s (73.67%) |Training time=0.50s (21.88%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.02 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5324|ppo_ep: 1|act_loss: 0.0748291015625|cri_loss: 0.04034423828125|unsuper_loss: 0.0 +average reward score: 5.2265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.33%) |Training time=0.47s (21.99%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5325|ppo_ep: 1|act_loss: -0.0117340087890625|cri_loss: -0.00556182861328125|unsuper_loss: 0.0 +average reward score: 5.19921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.41%) |Training time=0.45s (20.92%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5326|ppo_ep: 1|act_loss: 0.0284271240234375|cri_loss: 0.0148162841796875|unsuper_loss: 0.0 +average reward score: 4.53515625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.49%) |Training time=0.45s (20.80%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5327|ppo_ep: 1|act_loss: 0.02947998046875|cri_loss: 0.01580810546875|unsuper_loss: 0.0 +average reward score: 5.3125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.52%) |Training time=0.47s (21.84%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5328|ppo_ep: 1|act_loss: 0.0015621185302734375|cri_loss: 0.0009093284606933594|unsuper_loss: 0.0 +average reward score: 4.6015625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.74%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.49 +[2023-04-14 12:03:39,193] [INFO] [logging.py:96:log_dist] [Rank 0] step=5330, skipped=70, lr=[2.8760811514931076e-06, 2.8760811514931076e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:03:39,211] [INFO] [timer.py:199:stop] epoch=0/micro_step=5330/global_step=5330, RunningAvgSamplesPerSec=105.36426483738646, CurrSamplesPerSec=100.47936851593158, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:03:39,303] [INFO] [logging.py:96:log_dist] [Rank 0] step=5330, skipped=86, lr=[1.5043043499881378e-06, 1.5043043499881378e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5329|ppo_ep: 1|act_loss: 0.021636962890625|cri_loss: 0.0137786865234375|unsuper_loss: 0.0 +average reward score: 5.13671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.29%) |Training time=0.48s (22.12%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5330|ppo_ep: 1|act_loss: 0.02362060546875|cri_loss: 0.013427734375|unsuper_loss: 0.0 +average reward score: 5.1953125 +------------------------------------------------------------------------------------- +|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.20%) |Training time=0.48s (20.50%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.65 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5331|ppo_ep: 1|act_loss: 0.01336669921875|cri_loss: 0.00736236572265625|unsuper_loss: 0.0 +average reward score: 5.44921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.37%) |Training time=0.45s (20.97%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5332|ppo_ep: 1|act_loss: 0.004276275634765625|cri_loss: 0.0023345947265625|unsuper_loss: 0.0 +average reward score: 5.2578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.46s (21.40%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5333|ppo_ep: 1|act_loss: -0.00860595703125|cri_loss: -0.003955841064453125|unsuper_loss: 0.0 +average reward score: 4.453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.86%) |Training time=0.46s (21.50%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5334|ppo_ep: 1|act_loss: -0.0253448486328125|cri_loss: -0.0120697021484375|unsuper_loss: 0.0 +average reward score: 4.625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.19%) |Training time=0.46s (21.16%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5335|ppo_ep: 1|act_loss: -0.0122528076171875|cri_loss: -0.005382537841796875|unsuper_loss: 0.0 +average reward score: 6.08984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.15%) |Training time=0.46s (21.22%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5336|ppo_ep: 1|act_loss: -0.0132598876953125|cri_loss: -0.005645751953125|unsuper_loss: 0.0 +average reward score: 4.734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.55%) |Training time=0.46s (21.13%) |Others=0.12 (5.32%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5337|ppo_ep: 1|act_loss: 0.0557861328125|cri_loss: 0.02984619140625|unsuper_loss: 0.0 +average reward score: 4.90234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.13%) |Training time=0.46s (21.22%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5338|ppo_ep: 1|act_loss: 0.036956787109375|cri_loss: 0.01953125|unsuper_loss: 0.0 +average reward score: 4.61328125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.21%) |Training time=0.46s (21.16%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.49 +[2023-04-14 12:04:00,950] [INFO] [logging.py:96:log_dist] [Rank 0] step=5340, skipped=70, lr=[2.8591022374515417e-06, 2.8591022374515417e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:04:00,968] [INFO] [timer.py:199:stop] epoch=0/micro_step=5340/global_step=5340, RunningAvgSamplesPerSec=105.36797444530119, CurrSamplesPerSec=107.08180623625253, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:04:01,061] [INFO] [logging.py:96:log_dist] [Rank 0] step=5340, skipped=86, lr=[1.495483099548462e-06, 1.495483099548462e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5339|ppo_ep: 1|act_loss: 0.0024166107177734375|cri_loss: 0.0014972686767578125|unsuper_loss: 0.0 +average reward score: 5.10546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.99%) |Training time=0.46s (21.39%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5340|ppo_ep: 1|act_loss: 0.0214385986328125|cri_loss: 0.013397216796875|unsuper_loss: 0.0 +average reward score: 4.5234375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.30%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5341|ppo_ep: 1|act_loss: 0.0792236328125|cri_loss: 0.044647216796875|unsuper_loss: 0.0 +average reward score: 4.71484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.14%) |Training time=0.46s (21.24%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5342|ppo_ep: 1|act_loss: -0.01378631591796875|cri_loss: -0.006000518798828125|unsuper_loss: 0.0 +average reward score: 4.6953125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.45s (21.06%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5343|ppo_ep: 1|act_loss: 0.0090484619140625|cri_loss: 0.005382537841796875|unsuper_loss: 0.0 +average reward score: 4.8984375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.87%) |Training time=0.44s (20.43%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5344|ppo_ep: 1|act_loss: -0.015045166015625|cri_loss: -0.00681304931640625|unsuper_loss: 0.0 +average reward score: 5.390625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.14%) |Training time=0.49s (22.28%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5345|ppo_ep: 1|act_loss: -0.0557861328125|cri_loss: -0.0258026123046875|unsuper_loss: 0.0 +average reward score: 5.00390625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.78%) |Training time=0.44s (20.50%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5346|ppo_ep: 1|act_loss: -0.0288238525390625|cri_loss: -0.01389312744140625|unsuper_loss: 0.0 +average reward score: 4.21484375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.86%) |Training time=0.44s (20.44%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5347|ppo_ep: 1|act_loss: -0.026397705078125|cri_loss: -0.01287841796875|unsuper_loss: 0.0 +average reward score: 5.21875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.83%) |Training time=0.44s (20.45%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5348|ppo_ep: 1|act_loss: -0.0306396484375|cri_loss: -0.0144195556640625|unsuper_loss: 0.0 +average reward score: 5.140625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.61%) |Training time=0.44s (20.63%) |Others=0.10 (4.76%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.49 +[2023-04-14 12:04:22,444] [INFO] [logging.py:96:log_dist] [Rank 0] step=5350, skipped=70, lr=[2.8421524627572866e-06, 2.8421524627572866e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:04:22,463] [INFO] [timer.py:199:stop] epoch=0/micro_step=5350/global_step=5350, RunningAvgSamplesPerSec=105.38047175709768, CurrSamplesPerSec=112.89862840396556, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:04:22,555] [INFO] [logging.py:96:log_dist] [Rank 0] step=5350, skipped=86, lr=[1.486676738472515e-06, 1.486676738472515e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5349|ppo_ep: 1|act_loss: 0.0217742919921875|cri_loss: 0.01186370849609375|unsuper_loss: 0.0 +average reward score: 4.67578125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.53%) |Training time=0.45s (20.81%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5350|ppo_ep: 1|act_loss: 0.1251220703125|cri_loss: 0.07086181640625|unsuper_loss: 0.0 +average reward score: 4.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.77%) |Training time=0.44s (20.55%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5351|ppo_ep: 1|act_loss: 0.006988525390625|cri_loss: 0.003627777099609375|unsuper_loss: 0.0 +average reward score: 6.07421875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.70%) |Training time=0.44s (20.65%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5352|ppo_ep: 1|act_loss: 0.0302734375|cri_loss: 0.0184783935546875|unsuper_loss: 0.0 +average reward score: 4.58984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.89%) |Training time=0.45s (20.49%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5353|ppo_ep: 1|act_loss: 0.1055908203125|cri_loss: 0.05938720703125|unsuper_loss: 0.0 +average reward score: 4.58203125 +------------------------------------------------------------------------------------- +|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.76%) |Training time=0.44s (19.74%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.28 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5354|ppo_ep: 1|act_loss: -0.09637451171875|cri_loss: -0.041259765625|unsuper_loss: 0.0 +average reward score: 4.77734375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.59%) |Training time=0.45s (20.76%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5355|ppo_ep: 1|act_loss: -0.01036834716796875|cri_loss: -0.00457000732421875|unsuper_loss: 0.0 +average reward score: 5.7890625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.89%) |Training time=0.44s (20.39%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5356|ppo_ep: 1|act_loss: -0.0211181640625|cri_loss: -0.0093994140625|unsuper_loss: 0.0 +average reward score: 4.48046875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.31%) |Training time=0.45s (21.05%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5357|ppo_ep: 1|act_loss: 0.004425048828125|cri_loss: 0.0028228759765625|unsuper_loss: 0.0 +average reward score: 4.546875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.54%) |Training time=0.44s (20.79%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5358|ppo_ep: 1|act_loss: 0.0533447265625|cri_loss: 0.0287322998046875|unsuper_loss: 0.0 +average reward score: 5.3515625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.56%) |Training time=0.44s (20.71%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.49 +[2023-04-14 12:04:44,014] [INFO] [logging.py:96:log_dist] [Rank 0] step=5360, skipped=70, lr=[2.8252320786468936e-06, 2.8252320786468936e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:04:44,033] [INFO] [timer.py:199:stop] epoch=0/micro_step=5360/global_step=5360, RunningAvgSamplesPerSec=105.39471890271973, CurrSamplesPerSec=112.58941413422879, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:04:44,125] [INFO] [logging.py:96:log_dist] [Rank 0] step=5360, skipped=86, lr=[1.477885397291814e-06, 1.477885397291814e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5359|ppo_ep: 1|act_loss: 0.03546142578125|cri_loss: 0.018402099609375|unsuper_loss: 0.0 +average reward score: 4.31640625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.50%) |Training time=0.45s (20.85%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5360|ppo_ep: 1|act_loss: -0.01268768310546875|cri_loss: -0.005405426025390625|unsuper_loss: 0.0 +average reward score: 4.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.74s (75.74%) |Training time=0.46s (19.90%) |Others=0.10 (4.36%)|CurSamplesPerSec=13.89 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5361|ppo_ep: 1|act_loss: 0.218994140625|cri_loss: 0.1324462890625|unsuper_loss: 0.0 +average reward score: 4.7578125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.71%) |Training time=0.46s (21.61%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5362|ppo_ep: 1|act_loss: -0.01126861572265625|cri_loss: -0.0054168701171875|unsuper_loss: 0.0 +average reward score: 5.40625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.68%) |Training time=0.46s (21.64%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5363|ppo_ep: 1|act_loss: 0.004543304443359375|cri_loss: 0.0038318634033203125|unsuper_loss: 0.0 +average reward score: 4.3125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.77%) |Training time=0.46s (21.55%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5364|ppo_ep: 1|act_loss: 0.0152435302734375|cri_loss: 0.0079498291015625|unsuper_loss: 0.0 +average reward score: 4.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.62%) |Training time=0.46s (21.70%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5365|ppo_ep: 1|act_loss: 0.0078277587890625|cri_loss: 0.00411224365234375|unsuper_loss: 0.0 +average reward score: 5.47265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.76%) |Training time=0.46s (21.54%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5366|ppo_ep: 1|act_loss: 0.002910614013671875|cri_loss: 0.0019168853759765625|unsuper_loss: 0.0 +average reward score: 4.17578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.55%) |Training time=0.47s (21.76%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5367|ppo_ep: 1|act_loss: -0.0033702850341796875|cri_loss: -0.0005016326904296875|unsuper_loss: 0.0 +average reward score: 4.6875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.72%) |Training time=0.46s (21.60%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5368|ppo_ep: 1|act_loss: 0.004711151123046875|cri_loss: 0.0026111602783203125|unsuper_loss: 0.0 +average reward score: 4.46484375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.81%) |Training time=0.46s (21.50%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.49 +[2023-04-14 12:05:05,578] [INFO] [logging.py:96:log_dist] [Rank 0] step=5370, skipped=70, lr=[2.808341335921272e-06, 2.808341335921272e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:05:05,596] [INFO] [timer.py:199:stop] epoch=0/micro_step=5370/global_step=5370, RunningAvgSamplesPerSec=105.39691619380453, CurrSamplesPerSec=104.9207163684404, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:05:05,689] [INFO] [logging.py:96:log_dist] [Rank 0] step=5370, skipped=86, lr=[1.4691092063152417e-06, 1.4691092063152417e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5369|ppo_ep: 1|act_loss: -0.0125885009765625|cri_loss: -0.006072998046875|unsuper_loss: 0.0 +average reward score: 4.87890625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.57%) |Training time=0.47s (21.79%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5370|ppo_ep: 1|act_loss: -0.005306243896484375|cri_loss: -0.002552032470703125|unsuper_loss: 0.0 +average reward score: 4.66796875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.59%) |Training time=0.47s (21.74%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5371|ppo_ep: 1|act_loss: -0.149658203125|cri_loss: -0.0469970703125|unsuper_loss: 0.0 +average reward score: 4.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.23%) |Training time=0.48s (22.12%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5372|ppo_ep: 1|act_loss: -0.0189666748046875|cri_loss: -0.009185791015625|unsuper_loss: 0.0 +average reward score: 4.51953125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.37%) |Training time=0.45s (20.93%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.49 +[2023-04-14 12:05:14,283] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 5373|ppo_ep: 1|act_loss: 0.0158843994140625|cri_loss: 0.0085906982421875|unsuper_loss: 0.0 +average reward score: 5.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.09%) |Training time=0.46s (21.71%) |Others=0.09 (4.19%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.49 +[2023-04-14 12:05:16,416] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 5374|ppo_ep: 1|act_loss: -0.03131103515625|cri_loss: -0.0146484375|unsuper_loss: 0.0 +average reward score: 4.73046875 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.21%) |Training time=0.46s (21.59%) |Others=0.09 (4.19%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5375|ppo_ep: 1|act_loss: -0.0276336669921875|cri_loss: -0.0133819580078125|unsuper_loss: 0.0 +average reward score: 4.17578125 +------------------------------------------------------------------------------------- +|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.90%) |Training time=0.45s (19.74%) |Others=0.10 (4.36%)|CurSamplesPerSec=13.88 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5376|ppo_ep: 1|act_loss: -0.02777099609375|cri_loss: -0.01239776611328125|unsuper_loss: 0.0 +average reward score: 4.32421875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.54%) |Training time=0.47s (21.74%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5377|ppo_ep: 1|act_loss: -0.025543212890625|cri_loss: -0.00637054443359375|unsuper_loss: 0.0 +average reward score: 4.20703125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.72%) |Training time=0.46s (21.62%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5378|ppo_ep: 1|act_loss: -0.023193359375|cri_loss: -0.011199951171875|unsuper_loss: 0.0 +average reward score: 4.1875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.79%) |Training time=0.46s (21.51%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.49 +[2023-04-14 12:05:27,179] [INFO] [logging.py:96:log_dist] [Rank 0] step=5380, skipped=70, lr=[2.7914804849419754e-06, 2.7914804849419754e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:05:27,197] [INFO] [timer.py:199:stop] epoch=0/micro_step=5380/global_step=5380, RunningAvgSamplesPerSec=105.39935198633347, CurrSamplesPerSec=106.7170614225843, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:05:27,294] [INFO] [logging.py:96:log_dist] [Rank 0] step=5380, skipped=88, lr=[1.462099249105216e-06, 1.462099249105216e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5379|ppo_ep: 1|act_loss: 0.0011959075927734375|cri_loss: 0.00070953369140625|unsuper_loss: 0.0 +average reward score: 4.671875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.70%) |Others=0.10 (4.82%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5380|ppo_ep: 1|act_loss: -0.0013675689697265625|cri_loss: 0.0005321502685546875|unsuper_loss: 0.0 +average reward score: 5.3046875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.72%) |Training time=0.46s (21.61%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5381|ppo_ep: 1|act_loss: 0.01001739501953125|cri_loss: 0.0052490234375|unsuper_loss: 0.0 +average reward score: 5.57421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.84%) |Training time=0.46s (21.13%) |Others=0.11 (5.03%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5382|ppo_ep: 1|act_loss: 0.0014362335205078125|cri_loss: 0.0013036727905273438|unsuper_loss: 0.0 +average reward score: 4.578125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.42%) |Training time=0.48s (22.01%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5383|ppo_ep: 1|act_loss: 0.016265869140625|cri_loss: 0.00885772705078125|unsuper_loss: 0.0 +average reward score: 5.296875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.61%) |Training time=0.47s (21.73%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5384|ppo_ep: 1|act_loss: 0.021728515625|cri_loss: 0.01099395751953125|unsuper_loss: 0.0 +average reward score: 4.4609375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.82%) |Training time=0.46s (21.51%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5385|ppo_ep: 1|act_loss: 0.01617431640625|cri_loss: 0.008880615234375|unsuper_loss: 0.0 +average reward score: 4.75 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.90%) |Training time=0.46s (21.43%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5386|ppo_ep: 1|act_loss: 0.01033782958984375|cri_loss: 0.005344390869140625|unsuper_loss: 0.0 +average reward score: 4.85546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.48s (22.00%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5387|ppo_ep: 1|act_loss: 0.0189056396484375|cri_loss: 0.0101318359375|unsuper_loss: 0.0 +average reward score: 5.10546875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.06%) |Training time=0.46s (21.26%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5388|ppo_ep: 1|act_loss: -0.087646484375|cri_loss: -0.0062255859375|unsuper_loss: 0.0 +average reward score: 4.29296875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.74%) |Training time=0.46s (21.53%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.49 +[2023-04-14 12:05:48,661] [INFO] [logging.py:96:log_dist] [Rank 0] step=5390, skipped=70, lr=[2.774649775627491e-06, 2.774649775627491e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:05:48,679] [INFO] [timer.py:199:stop] epoch=0/micro_step=5390/global_step=5390, RunningAvgSamplesPerSec=105.40158110513609, CurrSamplesPerSec=108.0063218002324, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:05:48,771] [INFO] [logging.py:96:log_dist] [Rank 0] step=5390, skipped=88, lr=[1.4533506561564305e-06, 1.4533506561564305e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5389|ppo_ep: 1|act_loss: -0.0211029052734375|cri_loss: -0.0100860595703125|unsuper_loss: 0.0 +average reward score: 3.73828125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.64%) |Training time=0.46s (21.40%) |Others=0.11 (4.95%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5390|ppo_ep: 1|act_loss: 0.06304931640625|cri_loss: 0.03387451171875|unsuper_loss: 0.0 +average reward score: 4.76171875 +------------------------------------------------------------------------------------- +|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.72s (75.20%) |Training time=0.46s (20.09%) |Others=0.11 (4.71%)|CurSamplesPerSec=14.00 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5391|ppo_ep: 1|act_loss: -0.00705718994140625|cri_loss: -0.00333404541015625|unsuper_loss: 0.0 +average reward score: 4.0703125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.79%) |Training time=0.46s (21.55%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5392|ppo_ep: 1|act_loss: 0.036102294921875|cri_loss: 0.0203704833984375|unsuper_loss: 0.0 +average reward score: 5.90234375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.04%) |Training time=0.48s (22.32%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5393|ppo_ep: 1|act_loss: -0.0137176513671875|cri_loss: -0.0059967041015625|unsuper_loss: 0.0 +average reward score: 5.15234375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.58%) |Training time=0.47s (21.74%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5394|ppo_ep: 1|act_loss: 0.02825927734375|cri_loss: 0.0151519775390625|unsuper_loss: 0.0 +average reward score: 5.45703125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.93%) |Training time=0.46s (21.40%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5395|ppo_ep: 1|act_loss: 0.01000213623046875|cri_loss: 0.00513458251953125|unsuper_loss: 0.0 +average reward score: 4.45703125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.70%) |Training time=0.46s (21.51%) |Others=0.10 (4.78%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5396|ppo_ep: 1|act_loss: -0.00942230224609375|cri_loss: -0.00244140625|unsuper_loss: 0.0 +average reward score: 5.80078125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.76%) |Training time=0.46s (21.54%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5397|ppo_ep: 1|act_loss: -0.0196685791015625|cri_loss: -0.0056610107421875|unsuper_loss: 0.0 +average reward score: 3.70703125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.95%) |Training time=0.46s (21.39%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5398|ppo_ep: 1|act_loss: -0.02227783203125|cri_loss: -0.0088958740234375|unsuper_loss: 0.0 +average reward score: 4.4453125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.66%) |Training time=0.46s (21.68%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.49 +[2023-04-14 12:06:10,231] [INFO] [logging.py:96:log_dist] [Rank 0] step=5400, skipped=70, lr=[2.757849457449532e-06, 2.757849457449532e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:06:10,249] [INFO] [timer.py:199:stop] epoch=0/micro_step=5400/global_step=5400, RunningAvgSamplesPerSec=105.40350440139551, CurrSamplesPerSec=107.39463033761628, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:06:10,342] [INFO] [logging.py:96:log_dist] [Rank 0] step=5400, skipped=88, lr=[1.4446175770758245e-06, 1.4446175770758245e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5399|ppo_ep: 1|act_loss: -0.012451171875|cri_loss: -0.00595855712890625|unsuper_loss: 0.0 +average reward score: 4.16015625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.82%) |Training time=0.46s (21.52%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5400|ppo_ep: 1|act_loss: -0.0237579345703125|cri_loss: -0.011627197265625|unsuper_loss: 0.0 +average reward score: 4.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.45%) |Training time=0.47s (21.91%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5401|ppo_ep: 1|act_loss: -0.0233154296875|cri_loss: -0.0113372802734375|unsuper_loss: 0.0 +average reward score: 4.359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.63%) |Training time=0.46s (21.64%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5402|ppo_ep: 1|act_loss: -0.0163421630859375|cri_loss: -0.00795745849609375|unsuper_loss: 0.0 +average reward score: 4.75390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.48s (22.00%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5403|ppo_ep: 1|act_loss: -0.006744384765625|cri_loss: -0.0031108856201171875|unsuper_loss: 0.0 +average reward score: 4.984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.76%) |Training time=0.49s (22.64%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5404|ppo_ep: 1|act_loss: -0.060516357421875|cri_loss: -0.02740478515625|unsuper_loss: 0.0 +average reward score: 4.55859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.88%) |Training time=0.49s (22.52%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5405|ppo_ep: 1|act_loss: -0.004638671875|cri_loss: -0.0005283355712890625|unsuper_loss: 0.0 +average reward score: 4.53515625 +------------------------------------------------------------------------------------- +|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.57%) |Training time=0.50s (21.16%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.59 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5406|ppo_ep: 1|act_loss: 0.019073486328125|cri_loss: 0.0097198486328125|unsuper_loss: 0.0 +average reward score: 5.9375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.66%) |Training time=0.47s (21.69%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5407|ppo_ep: 1|act_loss: 0.024169921875|cri_loss: 0.012420654296875|unsuper_loss: 0.0 +average reward score: 4.74609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.50%) |Training time=0.47s (21.87%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5408|ppo_ep: 1|act_loss: 0.007778167724609375|cri_loss: 0.0053558349609375|unsuper_loss: 0.0 +average reward score: 5.26953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.33%) |Training time=0.48s (22.02%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.49 +[2023-04-14 12:06:31,997] [INFO] [logging.py:96:log_dist] [Rank 0] step=5410, skipped=70, lr=[2.7410797794293427e-06, 2.7410797794293427e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:06:32,015] [INFO] [timer.py:199:stop] epoch=0/micro_step=5410/global_step=5410, RunningAvgSamplesPerSec=105.39676190547151, CurrSamplesPerSec=106.50273442583405, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:06:32,108] [INFO] [logging.py:96:log_dist] [Rank 0] step=5410, skipped=88, lr=[1.4359001413086976e-06, 1.4359001413086976e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5409|ppo_ep: 1|act_loss: -0.00499725341796875|cri_loss: -0.002079010009765625|unsuper_loss: 0.0 +average reward score: 5.578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.84%) |Training time=0.46s (21.52%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5410|ppo_ep: 1|act_loss: -0.00609588623046875|cri_loss: -0.002689361572265625|unsuper_loss: 0.0 +average reward score: 4.4453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.46s (21.41%) |Others=0.11 (4.88%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5411|ppo_ep: 1|act_loss: 0.01546478271484375|cri_loss: 0.00794219970703125|unsuper_loss: 0.0 +average reward score: 5.4375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.50%) |Training time=0.49s (22.34%) |Others=0.11 (5.15%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5412|ppo_ep: 1|act_loss: -0.0040435791015625|cri_loss: -0.0018091201782226562|unsuper_loss: 0.0 +average reward score: 4.921875 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.68s (73.97%) |Training time=0.49s (21.62%) |Others=0.10 (4.41%)|CurSamplesPerSec=14.05 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5413|ppo_ep: 1|act_loss: 0.0044708251953125|cri_loss: 0.0024280548095703125|unsuper_loss: 0.0 +average reward score: 5.359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.81%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.49 +epoch: 0|step: 5414|ppo_ep: 1|act_loss: -0.035888671875|cri_loss: -0.0156097412109375|unsuper_loss: 0.0 +average reward score: 5.58984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.52%) |Training time=0.47s (21.80%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5415|ppo_ep: 1|act_loss: 0.0011444091796875|cri_loss: 0.0019741058349609375|unsuper_loss: 0.0 +average reward score: 6.15625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.42%) |Training time=0.47s (21.96%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5416|ppo_ep: 1|act_loss: -0.031219482421875|cri_loss: -0.01428985595703125|unsuper_loss: 0.0 +average reward score: 5.03515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.37%) |Training time=0.47s (21.97%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5417|ppo_ep: 1|act_loss: -0.01873779296875|cri_loss: -0.005611419677734375|unsuper_loss: 0.0 +average reward score: 5.0390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.59%) |Training time=0.47s (21.79%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5418|ppo_ep: 1|act_loss: 0.0180816650390625|cri_loss: 0.0113677978515625|unsuper_loss: 0.0 +average reward score: 4.5703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.16%) |Training time=0.48s (22.21%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.50 +[2023-04-14 12:06:53,724] [INFO] [logging.py:96:log_dist] [Rank 0] step=5420, skipped=70, lr=[2.724340990134006e-06, 2.724340990134006e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:06:53,742] [INFO] [timer.py:199:stop] epoch=0/micro_step=5420/global_step=5420, RunningAvgSamplesPerSec=105.39063099074835, CurrSamplesPerSec=103.22773090500765, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:06:53,834] [INFO] [logging.py:96:log_dist] [Rank 0] step=5420, skipped=88, lr=[1.4271984780684778e-06, 1.4271984780684778e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5419|ppo_ep: 1|act_loss: 1.4781951904296875e-05|cri_loss: 0.00043654441833496094|unsuper_loss: 0.0 +average reward score: 5.55859375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.47s (21.99%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5420|ppo_ep: 1|act_loss: -0.0020809173583984375|cri_loss: -0.0009274482727050781|unsuper_loss: 0.0 +average reward score: 4.046875 +------------------------------------------------------------------------------------- +|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.58%) |Training time=0.50s (21.18%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.57 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5421|ppo_ep: 1|act_loss: 0.0184173583984375|cri_loss: 0.00998687744140625|unsuper_loss: 0.0 +average reward score: 4.9921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.58%) |Training time=0.47s (21.68%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5422|ppo_ep: 1|act_loss: 0.0377197265625|cri_loss: 0.019622802734375|unsuper_loss: 0.0 +average reward score: 6.3125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.07%) |Training time=0.48s (22.14%) |Others=0.10 (4.80%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5423|ppo_ep: 1|act_loss: 0.007061004638671875|cri_loss: 0.0047607421875|unsuper_loss: 0.0 +average reward score: 5.26171875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.13%) |Training time=0.48s (22.23%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5424|ppo_ep: 1|act_loss: -0.003932952880859375|cri_loss: -0.0017566680908203125|unsuper_loss: 0.0 +average reward score: 4.0078125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.37%) |Training time=0.47s (21.97%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5425|ppo_ep: 1|act_loss: 0.0092010498046875|cri_loss: 0.004894256591796875|unsuper_loss: 0.0 +average reward score: 5.08984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.17%) |Training time=0.48s (22.21%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5426|ppo_ep: 1|act_loss: 0.0012369155883789062|cri_loss: 0.0012493133544921875|unsuper_loss: 0.0 +average reward score: 4.671875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.31%) |Training time=0.47s (22.03%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5427|ppo_ep: 1|act_loss: 0.018707275390625|cri_loss: 0.01065826416015625|unsuper_loss: 0.0 +average reward score: 5.1875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.21%) |Training time=0.48s (22.14%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5428|ppo_ep: 1|act_loss: -0.0004367828369140625|cri_loss: 4.744529724121094e-05|unsuper_loss: 0.0 +average reward score: 6.0 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.10%) |Training time=0.48s (22.26%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.50 +[2023-04-14 12:07:15,439] [INFO] [logging.py:96:log_dist] [Rank 0] step=5430, skipped=70, lr=[2.7076333376727596e-06, 2.7076333376727596e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:07:15,458] [INFO] [timer.py:199:stop] epoch=0/micro_step=5430/global_step=5430, RunningAvgSamplesPerSec=105.38353117169935, CurrSamplesPerSec=104.13257693310327, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:07:15,550] [INFO] [logging.py:96:log_dist] [Rank 0] step=5430, skipped=88, lr=[1.4185127163348057e-06, 1.4185127163348057e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5429|ppo_ep: 1|act_loss: -0.0153350830078125|cri_loss: -0.007526397705078125|unsuper_loss: 0.0 +average reward score: 5.08984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.87%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5430|ppo_ep: 1|act_loss: -0.00426483154296875|cri_loss: -0.002044677734375|unsuper_loss: 0.0 +average reward score: 5.65234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.30%) |Training time=0.47s (22.05%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5431|ppo_ep: 1|act_loss: 0.0103607177734375|cri_loss: 0.0079803466796875|unsuper_loss: 0.0 +average reward score: 5.43359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.25%) |Training time=0.48s (22.08%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5432|ppo_ep: 1|act_loss: 0.003936767578125|cri_loss: 0.00244903564453125|unsuper_loss: 0.0 +average reward score: 5.59765625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.48s (21.94%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5433|ppo_ep: 1|act_loss: -0.005859375|cri_loss: -0.00262451171875|unsuper_loss: 0.0 +average reward score: 5.69921875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.47s (21.79%) |Others=0.10 (4.78%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5434|ppo_ep: 1|act_loss: 0.02886962890625|cri_loss: 0.01512908935546875|unsuper_loss: 0.0 +average reward score: 4.7890625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.96%) |Training time=0.49s (22.45%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5435|ppo_ep: 1|act_loss: 0.0050048828125|cri_loss: 0.0025997161865234375|unsuper_loss: 0.0 +average reward score: 4.9765625 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.60s (71.87%) |Training time=0.49s (21.83%) |Others=0.14 (6.30%)|CurSamplesPerSec=14.40 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5436|ppo_ep: 1|act_loss: -0.01375579833984375|cri_loss: -0.006671905517578125|unsuper_loss: 0.0 +average reward score: 5.58984375 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.68s (74.51%) |Training time=0.47s (20.98%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.19 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5437|ppo_ep: 1|act_loss: 0.007053375244140625|cri_loss: 0.00396728515625|unsuper_loss: 0.0 +average reward score: 5.4375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.48%) |Training time=0.48s (21.94%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5438|ppo_ep: 1|act_loss: -0.014312744140625|cri_loss: -0.0068359375|unsuper_loss: 0.0 +average reward score: 5.15625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.68%) |Training time=0.47s (21.71%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.50 +[2023-04-14 12:07:37,238] [INFO] [logging.py:96:log_dist] [Rank 0] step=5440, skipped=70, lr=[2.69095706969332e-06, 2.69095706969332e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:07:37,257] [INFO] [timer.py:199:stop] epoch=0/micro_step=5440/global_step=5440, RunningAvgSamplesPerSec=105.37736171331294, CurrSamplesPerSec=105.67992189222387, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:07:37,349] [INFO] [logging.py:96:log_dist] [Rank 0] step=5440, skipped=88, lr=[1.4098429848516231e-06, 1.4098429848516231e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5439|ppo_ep: 1|act_loss: -0.002811431884765625|cri_loss: -0.0012006759643554688|unsuper_loss: 0.0 +average reward score: 4.47265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.88%) |Training time=0.47s (21.51%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5440|ppo_ep: 1|act_loss: -0.0201873779296875|cri_loss: -0.00963592529296875|unsuper_loss: 0.0 +average reward score: 5.0546875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.59%) |Training time=0.48s (21.83%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5441|ppo_ep: 1|act_loss: -0.0162506103515625|cri_loss: -0.007709503173828125|unsuper_loss: 0.0 +average reward score: 4.734375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.19%) |Training time=0.47s (21.26%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5442|ppo_ep: 1|act_loss: 0.10546875|cri_loss: 0.05682373046875|unsuper_loss: 0.0 +average reward score: 5.04296875 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.90%) |Training time=0.47s (20.68%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.08 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5443|ppo_ep: 1|act_loss: 0.0166015625|cri_loss: 0.0086212158203125|unsuper_loss: 0.0 +average reward score: 5.61328125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.87%) |Training time=0.46s (21.49%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5444|ppo_ep: 1|act_loss: -0.0185394287109375|cri_loss: -0.0087432861328125|unsuper_loss: 0.0 +average reward score: 4.95703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.70%) |Training time=0.47s (21.67%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5445|ppo_ep: 1|act_loss: 0.0125274658203125|cri_loss: 0.00646209716796875|unsuper_loss: 0.0 +average reward score: 6.6328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.80%) |Training time=0.47s (21.55%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5446|ppo_ep: 1|act_loss: 0.0301513671875|cri_loss: 0.0159759521484375|unsuper_loss: 0.0 +average reward score: 5.2578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.79%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5447|ppo_ep: 1|act_loss: 0.0225982666015625|cri_loss: 0.01230621337890625|unsuper_loss: 0.0 +average reward score: 5.01953125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.62%) |Training time=0.47s (21.69%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5448|ppo_ep: 1|act_loss: -0.020538330078125|cri_loss: -0.009857177734375|unsuper_loss: 0.0 +average reward score: 5.35546875 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.44%) |Training time=0.48s (21.91%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.50 +[2023-04-14 12:07:59,090] [INFO] [logging.py:96:log_dist] [Rank 0] step=5450, skipped=70, lr=[2.6743124333782032e-06, 2.6743124333782032e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:07:59,109] [INFO] [timer.py:199:stop] epoch=0/micro_step=5450/global_step=5450, RunningAvgSamplesPerSec=105.37402939249111, CurrSamplesPerSec=102.53619653180074, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:07:59,202] [INFO] [logging.py:96:log_dist] [Rank 0] step=5450, skipped=88, lr=[1.4011894121252672e-06, 1.4011894121252672e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5449|ppo_ep: 1|act_loss: -0.00958251953125|cri_loss: -0.00408935546875|unsuper_loss: 0.0 +average reward score: 5.09375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.47s (21.90%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5450|ppo_ep: 1|act_loss: 0.00853729248046875|cri_loss: 0.004505157470703125|unsuper_loss: 0.0 +average reward score: 4.96875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.04%) |Training time=0.49s (22.37%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5451|ppo_ep: 1|act_loss: -0.00821685791015625|cri_loss: -0.0039215087890625|unsuper_loss: 0.0 +average reward score: 4.9140625 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.30%) |Training time=0.48s (20.40%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5452|ppo_ep: 1|act_loss: -0.01424407958984375|cri_loss: -0.006511688232421875|unsuper_loss: 0.0 +average reward score: 5.15625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.13%) |Training time=0.48s (22.23%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5453|ppo_ep: 1|act_loss: -0.027374267578125|cri_loss: -0.0124053955078125|unsuper_loss: 0.0 +average reward score: 4.53515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.52%) |Training time=0.47s (21.87%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5454|ppo_ep: 1|act_loss: -0.0054931640625|cri_loss: -0.002529144287109375|unsuper_loss: 0.0 +average reward score: 5.34765625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.49%) |Training time=0.47s (21.88%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5455|ppo_ep: 1|act_loss: -0.007137298583984375|cri_loss: -0.003376007080078125|unsuper_loss: 0.0 +average reward score: 5.12109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.67%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5456|ppo_ep: 1|act_loss: 0.020599365234375|cri_loss: 0.0107574462890625|unsuper_loss: 0.0 +average reward score: 5.07421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.81%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5457|ppo_ep: 1|act_loss: -0.03179931640625|cri_loss: -0.01404571533203125|unsuper_loss: 0.0 +average reward score: 4.5625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.82%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5458|ppo_ep: 1|act_loss: 0.0020694732666015625|cri_loss: 0.0013256072998046875|unsuper_loss: 0.0 +average reward score: 4.359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.79%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50 +[2023-04-14 12:08:20,904] [INFO] [logging.py:96:log_dist] [Rank 0] step=5460, skipped=70, lr=[2.6576996754410752e-06, 2.6576996754410752e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:08:20,922] [INFO] [timer.py:199:stop] epoch=0/micro_step=5460/global_step=5460, RunningAvgSamplesPerSec=105.36840179701572, CurrSamplesPerSec=102.48804438627545, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:08:21,015] [INFO] [logging.py:96:log_dist] [Rank 0] step=5460, skipped=88, lr=[1.3925521264225587e-06, 1.3925521264225587e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5459|ppo_ep: 1|act_loss: -0.00409698486328125|cri_loss: -0.0016803741455078125|unsuper_loss: 0.0 +average reward score: 5.59375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.49%) |Training time=0.47s (21.93%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5460|ppo_ep: 1|act_loss: 0.01171875|cri_loss: 0.0059967041015625|unsuper_loss: 0.0 +average reward score: 4.5625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.62%) |Training time=0.47s (21.73%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5461|ppo_ep: 1|act_loss: -0.004016876220703125|cri_loss: 0.001621246337890625|unsuper_loss: 0.0 +average reward score: 5.6953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.93%) |Training time=0.46s (21.44%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5462|ppo_ep: 1|act_loss: -0.0219879150390625|cri_loss: -0.0105743408203125|unsuper_loss: 0.0 +average reward score: 4.41015625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.87%) |Training time=0.47s (21.52%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5463|ppo_ep: 1|act_loss: 0.010955810546875|cri_loss: 0.0068206787109375|unsuper_loss: 0.0 +average reward score: 4.73828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.75%) |Training time=0.47s (21.62%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5464|ppo_ep: 1|act_loss: 0.005199432373046875|cri_loss: 0.0028209686279296875|unsuper_loss: 0.0 +average reward score: 4.35546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.90%) |Training time=0.46s (21.47%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5465|ppo_ep: 1|act_loss: 0.00040841102600097656|cri_loss: 0.0008087158203125|unsuper_loss: 0.0 +average reward score: 5.42578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.86%) |Training time=0.46s (21.49%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5466|ppo_ep: 1|act_loss: -0.018341064453125|cri_loss: -0.00862884521484375|unsuper_loss: 0.0 +average reward score: 5.0546875 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.74s (74.94%) |Training time=0.48s (20.75%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5467|ppo_ep: 1|act_loss: 0.006927490234375|cri_loss: 0.0036773681640625|unsuper_loss: 0.0 +average reward score: 5.17578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.06%) |Training time=0.48s (22.30%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5468|ppo_ep: 1|act_loss: 0.01983642578125|cri_loss: 0.01010894775390625|unsuper_loss: 0.0 +average reward score: 5.78125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.23%) |Training time=0.48s (22.14%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50 +[2023-04-14 12:08:42,709] [INFO] [logging.py:96:log_dist] [Rank 0] step=5470, skipped=70, lr=[2.641119042123085e-06, 2.641119042123085e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:08:42,727] [INFO] [timer.py:199:stop] epoch=0/micro_step=5470/global_step=5470, RunningAvgSamplesPerSec=105.36424124670562, CurrSamplesPerSec=99.5353366808979, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:08:42,821] [INFO] [logging.py:96:log_dist] [Rank 0] step=5470, skipped=88, lr=[1.383931255768909e-06, 1.383931255768909e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5469|ppo_ep: 1|act_loss: 0.00701904296875|cri_loss: 0.00399017333984375|unsuper_loss: 0.0 +average reward score: 5.46484375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.97%) |Training time=0.48s (22.37%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5470|ppo_ep: 1|act_loss: -0.0755615234375|cri_loss: -0.035247802734375|unsuper_loss: 0.0 +average reward score: 5.08203125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.50%) |Training time=0.49s (22.24%) |Others=0.12 (5.27%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5471|ppo_ep: 1|act_loss: -0.0297088623046875|cri_loss: -0.013824462890625|unsuper_loss: 0.0 +average reward score: 7.29296875 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.68s (74.39%) |Training time=0.48s (21.17%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.15 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5472|ppo_ep: 1|act_loss: -0.00472259521484375|cri_loss: -0.0017681121826171875|unsuper_loss: 0.0 +average reward score: 5.78125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.72%) |Training time=0.49s (22.65%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5473|ppo_ep: 1|act_loss: -0.00201416015625|cri_loss: -0.00042247772216796875|unsuper_loss: 0.0 +average reward score: 5.02734375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.74%) |Training time=0.49s (22.47%) |Others=0.10 (4.79%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5474|ppo_ep: 1|act_loss: 0.040985107421875|cri_loss: 0.02191162109375|unsuper_loss: 0.0 +average reward score: 4.43359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.97%) |Training time=0.48s (22.40%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50 +[2023-04-14 12:08:55,918] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 5475|ppo_ep: 1|act_loss: -0.01708984375|cri_loss: -0.0081634521484375|unsuper_loss: 0.0 +average reward score: 5.72265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.26%) |Training time=0.49s (22.58%) |Others=0.09 (4.16%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.50 +[2023-04-14 12:08:58,069] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 5476|ppo_ep: 1|act_loss: 0.01763916015625|cri_loss: 0.0101776123046875|unsuper_loss: 0.0 +average reward score: 5.32421875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.17%) |Training time=0.49s (22.68%) |Others=0.09 (4.15%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5477|ppo_ep: 1|act_loss: -0.0136566162109375|cri_loss: -0.006420135498046875|unsuper_loss: 0.0 +average reward score: 4.8203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.61%) |Training time=0.47s (21.73%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5478|ppo_ep: 1|act_loss: -0.00641632080078125|cri_loss: -0.0027980804443359375|unsuper_loss: 0.0 +average reward score: 4.64453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.76%) |Training time=0.47s (21.60%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.50 +[2023-04-14 12:09:04,432] [INFO] [logging.py:96:log_dist] [Rank 0] step=5480, skipped=70, lr=[2.624570779189218e-06, 2.624570779189218e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:09:04,450] [INFO] [timer.py:199:stop] epoch=0/micro_step=5480/global_step=5480, RunningAvgSamplesPerSec=105.35506986444176, CurrSamplesPerSec=103.88135870015302, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:09:04,543] [INFO] [logging.py:96:log_dist] [Rank 0] step=5480, skipped=90, lr=[1.3770464639591064e-06, 1.3770464639591064e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5479|ppo_ep: 1|act_loss: 0.081298828125|cri_loss: 0.0440673828125|unsuper_loss: 0.0 +average reward score: 4.8359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.77%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5480|ppo_ep: 1|act_loss: 0.0038700103759765625|cri_loss: 0.0020809173583984375|unsuper_loss: 0.0 +average reward score: 5.1484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.13%) |Training time=0.48s (22.23%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5481|ppo_ep: 1|act_loss: 0.002227783203125|cri_loss: 0.0013637542724609375|unsuper_loss: 0.0 +average reward score: 5.58203125 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.74s (74.94%) |Training time=0.48s (20.76%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5482|ppo_ep: 1|act_loss: 0.04876708984375|cri_loss: 0.02801513671875|unsuper_loss: 0.0 +average reward score: 4.67578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.93%) |Training time=0.49s (22.46%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5483|ppo_ep: 1|act_loss: -0.0153961181640625|cri_loss: -0.0074615478515625|unsuper_loss: 0.0 +average reward score: 4.8984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.18%) |Training time=0.48s (22.18%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5484|ppo_ep: 1|act_loss: -0.034210205078125|cri_loss: -0.0165557861328125|unsuper_loss: 0.0 +average reward score: 4.9453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.00%) |Training time=0.48s (22.35%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5485|ppo_ep: 1|act_loss: -0.01233673095703125|cri_loss: -0.005908966064453125|unsuper_loss: 0.0 +average reward score: 4.61328125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.97%) |Training time=0.48s (22.42%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5486|ppo_ep: 1|act_loss: -0.01239013671875|cri_loss: -0.005908966064453125|unsuper_loss: 0.0 +average reward score: 5.265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.92%) |Training time=0.49s (22.47%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5487|ppo_ep: 1|act_loss: 0.019683837890625|cri_loss: 0.01030731201171875|unsuper_loss: 0.0 +average reward score: 4.40234375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.03%) |Training time=0.48s (22.35%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5488|ppo_ep: 1|act_loss: -0.0013446807861328125|cri_loss: -0.0006318092346191406|unsuper_loss: 0.0 +average reward score: 4.875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.03%) |Training time=0.48s (22.34%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50 +[2023-04-14 12:09:26,196] [INFO] [logging.py:96:log_dist] [Rank 0] step=5490, skipped=70, lr=[2.6080551319246483e-06, 2.6080551319246483e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:09:26,214] [INFO] [timer.py:199:stop] epoch=0/micro_step=5490/global_step=5490, RunningAvgSamplesPerSec=105.34444744793811, CurrSamplesPerSec=99.1912971502897, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:09:26,306] [INFO] [logging.py:96:log_dist] [Rank 0] step=5490, skipped=90, lr=[1.3684554622399404e-06, 1.3684554622399404e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5489|ppo_ep: 1|act_loss: 0.0125579833984375|cri_loss: 0.006900787353515625|unsuper_loss: 0.0 +average reward score: 5.0546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.96%) |Training time=0.49s (22.43%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5490|ppo_ep: 1|act_loss: 0.0027523040771484375|cri_loss: 0.0015411376953125|unsuper_loss: 0.0 +average reward score: 4.28125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.18%) |Training time=0.48s (22.20%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5491|ppo_ep: 1|act_loss: 0.029052734375|cri_loss: 0.014892578125|unsuper_loss: 0.0 +average reward score: 5.37109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.43%) |Training time=0.47s (21.91%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5492|ppo_ep: 1|act_loss: -0.00307464599609375|cri_loss: -0.00016021728515625|unsuper_loss: 0.0 +average reward score: 5.15234375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.72%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5493|ppo_ep: 1|act_loss: 0.0474853515625|cri_loss: 0.025177001953125|unsuper_loss: 0.0 +average reward score: 4.61328125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.51%) |Training time=0.45s (20.85%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5494|ppo_ep: 1|act_loss: -0.020263671875|cri_loss: -0.0094757080078125|unsuper_loss: 0.0 +average reward score: 4.640625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.28%) |Training time=0.48s (22.12%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5495|ppo_ep: 1|act_loss: -0.0246429443359375|cri_loss: -0.0103759765625|unsuper_loss: 0.0 +average reward score: 5.4375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.97%) |Training time=0.46s (21.33%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5496|ppo_ep: 1|act_loss: -0.0308074951171875|cri_loss: -0.0125885009765625|unsuper_loss: 0.0 +average reward score: 4.64453125 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.70s (72.86%) |Training time=0.51s (21.72%) |Others=0.13 (5.43%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5497|ppo_ep: 1|act_loss: -0.038238525390625|cri_loss: -0.0164947509765625|unsuper_loss: 0.0 +average reward score: 5.1015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.01%) |Training time=0.46s (21.30%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5498|ppo_ep: 1|act_loss: -0.0411376953125|cri_loss: -0.019500732421875|unsuper_loss: 0.0 +average reward score: 4.7265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.82%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.50 +[2023-04-14 12:09:48,028] [INFO] [logging.py:96:log_dist] [Rank 0] step=5500, skipped=70, lr=[2.591572345131111e-06, 2.591572345131111e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:09:48,046] [INFO] [timer.py:199:stop] epoch=0/micro_step=5500/global_step=5500, RunningAvgSamplesPerSec=105.3416564703474, CurrSamplesPerSec=104.90333129598685, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:09:48,139] [INFO] [logging.py:96:log_dist] [Rank 0] step=5500, skipped=90, lr=[1.3598812327405274e-06, 1.3598812327405274e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5499|ppo_ep: 1|act_loss: -0.023468017578125|cri_loss: -0.0111083984375|unsuper_loss: 0.0 +average reward score: 4.46484375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.68%) |Training time=0.47s (21.65%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5500|ppo_ep: 1|act_loss: -0.010467529296875|cri_loss: -0.003673553466796875|unsuper_loss: 0.0 +average reward score: 5.546875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.86%) |Training time=0.45s (20.52%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5501|ppo_ep: 1|act_loss: -0.0016307830810546875|cri_loss: -0.0007171630859375|unsuper_loss: 0.0 +average reward score: 5.59375 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.10%) |Training time=0.46s (20.46%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.08 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5502|ppo_ep: 1|act_loss: 0.020538330078125|cri_loss: 0.010650634765625|unsuper_loss: 0.0 +average reward score: 3.98828125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.34%) |Training time=0.48s (22.00%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5503|ppo_ep: 1|act_loss: 0.0221099853515625|cri_loss: 0.01146697998046875|unsuper_loss: 0.0 +average reward score: 5.8046875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.48%) |Training time=0.48s (21.92%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5504|ppo_ep: 1|act_loss: 0.015380859375|cri_loss: 0.008544921875|unsuper_loss: 0.0 +average reward score: 5.2265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.11%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5505|ppo_ep: 1|act_loss: -0.045379638671875|cri_loss: -0.021820068359375|unsuper_loss: 0.0 +average reward score: 6.23046875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.37%) |Training time=0.48s (21.98%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5506|ppo_ep: 1|act_loss: 0.0024051666259765625|cri_loss: 0.0026035308837890625|unsuper_loss: 0.0 +average reward score: 5.1796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.37%) |Training time=0.48s (22.02%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5507|ppo_ep: 1|act_loss: 0.017333984375|cri_loss: 0.00897979736328125|unsuper_loss: 0.0 +average reward score: 5.5625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.49%) |Training time=0.48s (21.87%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5508|ppo_ep: 1|act_loss: 0.03814697265625|cri_loss: 0.0200347900390625|unsuper_loss: 0.0 +average reward score: 4.98828125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.49%) |Training time=0.48s (21.91%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.50 +[2023-04-14 12:10:09,877] [INFO] [logging.py:96:log_dist] [Rank 0] step=5510, skipped=70, lr=[2.5751226631232703e-06, 2.5751226631232703e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:10:09,895] [INFO] [timer.py:199:stop] epoch=0/micro_step=5510/global_step=5510, RunningAvgSamplesPerSec=105.33779970588965, CurrSamplesPerSec=105.1696580948784, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:10:09,987] [INFO] [logging.py:96:log_dist] [Rank 0] step=5510, skipped=90, lr=[1.3513239025516312e-06, 1.3513239025516312e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5509|ppo_ep: 1|act_loss: 0.01238250732421875|cri_loss: 0.00702667236328125|unsuper_loss: 0.0 +average reward score: 4.78125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.47s (21.58%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5510|ppo_ep: 1|act_loss: -0.0090484619140625|cri_loss: -0.0040740966796875|unsuper_loss: 0.0 +average reward score: 4.44140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.84%) |Training time=0.46s (21.50%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5511|ppo_ep: 1|act_loss: 0.001495361328125|cri_loss: 0.00122833251953125|unsuper_loss: 0.0 +average reward score: 4.609375 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.76%) |Training time=0.46s (19.92%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.74 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5512|ppo_ep: 1|act_loss: -0.004703521728515625|cri_loss: -0.0022373199462890625|unsuper_loss: 0.0 +average reward score: 5.6484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.91%) |Training time=0.46s (21.46%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5513|ppo_ep: 1|act_loss: -0.0171966552734375|cri_loss: -0.00771331787109375|unsuper_loss: 0.0 +average reward score: 5.3359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.81%) |Training time=0.47s (21.56%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5514|ppo_ep: 1|act_loss: -0.035400390625|cri_loss: -0.017181396484375|unsuper_loss: 0.0 +average reward score: 4.69921875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.72%) |Training time=0.47s (21.63%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5515|ppo_ep: 1|act_loss: -0.051422119140625|cri_loss: -0.0250396728515625|unsuper_loss: 0.0 +average reward score: 5.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.80%) |Training time=0.47s (21.57%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5516|ppo_ep: 1|act_loss: -0.009002685546875|cri_loss: -0.00354766845703125|unsuper_loss: 0.0 +average reward score: 4.46484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.46s (21.53%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5517|ppo_ep: 1|act_loss: -0.044677734375|cri_loss: -0.021240234375|unsuper_loss: 0.0 +average reward score: 4.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.46s (21.54%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5518|ppo_ep: 1|act_loss: 0.028076171875|cri_loss: 0.01453399658203125|unsuper_loss: 0.0 +average reward score: 5.515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.70%) |Training time=0.47s (21.66%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50 +[2023-04-14 12:10:31,647] [INFO] [logging.py:96:log_dist] [Rank 0] step=5520, skipped=70, lr=[2.5587063297250976e-06, 2.5587063297250976e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:10:31,665] [INFO] [timer.py:199:stop] epoch=0/micro_step=5520/global_step=5520, RunningAvgSamplesPerSec=105.33906190165855, CurrSamplesPerSec=109.92073812427265, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:10:31,758] [INFO] [logging.py:96:log_dist] [Rank 0] step=5520, skipped=90, lr=[1.342783598513529e-06, 1.342783598513529e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5519|ppo_ep: 1|act_loss: 0.0145111083984375|cri_loss: 0.00746917724609375|unsuper_loss: 0.0 +average reward score: 4.609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.42%) |Training time=0.45s (20.96%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5520|ppo_ep: 1|act_loss: 0.0042877197265625|cri_loss: 0.00226593017578125|unsuper_loss: 0.0 +average reward score: 4.6484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.67%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5521|ppo_ep: 1|act_loss: -0.011566162109375|cri_loss: -0.00533294677734375|unsuper_loss: 0.0 +average reward score: 5.86328125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.74%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5522|ppo_ep: 1|act_loss: 0.009521484375|cri_loss: 0.004878997802734375|unsuper_loss: 0.0 +average reward score: 5.0078125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.72%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5523|ppo_ep: 1|act_loss: 0.01145172119140625|cri_loss: 0.0062408447265625|unsuper_loss: 0.0 +average reward score: 4.734375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.58%) |Training time=0.45s (20.75%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5524|ppo_ep: 1|act_loss: -0.00016379356384277344|cri_loss: 0.00010895729064941406|unsuper_loss: 0.0 +average reward score: 5.171875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.18%) |Training time=0.46s (21.17%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5525|ppo_ep: 1|act_loss: -0.0079498291015625|cri_loss: -0.003627777099609375|unsuper_loss: 0.0 +average reward score: 4.75 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.60s (71.96%) |Training time=0.52s (23.35%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.42 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5526|ppo_ep: 1|act_loss: -0.0076751708984375|cri_loss: -0.0037555694580078125|unsuper_loss: 0.0 +average reward score: 4.5859375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.14%) |Training time=0.46s (21.23%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5527|ppo_ep: 1|act_loss: 0.01256561279296875|cri_loss: 0.00673675537109375|unsuper_loss: 0.0 +average reward score: 5.83984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.33%) |Training time=0.45s (21.03%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5528|ppo_ep: 1|act_loss: 0.04022216796875|cri_loss: 0.024658203125|unsuper_loss: 0.0 +average reward score: 4.41015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.00%) |Training time=0.46s (21.35%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50 +[2023-04-14 12:10:53,298] [INFO] [logging.py:96:log_dist] [Rank 0] step=5530, skipped=70, lr=[2.5423235882662523e-06, 2.5423235882662523e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:10:53,316] [INFO] [timer.py:199:stop] epoch=0/micro_step=5530/global_step=5530, RunningAvgSamplesPerSec=105.33915297205107, CurrSamplesPerSec=106.70993440034474, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:10:53,409] [INFO] [logging.py:96:log_dist] [Rank 0] step=5530, skipped=90, lr=[1.3342604472141296e-06, 1.3342604472141296e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5529|ppo_ep: 1|act_loss: -0.0207672119140625|cri_loss: -0.00991058349609375|unsuper_loss: 0.0 +average reward score: 4.6953125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.07%) |Training time=0.46s (21.33%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5530|ppo_ep: 1|act_loss: -0.0274200439453125|cri_loss: -0.01328277587890625|unsuper_loss: 0.0 +average reward score: 5.359375 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.72s (75.50%) |Training time=0.46s (20.08%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.08 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5531|ppo_ep: 1|act_loss: 0.009490966796875|cri_loss: 0.005069732666015625|unsuper_loss: 0.0 +average reward score: 4.8203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.11%) |Training time=0.46s (21.30%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5532|ppo_ep: 1|act_loss: -0.006946563720703125|cri_loss: -0.00295257568359375|unsuper_loss: 0.0 +average reward score: 5.43359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.44%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5533|ppo_ep: 1|act_loss: 0.03204345703125|cri_loss: 0.016845703125|unsuper_loss: 0.0 +average reward score: 5.41015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.84%) |Training time=0.46s (21.53%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5534|ppo_ep: 1|act_loss: -0.0038604736328125|cri_loss: -0.001781463623046875|unsuper_loss: 0.0 +average reward score: 5.5390625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.29%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5535|ppo_ep: 1|act_loss: -0.01529693603515625|cri_loss: -0.0073699951171875|unsuper_loss: 0.0 +average reward score: 4.7265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.12%) |Training time=0.46s (21.25%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5536|ppo_ep: 1|act_loss: -0.0262298583984375|cri_loss: -0.0125732421875|unsuper_loss: 0.0 +average reward score: 5.5390625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.72%) |Training time=0.47s (21.65%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5537|ppo_ep: 1|act_loss: 0.0080108642578125|cri_loss: 0.0042877197265625|unsuper_loss: 0.0 +average reward score: 5.44140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.93%) |Training time=0.46s (21.41%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5538|ppo_ep: 1|act_loss: -0.0191802978515625|cri_loss: -0.00933837890625|unsuper_loss: 0.0 +average reward score: 5.68359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.05%) |Training time=0.44s (20.28%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.50 +[2023-04-14 12:11:14,996] [INFO] [logging.py:96:log_dist] [Rank 0] step=5540, skipped=70, lr=[2.5259746815784854e-06, 2.5259746815784854e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:11:15,014] [INFO] [timer.py:199:stop] epoch=0/micro_step=5540/global_step=5540, RunningAvgSamplesPerSec=105.344337416235, CurrSamplesPerSec=110.70370709457649, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:11:15,107] [INFO] [logging.py:96:log_dist] [Rank 0] step=5540, skipped=90, lr=[1.3257545749870973e-06, 1.3257545749870973e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5539|ppo_ep: 1|act_loss: -0.007534027099609375|cri_loss: -0.00356292724609375|unsuper_loss: 0.0 +average reward score: 5.08984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.41%) |Training time=0.45s (20.96%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5540|ppo_ep: 1|act_loss: 0.03045654296875|cri_loss: 0.0161895751953125|unsuper_loss: 0.0 +average reward score: 5.3828125 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.76%) |Training time=0.46s (19.92%) |Others=0.10 (4.33%)|CurSamplesPerSec=13.77 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5541|ppo_ep: 1|act_loss: -0.0088043212890625|cri_loss: -0.003543853759765625|unsuper_loss: 0.0 +average reward score: 4.87109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.03%) |Training time=0.46s (21.32%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5542|ppo_ep: 1|act_loss: -0.0079803466796875|cri_loss: -0.003627777099609375|unsuper_loss: 0.0 +average reward score: 5.8046875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.84%) |Training time=0.46s (21.54%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5543|ppo_ep: 1|act_loss: 0.0159912109375|cri_loss: 0.0084991455078125|unsuper_loss: 0.0 +average reward score: 5.6328125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.46s (21.53%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5544|ppo_ep: 1|act_loss: -0.01409912109375|cri_loss: -0.00643157958984375|unsuper_loss: 0.0 +average reward score: 5.3203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.85%) |Training time=0.46s (21.51%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5545|ppo_ep: 1|act_loss: 0.0128326416015625|cri_loss: 0.00830841064453125|unsuper_loss: 0.0 +average reward score: 5.33984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.81%) |Training time=0.47s (21.56%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5546|ppo_ep: 1|act_loss: 0.002899169921875|cri_loss: 0.0016632080078125|unsuper_loss: 0.0 +average reward score: 5.765625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.83%) |Training time=0.49s (22.54%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5547|ppo_ep: 1|act_loss: 0.00019216537475585938|cri_loss: 0.00024962425231933594|unsuper_loss: 0.0 +average reward score: 4.4296875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.49%) |Training time=0.47s (21.87%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5548|ppo_ep: 1|act_loss: -0.00588226318359375|cri_loss: -0.002727508544921875|unsuper_loss: 0.0 +average reward score: 5.62890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.99%) |Training time=0.48s (22.40%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50 +[2023-04-14 12:11:36,745] [INFO] [logging.py:96:log_dist] [Rank 0] step=5550, skipped=70, lr=[2.5096598519920344e-06, 2.5096598519920344e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:11:36,763] [INFO] [timer.py:199:stop] epoch=0/micro_step=5550/global_step=5550, RunningAvgSamplesPerSec=105.34174265021153, CurrSamplesPerSec=101.51943477330195, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:11:36,856] [INFO] [logging.py:96:log_dist] [Rank 0] step=5550, skipped=90, lr=[1.3172661079099752e-06, 1.3172661079099752e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5549|ppo_ep: 1|act_loss: 0.0240631103515625|cri_loss: 0.01264190673828125|unsuper_loss: 0.0 +average reward score: 6.34765625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.21%) |Training time=0.48s (22.16%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5550|ppo_ep: 1|act_loss: 0.01739501953125|cri_loss: 0.00885772705078125|unsuper_loss: 0.0 +average reward score: 4.58203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.33%) |Training time=0.48s (22.03%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5551|ppo_ep: 1|act_loss: 0.00698089599609375|cri_loss: 0.00400543212890625|unsuper_loss: 0.0 +average reward score: 5.34375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.08%) |Training time=0.48s (22.30%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5552|ppo_ep: 1|act_loss: 0.049072265625|cri_loss: 0.025238037109375|unsuper_loss: 0.0 +average reward score: 4.9140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.47s (22.03%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5553|ppo_ep: 1|act_loss: -0.00720977783203125|cri_loss: -0.00344085693359375|unsuper_loss: 0.0 +average reward score: 5.22265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.43%) |Training time=0.45s (20.93%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5554|ppo_ep: 1|act_loss: 0.0136260986328125|cri_loss: 0.007171630859375|unsuper_loss: 0.0 +average reward score: 4.88671875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.45s (21.09%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5555|ppo_ep: 1|act_loss: -0.016937255859375|cri_loss: -0.0080108642578125|unsuper_loss: 0.0 +average reward score: 5.74609375 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.92%) |Training time=0.46s (19.76%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5556|ppo_ep: 1|act_loss: 0.0487060546875|cri_loss: 0.025634765625|unsuper_loss: 0.0 +average reward score: 5.3828125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.86%) |Training time=0.47s (21.51%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5557|ppo_ep: 1|act_loss: -0.00664520263671875|cri_loss: -0.002841949462890625|unsuper_loss: 0.0 +average reward score: 4.62109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.15%) |Training time=0.46s (21.21%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5558|ppo_ep: 1|act_loss: -0.01214599609375|cri_loss: -0.005767822265625|unsuper_loss: 0.0 +average reward score: 5.19921875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.71%) |Training time=0.47s (21.69%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.50 +[2023-04-14 12:11:58,543] [INFO] [logging.py:96:log_dist] [Rank 0] step=5560, skipped=70, lr=[2.4933793413320327e-06, 2.4933793413320327e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:11:58,561] [INFO] [timer.py:199:stop] epoch=0/micro_step=5560/global_step=5560, RunningAvgSamplesPerSec=105.34322717762855, CurrSamplesPerSec=109.43845430039579, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:11:58,654] [INFO] [logging.py:96:log_dist] [Rank 0] step=5560, skipped=90, lr=[1.308795171802324e-06, 1.308795171802324e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5559|ppo_ep: 1|act_loss: -0.021575927734375|cri_loss: -0.0096282958984375|unsuper_loss: 0.0 +average reward score: 5.1015625 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.71%) |Training time=0.46s (20.74%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5560|ppo_ep: 1|act_loss: 0.004123687744140625|cri_loss: 0.0024566650390625|unsuper_loss: 0.0 +average reward score: 5.58984375 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.41%) |Training time=0.45s (20.14%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.19 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5561|ppo_ep: 1|act_loss: 0.0013027191162109375|cri_loss: 0.0007872581481933594|unsuper_loss: 0.0 +average reward score: 5.375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.12%) |Training time=0.46s (21.25%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5562|ppo_ep: 1|act_loss: -0.004364013671875|cri_loss: -0.001819610595703125|unsuper_loss: 0.0 +average reward score: 5.73046875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.05%) |Training time=0.46s (21.30%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5563|ppo_ep: 1|act_loss: -0.0013980865478515625|cri_loss: -0.0005540847778320312|unsuper_loss: 0.0 +average reward score: 5.4609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.07%) |Training time=0.46s (21.29%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5564|ppo_ep: 1|act_loss: -0.0019588470458984375|cri_loss: -0.0007309913635253906|unsuper_loss: 0.0 +average reward score: 5.265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.46s (21.39%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5565|ppo_ep: 1|act_loss: 0.0224609375|cri_loss: 0.01166534423828125|unsuper_loss: 0.0 +average reward score: 5.421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.00%) |Training time=0.46s (21.37%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5566|ppo_ep: 1|act_loss: -0.00531768798828125|cri_loss: -0.0025119781494140625|unsuper_loss: 0.0 +average reward score: 5.46875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.11%) |Training time=0.46s (21.25%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5567|ppo_ep: 1|act_loss: 0.0183563232421875|cri_loss: 0.00948333740234375|unsuper_loss: 0.0 +average reward score: 5.140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.83%) |Training time=0.47s (21.54%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5568|ppo_ep: 1|act_loss: 0.04541015625|cri_loss: 0.02337646484375|unsuper_loss: 0.0 +average reward score: 6.4375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.15%) |Training time=0.46s (21.19%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.50 +[2023-04-14 12:12:20,280] [INFO] [logging.py:96:log_dist] [Rank 0] step=5570, skipped=70, lr=[2.477133390914919e-06, 2.477133390914919e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:12:20,299] [INFO] [timer.py:199:stop] epoch=0/micro_step=5570/global_step=5570, RunningAvgSamplesPerSec=105.34633939663574, CurrSamplesPerSec=104.92982122851744, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:12:20,391] [INFO] [logging.py:96:log_dist] [Rank 0] step=5570, skipped=90, lr=[1.300341892223852e-06, 1.300341892223852e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5569|ppo_ep: 1|act_loss: -0.00798797607421875|cri_loss: -0.0038700103759765625|unsuper_loss: 0.0 +average reward score: 5.4609375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.82%) |Training time=0.47s (21.59%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5570|ppo_ep: 1|act_loss: -0.003910064697265625|cri_loss: -0.0016078948974609375|unsuper_loss: 0.0 +average reward score: 5.0 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.53%) |Training time=0.47s (20.16%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5571|ppo_ep: 1|act_loss: -0.00772857666015625|cri_loss: -0.0037689208984375|unsuper_loss: 0.0 +average reward score: 4.7265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.66%) |Training time=0.47s (21.73%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5572|ppo_ep: 1|act_loss: 0.011016845703125|cri_loss: 0.005828857421875|unsuper_loss: 0.0 +average reward score: 5.7734375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.83%) |Training time=0.46s (21.54%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5573|ppo_ep: 1|act_loss: -0.02130126953125|cri_loss: -0.0103607177734375|unsuper_loss: 0.0 +average reward score: 5.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.65%) |Training time=0.47s (21.73%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5574|ppo_ep: 1|act_loss: -0.018402099609375|cri_loss: -0.00804901123046875|unsuper_loss: 0.0 +average reward score: 5.640625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.68%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5575|ppo_ep: 1|act_loss: 0.001041412353515625|cri_loss: 0.0006079673767089844|unsuper_loss: 0.0 +average reward score: 5.65625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.68%) |Training time=0.47s (21.71%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5576|ppo_ep: 1|act_loss: 0.0243072509765625|cri_loss: 0.0126800537109375|unsuper_loss: 0.0 +average reward score: 5.375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.86%) |Training time=0.46s (21.46%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50 +[2023-04-14 12:12:37,824] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 5577|ppo_ep: 1|act_loss: 0.058837890625|cri_loss: 0.03009033203125|unsuper_loss: 0.0 +average reward score: 5.046875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.16%) |Training time=0.47s (21.69%) |Others=0.09 (4.15%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.50 +[2023-04-14 12:12:39,972] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 5578|ppo_ep: 1|act_loss: 0.04931640625|cri_loss: 0.025543212890625|unsuper_loss: 0.0 +average reward score: 4.96484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.24%) |Training time=0.46s (21.61%) |Others=0.09 (4.15%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.50 +[2023-04-14 12:12:42,017] [INFO] [logging.py:96:log_dist] [Rank 0] step=5580, skipped=70, lr=[2.4609222415448723e-06, 2.4609222415448723e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:12:42,036] [INFO] [timer.py:199:stop] epoch=0/micro_step=5580/global_step=5580, RunningAvgSamplesPerSec=105.34619836932745, CurrSamplesPerSec=107.6787841445882, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:12:42,128] [INFO] [logging.py:96:log_dist] [Rank 0] step=5580, skipped=92, lr=[1.2935920654711779e-06, 1.2935920654711779e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5579|ppo_ep: 1|act_loss: 0.010833740234375|cri_loss: 0.00600433349609375|unsuper_loss: 0.0 +average reward score: 5.09375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.07%) |Training time=0.46s (21.31%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5580|ppo_ep: 1|act_loss: 0.005214691162109375|cri_loss: 0.002765655517578125|unsuper_loss: 0.0 +average reward score: 5.43359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.59%) |Training time=0.47s (21.80%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5581|ppo_ep: 1|act_loss: 0.001560211181640625|cri_loss: 0.0010395050048828125|unsuper_loss: 0.0 +average reward score: 5.1640625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.00%) |Training time=0.46s (21.36%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5582|ppo_ep: 1|act_loss: -0.055084228515625|cri_loss: -0.026885986328125|unsuper_loss: 0.0 +average reward score: 5.5625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.15%) |Training time=0.46s (21.21%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5583|ppo_ep: 1|act_loss: -0.031036376953125|cri_loss: -0.0148162841796875|unsuper_loss: 0.0 +average reward score: 6.046875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.46%) |Training time=0.48s (21.92%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5584|ppo_ep: 1|act_loss: 0.0030517578125|cri_loss: 0.0017175674438476562|unsuper_loss: 0.0 +average reward score: 5.640625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.00%) |Training time=0.46s (21.37%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5585|ppo_ep: 1|act_loss: -0.010528564453125|cri_loss: -0.00510406494140625|unsuper_loss: 0.0 +average reward score: 5.515625 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.04%) |Training time=0.46s (19.65%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.77 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5586|ppo_ep: 1|act_loss: -0.0234375|cri_loss: -0.01132965087890625|unsuper_loss: 0.0 +average reward score: 4.91796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.62%) |Training time=0.46s (21.11%) |Others=0.11 (5.27%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5587|ppo_ep: 1|act_loss: 0.04998779296875|cri_loss: 0.0256805419921875|unsuper_loss: 0.0 +average reward score: 5.2421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.01%) |Training time=0.46s (21.37%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5588|ppo_ep: 1|act_loss: 0.00350189208984375|cri_loss: 0.0020236968994140625|unsuper_loss: 0.0 +average reward score: 5.4921875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.21%) |Training time=0.46s (21.12%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.50 +[2023-04-14 12:13:03,886] [INFO] [logging.py:96:log_dist] [Rank 0] step=5590, skipped=70, lr=[2.4447461335102328e-06, 2.4447461335102328e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:13:03,904] [INFO] [timer.py:199:stop] epoch=0/micro_step=5590/global_step=5590, RunningAvgSamplesPerSec=105.3472014072174, CurrSamplesPerSec=99.85687682696464, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:13:03,996] [INFO] [logging.py:96:log_dist] [Rank 0] step=5590, skipped=92, lr=[1.2851708832191219e-06, 1.2851708832191219e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5589|ppo_ep: 1|act_loss: -0.00678253173828125|cri_loss: -0.00315093994140625|unsuper_loss: 0.0 +average reward score: 5.7421875 +------------------------------------------------------------------------------------- +|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.11%) |Training time=0.50s (22.40%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.35 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5590|ppo_ep: 1|act_loss: 0.05072021484375|cri_loss: 0.02679443359375|unsuper_loss: 0.0 +average reward score: 5.62890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.83%) |Training time=0.47s (21.55%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5591|ppo_ep: 1|act_loss: -0.0042266845703125|cri_loss: -0.0016841888427734375|unsuper_loss: 0.0 +average reward score: 5.609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.00%) |Training time=0.46s (21.35%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5592|ppo_ep: 1|act_loss: 0.0079345703125|cri_loss: 0.0050506591796875|unsuper_loss: 0.0 +average reward score: 5.26953125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.24%) |Training time=0.45s (21.08%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5593|ppo_ep: 1|act_loss: -0.07049560546875|cri_loss: -0.034515380859375|unsuper_loss: 0.0 +average reward score: 5.80859375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.98%) |Training time=0.49s (22.46%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5594|ppo_ep: 1|act_loss: 0.0115966796875|cri_loss: 0.00616455078125|unsuper_loss: 0.0 +average reward score: 5.9453125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.26%) |Training time=0.48s (22.13%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5595|ppo_ep: 1|act_loss: -0.01904296875|cri_loss: -0.0092620849609375|unsuper_loss: 0.0 +average reward score: 5.703125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.34%) |Training time=0.48s (22.07%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5596|ppo_ep: 1|act_loss: -0.025482177734375|cri_loss: -0.012420654296875|unsuper_loss: 0.0 +average reward score: 5.8203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.34%) |Training time=0.48s (22.06%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5597|ppo_ep: 1|act_loss: -0.030914306640625|cri_loss: -0.01505279541015625|unsuper_loss: 0.0 +average reward score: 5.28125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.10%) |Training time=0.49s (22.30%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5598|ppo_ep: 1|act_loss: -0.008148193359375|cri_loss: -0.0037078857421875|unsuper_loss: 0.0 +average reward score: 4.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.19%) |Training time=0.48s (22.19%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.50 +[2023-04-14 12:13:25,592] [INFO] [logging.py:96:log_dist] [Rank 0] step=5600, skipped=70, lr=[2.4286053065799456e-06, 2.4286053065799456e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:13:25,611] [INFO] [timer.py:199:stop] epoch=0/micro_step=5600/global_step=5600, RunningAvgSamplesPerSec=105.3410928350194, CurrSamplesPerSec=100.1417826315008, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:13:25,703] [INFO] [logging.py:96:log_dist] [Rank 0] step=5600, skipped=92, lr=[1.2767677076651913e-06, 1.2767677076651913e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5599|ppo_ep: 1|act_loss: -0.0034942626953125|cri_loss: -0.0016145706176757812|unsuper_loss: 0.0 +average reward score: 5.06640625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.27%) |Training time=0.48s (22.07%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5600|ppo_ep: 1|act_loss: 0.0264892578125|cri_loss: 0.01355743408203125|unsuper_loss: 0.0 +average reward score: 5.984375 +------------------------------------------------------------------------------------- +|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.56%) |Training time=0.50s (21.20%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.55 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5601|ppo_ep: 1|act_loss: 0.016510009765625|cri_loss: 0.0084381103515625|unsuper_loss: 0.0 +average reward score: 4.64453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.85%) |Training time=0.47s (21.54%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5602|ppo_ep: 1|act_loss: -0.00408935546875|cri_loss: -0.0019178390502929688|unsuper_loss: 0.0 +average reward score: 6.00390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.55%) |Training time=0.47s (21.84%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5603|ppo_ep: 1|act_loss: -0.001995086669921875|cri_loss: -0.000728607177734375|unsuper_loss: 0.0 +average reward score: 6.02734375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.06%) |Training time=0.46s (21.30%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5604|ppo_ep: 1|act_loss: -0.00925445556640625|cri_loss: -0.00435638427734375|unsuper_loss: 0.0 +average reward score: 4.9609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.61%) |Training time=0.47s (21.78%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5605|ppo_ep: 1|act_loss: 0.007480621337890625|cri_loss: 0.004024505615234375|unsuper_loss: 0.0 +average reward score: 4.8984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.91%) |Training time=0.46s (21.46%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5606|ppo_ep: 1|act_loss: -0.0097808837890625|cri_loss: -0.00438690185546875|unsuper_loss: 0.0 +average reward score: 4.73046875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.81%) |Training time=0.47s (21.58%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5607|ppo_ep: 1|act_loss: -0.001796722412109375|cri_loss: -0.0005803108215332031|unsuper_loss: 0.0 +average reward score: 5.6640625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.73%) |Training time=0.47s (21.65%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5608|ppo_ep: 1|act_loss: 0.003429412841796875|cri_loss: 0.0020198822021484375|unsuper_loss: 0.0 +average reward score: 5.40234375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.96%) |Training time=0.46s (21.42%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50 +[2023-04-14 12:13:47,414] [INFO] [logging.py:96:log_dist] [Rank 0] step=5610, skipped=70, lr=[2.4125000000000015e-06, 2.4125000000000015e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:13:47,432] [INFO] [timer.py:199:stop] epoch=0/micro_step=5610/global_step=5610, RunningAvgSamplesPerSec=105.33799128956171, CurrSamplesPerSec=100.43267746437243, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:13:47,524] [INFO] [logging.py:96:log_dist] [Rank 0] step=5610, skipped=92, lr=[1.2683826633647206e-06, 1.2683826633647206e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5609|ppo_ep: 1|act_loss: 0.01202392578125|cri_loss: 0.006450653076171875|unsuper_loss: 0.0 +average reward score: 5.16796875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.14%) |Training time=0.48s (22.26%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5610|ppo_ep: 1|act_loss: -0.012664794921875|cri_loss: -0.006195068359375|unsuper_loss: 0.0 +average reward score: 5.33984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.01%) |Training time=0.48s (22.37%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5611|ppo_ep: 1|act_loss: 0.043304443359375|cri_loss: 0.0238037109375|unsuper_loss: 0.0 +average reward score: 5.3359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.47s (21.91%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5612|ppo_ep: 1|act_loss: 0.00029969215393066406|cri_loss: 0.00031495094299316406|unsuper_loss: 0.0 +average reward score: 5.3359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.09%) |Training time=0.48s (22.31%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5613|ppo_ep: 1|act_loss: 0.019256591796875|cri_loss: 0.0101470947265625|unsuper_loss: 0.0 +average reward score: 4.8125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.24%) |Training time=0.48s (22.13%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5614|ppo_ep: 1|act_loss: 0.002948760986328125|cri_loss: 0.0017547607421875|unsuper_loss: 0.0 +average reward score: 4.76171875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.64%) |Training time=0.48s (21.77%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5615|ppo_ep: 1|act_loss: 0.0222320556640625|cri_loss: 0.01169586181640625|unsuper_loss: 0.0 +average reward score: 5.1328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.10%) |Training time=0.48s (22.19%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5616|ppo_ep: 1|act_loss: -0.0018444061279296875|cri_loss: -0.0008955001831054688|unsuper_loss: 0.0 +average reward score: 4.73828125 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.64%) |Training time=0.49s (21.07%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.68 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5617|ppo_ep: 1|act_loss: 0.031494140625|cri_loss: 0.0165557861328125|unsuper_loss: 0.0 +average reward score: 5.58203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.10%) |Training time=0.48s (22.24%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5618|ppo_ep: 1|act_loss: 0.0479736328125|cri_loss: 0.0246734619140625|unsuper_loss: 0.0 +average reward score: 5.19140625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.79%) |Training time=0.47s (21.61%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.50 +[2023-04-14 12:14:09,378] [INFO] [logging.py:96:log_dist] [Rank 0] step=5620, skipped=70, lr=[2.396430452489896e-06, 2.396430452489896e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:14:09,396] [INFO] [timer.py:199:stop] epoch=0/micro_step=5620/global_step=5620, RunningAvgSamplesPerSec=105.32878407607137, CurrSamplesPerSec=98.12636157931868, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:14:09,489] [INFO] [logging.py:96:log_dist] [Rank 0] step=5620, skipped=92, lr=[1.260015874604294e-06, 1.260015874604294e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5619|ppo_ep: 1|act_loss: 0.005878448486328125|cri_loss: 0.00319671630859375|unsuper_loss: 0.0 +average reward score: 4.60546875 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.68s (74.07%) |Training time=0.49s (21.54%) |Others=0.10 (4.39%)|CurSamplesPerSec=14.11 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5620|ppo_ep: 1|act_loss: -0.0250244140625|cri_loss: -0.01186370849609375|unsuper_loss: 0.0 +average reward score: 5.42578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.57%) |Training time=0.47s (21.76%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5621|ppo_ep: 1|act_loss: 0.03326416015625|cri_loss: 0.0173797607421875|unsuper_loss: 0.0 +average reward score: 5.63671875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.69%) |Training time=0.46s (21.65%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5622|ppo_ep: 1|act_loss: -0.02783203125|cri_loss: -0.0135955810546875|unsuper_loss: 0.0 +average reward score: 5.21484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.36%) |Training time=0.47s (21.99%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5623|ppo_ep: 1|act_loss: -0.0118560791015625|cri_loss: -0.005462646484375|unsuper_loss: 0.0 +average reward score: 5.359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.40%) |Training time=0.47s (21.95%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5624|ppo_ep: 1|act_loss: 0.025970458984375|cri_loss: 0.0137176513671875|unsuper_loss: 0.0 +average reward score: 5.0546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.35%) |Training time=0.47s (21.99%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5625|ppo_ep: 1|act_loss: 0.0224609375|cri_loss: 0.01157379150390625|unsuper_loss: 0.0 +average reward score: 5.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.50%) |Training time=0.47s (21.84%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5626|ppo_ep: 1|act_loss: 0.015960693359375|cri_loss: 0.00885009765625|unsuper_loss: 0.0 +average reward score: 5.40234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.36%) |Training time=0.47s (21.98%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5627|ppo_ep: 1|act_loss: -0.0035419464111328125|cri_loss: -0.0016145706176757812|unsuper_loss: 0.0 +average reward score: 5.19921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.35%) |Training time=0.47s (22.00%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5628|ppo_ep: 1|act_loss: -0.01557159423828125|cri_loss: -0.00714874267578125|unsuper_loss: 0.0 +average reward score: 4.58984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.46%) |Training time=0.47s (21.89%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.50 +[2023-04-14 12:14:30,869] [INFO] [logging.py:96:log_dist] [Rank 0] step=5630, skipped=70, lr=[2.3803969022390895e-06, 2.3803969022390895e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:14:30,888] [INFO] [timer.py:199:stop] epoch=0/micro_step=5630/global_step=5630, RunningAvgSamplesPerSec=105.32614334516194, CurrSamplesPerSec=103.03302993013564, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:14:30,980] [INFO] [logging.py:96:log_dist] [Rank 0] step=5630, skipped=92, lr=[1.251667465399905e-06, 1.251667465399905e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5629|ppo_ep: 1|act_loss: 0.0026702880859375|cri_loss: 0.0017681121826171875|unsuper_loss: 0.0 +average reward score: 4.81640625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.52%) |Training time=0.47s (21.87%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5630|ppo_ep: 1|act_loss: -0.008331298828125|cri_loss: -0.003772735595703125|unsuper_loss: 0.0 +average reward score: 4.26953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.14%) |Training time=0.46s (21.17%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5631|ppo_ep: 1|act_loss: -0.002307891845703125|cri_loss: -4.9591064453125e-05|unsuper_loss: 0.0 +average reward score: 5.015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.30%) |Training time=0.45s (21.06%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5632|ppo_ep: 1|act_loss: 0.0172119140625|cri_loss: 0.0088653564453125|unsuper_loss: 0.0 +average reward score: 5.7890625 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.95%) |Training time=0.49s (20.77%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.68 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5633|ppo_ep: 1|act_loss: 0.006427764892578125|cri_loss: 0.003536224365234375|unsuper_loss: 0.0 +average reward score: 5.53125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.25%) |Training time=0.45s (21.09%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5634|ppo_ep: 1|act_loss: -0.0005903244018554688|cri_loss: -9.5367431640625e-06|unsuper_loss: 0.0 +average reward score: 5.359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.08%) |Training time=0.46s (21.30%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5635|ppo_ep: 1|act_loss: 0.02325439453125|cri_loss: 0.0118408203125|unsuper_loss: 0.0 +average reward score: 4.5390625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.26%) |Training time=0.45s (21.09%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5636|ppo_ep: 1|act_loss: 0.0030612945556640625|cri_loss: 0.00257110595703125|unsuper_loss: 0.0 +average reward score: 6.2109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.25%) |Training time=0.45s (21.10%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5637|ppo_ep: 1|act_loss: -0.00508880615234375|cri_loss: -0.002368927001953125|unsuper_loss: 0.0 +average reward score: 5.79296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.51%) |Training time=0.45s (20.83%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5638|ppo_ep: 1|act_loss: -0.00368499755859375|cri_loss: -0.0016355514526367188|unsuper_loss: 0.0 +average reward score: 5.5390625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.28%) |Training time=0.45s (21.07%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.50 +[2023-04-14 12:14:52,594] [INFO] [logging.py:96:log_dist] [Rank 0] step=5640, skipped=70, lr=[2.3643995869034777e-06, 2.3643995869034777e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:14:52,612] [INFO] [timer.py:199:stop] epoch=0/micro_step=5640/global_step=5640, RunningAvgSamplesPerSec=105.33151498596196, CurrSamplesPerSec=109.83789021262527, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:14:52,705] [INFO] [logging.py:96:log_dist] [Rank 0] step=5640, skipped=92, lr=[1.2433375594951166e-06, 1.2433375594951166e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5639|ppo_ep: 1|act_loss: 0.0013294219970703125|cri_loss: 0.0008649826049804688|unsuper_loss: 0.0 +average reward score: 5.73828125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.45s (21.09%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5640|ppo_ep: 1|act_loss: -0.02569580078125|cri_loss: -0.009002685546875|unsuper_loss: 0.0 +average reward score: 5.10546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.38%) |Training time=0.45s (20.96%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5641|ppo_ep: 1|act_loss: -0.0119476318359375|cri_loss: -0.005680084228515625|unsuper_loss: 0.0 +average reward score: 4.98046875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.12%) |Training time=0.46s (21.24%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5642|ppo_ep: 1|act_loss: 0.0367431640625|cri_loss: 0.018951416015625|unsuper_loss: 0.0 +average reward score: 5.23828125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.01%) |Training time=0.47s (21.40%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5643|ppo_ep: 1|act_loss: 0.0321044921875|cri_loss: 0.016815185546875|unsuper_loss: 0.0 +average reward score: 5.34375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.33%) |Training time=0.45s (21.01%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5644|ppo_ep: 1|act_loss: -0.0030670166015625|cri_loss: -0.0014181137084960938|unsuper_loss: 0.0 +average reward score: 5.51171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.82%) |Training time=0.47s (21.55%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5645|ppo_ep: 1|act_loss: -0.01788330078125|cri_loss: -0.0072021484375|unsuper_loss: 0.0 +average reward score: 5.17578125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.25%) |Training time=0.46s (21.13%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5646|ppo_ep: 1|act_loss: -0.0009388923645019531|cri_loss: -6.29425048828125e-05|unsuper_loss: 0.0 +average reward score: 5.125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.44%) |Training time=0.45s (20.91%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5647|ppo_ep: 1|act_loss: 0.03594970703125|cri_loss: 0.0187530517578125|unsuper_loss: 0.0 +average reward score: 6.3203125 +------------------------------------------------------------------------------------- +|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.78s (75.12%) |Training time=0.49s (20.65%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.54 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5648|ppo_ep: 1|act_loss: 0.00289154052734375|cri_loss: 0.001529693603515625|unsuper_loss: 0.0 +average reward score: 5.6953125 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.63s (71.66%) |Training time=0.48s (21.32%) |Others=0.16 (7.02%)|CurSamplesPerSec=14.08 |AvgSamplesPerSec=14.50 +[2023-04-14 12:15:14,519] [INFO] [logging.py:96:log_dist] [Rank 0] step=5650, skipped=70, lr=[2.3484387436018617e-06, 2.3484387436018617e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:15:14,537] [INFO] [timer.py:199:stop] epoch=0/micro_step=5650/global_step=5650, RunningAvgSamplesPerSec=105.33311141352057, CurrSamplesPerSec=107.29409808518922, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:15:14,630] [INFO] [logging.py:96:log_dist] [Rank 0] step=5650, skipped=92, lr=[1.2350262803592295e-06, 1.2350262803592295e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5649|ppo_ep: 1|act_loss: -0.032562255859375|cri_loss: -0.01555633544921875|unsuper_loss: 0.0 +average reward score: 5.26171875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.04%) |Training time=0.46s (21.32%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5650|ppo_ep: 1|act_loss: -0.030517578125|cri_loss: -0.01496124267578125|unsuper_loss: 0.0 +average reward score: 5.50390625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.09%) |Training time=0.46s (21.27%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5651|ppo_ep: 1|act_loss: 0.00797271728515625|cri_loss: 0.004436492919921875|unsuper_loss: 0.0 +average reward score: 5.26953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.99%) |Training time=0.46s (21.37%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5652|ppo_ep: 1|act_loss: -0.0179290771484375|cri_loss: -0.0073394775390625|unsuper_loss: 0.0 +average reward score: 5.73046875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.13%) |Training time=0.45s (21.04%) |Others=0.10 (4.83%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5653|ppo_ep: 1|act_loss: 0.01305389404296875|cri_loss: 0.006961822509765625|unsuper_loss: 0.0 +average reward score: 5.44921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.24%) |Training time=0.45s (21.12%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5654|ppo_ep: 1|act_loss: 0.024688720703125|cri_loss: 0.01264190673828125|unsuper_loss: 0.0 +average reward score: 5.8359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.96%) |Training time=0.46s (21.40%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5655|ppo_ep: 1|act_loss: -0.01490020751953125|cri_loss: -0.00708770751953125|unsuper_loss: 0.0 +average reward score: 5.4375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.30%) |Training time=0.45s (21.05%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5656|ppo_ep: 1|act_loss: -0.00908660888671875|cri_loss: -0.00432586669921875|unsuper_loss: 0.0 +average reward score: 5.39453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.31%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5657|ppo_ep: 1|act_loss: 0.010772705078125|cri_loss: 0.005657196044921875|unsuper_loss: 0.0 +average reward score: 5.5625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.13%) |Training time=0.46s (21.24%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5658|ppo_ep: 1|act_loss: 0.020751953125|cri_loss: 0.0118560791015625|unsuper_loss: 0.0 +average reward score: 5.19140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.10%) |Training time=0.46s (21.24%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50 +[2023-04-14 12:15:36,069] [INFO] [logging.py:96:log_dist] [Rank 0] step=5660, skipped=70, lr=[2.3325146089124427e-06, 2.3325146089124427e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:15:36,087] [INFO] [timer.py:199:stop] epoch=0/micro_step=5660/global_step=5660, RunningAvgSamplesPerSec=105.33806343563634, CurrSamplesPerSec=105.81956599209691, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:15:36,180] [INFO] [logging.py:96:log_dist] [Rank 0] step=5660, skipped=92, lr=[1.2267337511854502e-06, 1.2267337511854502e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5659|ppo_ep: 1|act_loss: -0.008026123046875|cri_loss: -0.00384521484375|unsuper_loss: 0.0 +average reward score: 5.99609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.89%) |Training time=0.47s (21.51%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5660|ppo_ep: 1|act_loss: 0.0150604248046875|cri_loss: 0.00789642333984375|unsuper_loss: 0.0 +average reward score: 5.44140625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.25%) |Training time=0.49s (22.18%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5661|ppo_ep: 1|act_loss: -0.02191162109375|cri_loss: -0.01050567626953125|unsuper_loss: 0.0 +average reward score: 5.234375 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.60s (71.17%) |Training time=0.47s (20.79%) |Others=0.18 (8.04%)|CurSamplesPerSec=14.24 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5662|ppo_ep: 1|act_loss: 0.005733489990234375|cri_loss: 0.003093719482421875|unsuper_loss: 0.0 +average reward score: 5.703125 +------------------------------------------------------------------------------------- +|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.67s (74.59%) |Training time=0.47s (20.92%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.27 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5663|ppo_ep: 1|act_loss: -0.0006504058837890625|cri_loss: 3.337860107421875e-06|unsuper_loss: 0.0 +average reward score: 5.6796875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.18%) |Training time=0.46s (21.18%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5664|ppo_ep: 1|act_loss: 0.0027523040771484375|cri_loss: 0.002132415771484375|unsuper_loss: 0.0 +average reward score: 4.9453125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.08%) |Training time=0.46s (21.28%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5665|ppo_ep: 1|act_loss: -0.008880615234375|cri_loss: -0.00409698486328125|unsuper_loss: 0.0 +average reward score: 4.65625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.54%) |Training time=0.45s (20.78%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5666|ppo_ep: 1|act_loss: 0.028533935546875|cri_loss: 0.0145721435546875|unsuper_loss: 0.0 +average reward score: 4.81640625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.72%) |Training time=0.47s (21.67%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5667|ppo_ep: 1|act_loss: -0.037109375|cri_loss: -0.0174560546875|unsuper_loss: 0.0 +average reward score: 5.640625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.36%) |Training time=0.45s (20.98%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5668|ppo_ep: 1|act_loss: -0.022705078125|cri_loss: -0.0110626220703125|unsuper_loss: 0.0 +average reward score: 4.6484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.45%) |Training time=0.45s (20.90%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.50 +[2023-04-14 12:15:57,800] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +[2023-04-14 12:15:57,801] [INFO] [logging.py:96:log_dist] [Rank 0] step=5670, skipped=71, lr=[2.3182144686488735e-06, 2.3182144686488735e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:15:57,801] [INFO] [timer.py:199:stop] epoch=0/micro_step=5670/global_step=5670, RunningAvgSamplesPerSec=105.34351177301964, CurrSamplesPerSec=121.80373494216448, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:15:57,893] [INFO] [logging.py:96:log_dist] [Rank 0] step=5670, skipped=92, lr=[1.218460094889067e-06, 1.218460094889067e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5669|ppo_ep: 1|act_loss: 0.00289154052734375|cri_loss: 0.0015668869018554688|unsuper_loss: 0.0 +average reward score: 5.41796875 +------------------------------------------------------------------------------------- +|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.29%) |Training time=0.42s (20.05%) |Others=0.10 (4.67%)|CurSamplesPerSec=15.10 |AvgSamplesPerSec=14.50 +[2023-04-14 12:15:59,951] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 5670|ppo_ep: 1|act_loss: 0.0088653564453125|cri_loss: 0.00457000732421875|unsuper_loss: 0.0 +average reward score: 5.22265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.04%) |Training time=0.44s (20.34%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5671|ppo_ep: 1|act_loss: -0.0054931640625|cri_loss: -0.0025272369384765625|unsuper_loss: 0.0 +average reward score: 5.4921875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.36%) |Training time=0.45s (20.71%) |Others=0.11 (4.93%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5672|ppo_ep: 1|act_loss: 0.0093536376953125|cri_loss: 0.004886627197265625|unsuper_loss: 0.0 +average reward score: 5.4765625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.23%) |Training time=0.45s (21.10%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5673|ppo_ep: 1|act_loss: 0.04095458984375|cri_loss: 0.021514892578125|unsuper_loss: 0.0 +average reward score: 4.921875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.99%) |Training time=0.46s (21.37%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5674|ppo_ep: 1|act_loss: 0.0182952880859375|cri_loss: 0.0092620849609375|unsuper_loss: 0.0 +average reward score: 4.65625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.20%) |Training time=0.45s (21.14%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5675|ppo_ep: 1|act_loss: 0.003265380859375|cri_loss: 0.002109527587890625|unsuper_loss: 0.0 +average reward score: 5.7421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.59%) |Training time=0.45s (20.73%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5676|ppo_ep: 1|act_loss: 0.009246826171875|cri_loss: 0.005527496337890625|unsuper_loss: 0.0 +average reward score: 5.3828125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.31%) |Training time=0.45s (21.02%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5677|ppo_ep: 1|act_loss: -0.01214599609375|cri_loss: -0.005889892578125|unsuper_loss: 0.0 +average reward score: 5.75 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.78s (75.90%) |Training time=0.46s (19.81%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.68 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5678|ppo_ep: 1|act_loss: -0.0177154541015625|cri_loss: -0.00844573974609375|unsuper_loss: 0.0 +average reward score: 5.3125 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.15%) |Training time=0.47s (20.44%) |Others=0.10 (4.41%)|CurSamplesPerSec=14.06 |AvgSamplesPerSec=14.50 +[2023-04-14 12:16:19,632] [INFO] [logging.py:96:log_dist] [Rank 0] step=5680, skipped=72, lr=[2.3039444252455474e-06, 2.3039444252455474e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:16:19,650] [INFO] [timer.py:199:stop] epoch=0/micro_step=5680/global_step=5680, RunningAvgSamplesPerSec=105.3509397411824, CurrSamplesPerSec=106.0162415650417, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:16:19,734] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +[2023-04-14 12:16:19,734] [INFO] [logging.py:96:log_dist] [Rank 0] step=5680, skipped=93, lr=[1.2110300418965807e-06, 1.2110300418965807e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5679|ppo_ep: 1|act_loss: 0.0119781494140625|cri_loss: 0.00623321533203125|unsuper_loss: 0.0 +average reward score: 5.38671875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.19%) |Training time=0.46s (21.67%) |Others=0.09 (4.14%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.50 +[2023-04-14 12:16:21,885] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 5680|ppo_ep: 1|act_loss: 0.0156097412109375|cri_loss: 0.0085296630859375|unsuper_loss: 0.0 +average reward score: 4.7421875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.47s (21.98%) |Others=0.09 (4.16%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5681|ppo_ep: 1|act_loss: -0.026214599609375|cri_loss: -0.0122528076171875|unsuper_loss: 0.0 +average reward score: 5.03125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.92%) |Training time=0.46s (21.42%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5682|ppo_ep: 1|act_loss: 0.037353515625|cri_loss: 0.019256591796875|unsuper_loss: 0.0 +average reward score: 4.78515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.77%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5683|ppo_ep: 1|act_loss: 0.008056640625|cri_loss: 0.0044708251953125|unsuper_loss: 0.0 +average reward score: 5.86328125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.61%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5684|ppo_ep: 1|act_loss: -0.031463623046875|cri_loss: -0.013458251953125|unsuper_loss: 0.0 +average reward score: 5.88671875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.76%) |Training time=0.46s (21.60%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.50 +epoch: 0|step: 5685|ppo_ep: 1|act_loss: 0.0073089599609375|cri_loss: 0.004638671875|unsuper_loss: 0.0 +average reward score: 5.59375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.53%) |Training time=0.47s (21.86%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5686|ppo_ep: 1|act_loss: 0.00563812255859375|cri_loss: 0.0030384063720703125|unsuper_loss: 0.0 +average reward score: 5.37890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.73%) |Training time=0.47s (21.61%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5687|ppo_ep: 1|act_loss: -0.034942626953125|cri_loss: -0.0169219970703125|unsuper_loss: 0.0 +average reward score: 5.71875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.84%) |Training time=0.46s (21.51%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5688|ppo_ep: 1|act_loss: -0.0142364501953125|cri_loss: -0.0067901611328125|unsuper_loss: 0.0 +average reward score: 5.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.67%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.51 +[2023-04-14 12:16:41,180] [INFO] [logging.py:96:log_dist] [Rank 0] step=5690, skipped=72, lr=[2.2881243286214536e-06, 2.2881243286214536e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:16:41,198] [INFO] [timer.py:199:stop] epoch=0/micro_step=5690/global_step=5690, RunningAvgSamplesPerSec=105.35023067784104, CurrSamplesPerSec=105.09620121933077, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:16:41,291] [INFO] [logging.py:96:log_dist] [Rank 0] step=5690, skipped=94, lr=[1.2036154644795697e-06, 1.2036154644795697e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5689|ppo_ep: 1|act_loss: 0.03704833984375|cri_loss: 0.019378662109375|unsuper_loss: 0.0 +average reward score: 5.3671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.67%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5690|ppo_ep: 1|act_loss: 0.0243682861328125|cri_loss: 0.0134124755859375|unsuper_loss: 0.0 +average reward score: 5.9921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.08%) |Training time=0.46s (21.25%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5691|ppo_ep: 1|act_loss: 0.035430908203125|cri_loss: 0.0182647705078125|unsuper_loss: 0.0 +average reward score: 4.703125 +------------------------------------------------------------------------------------- +|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.60s (71.64%) |Training time=0.46s (20.66%) |Others=0.17 (7.69%)|CurSamplesPerSec=14.34 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5692|ppo_ep: 1|act_loss: 0.0447998046875|cri_loss: 0.0230255126953125|unsuper_loss: 0.0 +average reward score: 5.12109375 +------------------------------------------------------------------------------------- +|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.67s (74.77%) |Training time=0.46s (20.75%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.32 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5693|ppo_ep: 1|act_loss: 0.035491943359375|cri_loss: 0.0187835693359375|unsuper_loss: 0.0 +average reward score: 5.203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.11%) |Training time=0.46s (21.24%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5694|ppo_ep: 1|act_loss: 0.0137176513671875|cri_loss: 0.00716400146484375|unsuper_loss: 0.0 +average reward score: 5.3671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.12%) |Training time=0.46s (21.24%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5695|ppo_ep: 1|act_loss: -0.01076507568359375|cri_loss: -0.005306243896484375|unsuper_loss: 0.0 +average reward score: 5.6875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.00%) |Training time=0.46s (21.36%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5696|ppo_ep: 1|act_loss: -0.0136566162109375|cri_loss: -0.005901336669921875|unsuper_loss: 0.0 +average reward score: 5.2734375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.12%) |Training time=0.46s (21.22%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5697|ppo_ep: 1|act_loss: -0.0341796875|cri_loss: -0.016815185546875|unsuper_loss: 0.0 +average reward score: 5.82421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.81%) |Training time=0.47s (21.56%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5698|ppo_ep: 1|act_loss: 0.04217529296875|cri_loss: 0.0222625732421875|unsuper_loss: 0.0 +average reward score: 5.328125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.11%) |Training time=0.46s (21.26%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.51 +[2023-04-14 12:17:02,892] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384 +[2023-04-14 12:17:02,893] [INFO] [logging.py:96:log_dist] [Rank 0] step=5700, skipped=73, lr=[2.273918385226065e-06, 2.273918385226065e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:17:02,893] [INFO] [timer.py:199:stop] epoch=0/micro_step=5700/global_step=5700, RunningAvgSamplesPerSec=105.35557904627348, CurrSamplesPerSec=118.39787723532484, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:17:02,985] [INFO] [logging.py:96:log_dist] [Rank 0] step=5700, skipped=94, lr=[1.1953953037610977e-06, 1.1953953037610977e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5699|ppo_ep: 1|act_loss: 0.0312042236328125|cri_loss: 0.0164794921875|unsuper_loss: 0.0 +average reward score: 5.484375 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.00%) |Training time=0.43s (20.32%) |Others=0.10 (4.68%)|CurSamplesPerSec=15.05 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5700|ppo_ep: 1|act_loss: 0.010406494140625|cri_loss: 0.005855560302734375|unsuper_loss: 0.0 +average reward score: 5.0703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.34%) |Training time=0.45s (21.01%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5701|ppo_ep: 1|act_loss: -0.01861572265625|cri_loss: -0.009185791015625|unsuper_loss: 0.0 +average reward score: 5.77734375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.46s (21.50%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5702|ppo_ep: 1|act_loss: 0.02288818359375|cri_loss: 0.011749267578125|unsuper_loss: 0.0 +average reward score: 5.31640625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.02%) |Training time=0.46s (21.35%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5703|ppo_ep: 1|act_loss: 0.0144500732421875|cri_loss: 0.00774383544921875|unsuper_loss: 0.0 +average reward score: 5.7265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.17%) |Training time=0.46s (21.17%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5704|ppo_ep: 1|act_loss: 0.020233154296875|cri_loss: 0.01152801513671875|unsuper_loss: 0.0 +average reward score: 4.99609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.12%) |Training time=0.46s (21.25%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5705|ppo_ep: 1|act_loss: -0.01038360595703125|cri_loss: -0.00502777099609375|unsuper_loss: 0.0 +average reward score: 5.2421875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.02%) |Training time=0.44s (20.31%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5706|ppo_ep: 1|act_loss: 0.0258941650390625|cri_loss: 0.01401519775390625|unsuper_loss: 0.0 +average reward score: 5.7265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.03%) |Training time=0.44s (20.32%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5707|ppo_ep: 1|act_loss: -0.054718017578125|cri_loss: -0.023895263671875|unsuper_loss: 0.0 +average reward score: 5.21484375 +------------------------------------------------------------------------------------- +|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.80s (76.29%) |Training time=0.46s (19.46%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.60 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5708|ppo_ep: 1|act_loss: 0.0155029296875|cri_loss: 0.0079498291015625|unsuper_loss: 0.0 +average reward score: 5.7734375 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.96%) |Training time=0.44s (19.61%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.20 |AvgSamplesPerSec=14.51 +[2023-04-14 12:17:24,721] [INFO] [logging.py:96:log_dist] [Rank 0] step=5710, skipped=73, lr=[2.2581699336049657e-06, 2.2581699336049657e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:17:24,740] [INFO] [timer.py:199:stop] epoch=0/micro_step=5710/global_step=5710, RunningAvgSamplesPerSec=105.36524562432051, CurrSamplesPerSec=114.486349178187, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:17:24,832] [INFO] [logging.py:96:log_dist] [Rank 0] step=5710, skipped=94, lr=[1.1871944804314127e-06, 1.1871944804314127e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5709|ppo_ep: 1|act_loss: 0.01461029052734375|cri_loss: 0.0086212158203125|unsuper_loss: 0.0 +average reward score: 5.1796875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.84%) |Training time=0.44s (20.54%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5710|ppo_ep: 1|act_loss: -0.0137786865234375|cri_loss: -0.006793975830078125|unsuper_loss: 0.0 +average reward score: 5.49609375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.76%) |Training time=0.44s (20.57%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5711|ppo_ep: 1|act_loss: 0.0221099853515625|cri_loss: 0.01142120361328125|unsuper_loss: 0.0 +average reward score: 4.609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.69%) |Training time=0.45s (20.65%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5712|ppo_ep: 1|act_loss: 0.03350830078125|cri_loss: 0.0171661376953125|unsuper_loss: 0.0 +average reward score: 5.203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.39%) |Training time=0.43s (19.97%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5713|ppo_ep: 1|act_loss: -0.00296783447265625|cri_loss: -0.0011854171752929688|unsuper_loss: 0.0 +average reward score: 5.49609375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.69%) |Training time=0.44s (20.65%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5714|ppo_ep: 1|act_loss: 0.0247650146484375|cri_loss: 0.01328277587890625|unsuper_loss: 0.0 +average reward score: 6.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.41%) |Training time=0.45s (20.95%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5715|ppo_ep: 1|act_loss: 0.015167236328125|cri_loss: 0.00841522216796875|unsuper_loss: 0.0 +average reward score: 5.4375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.73%) |Training time=0.44s (20.64%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5716|ppo_ep: 1|act_loss: 0.0017757415771484375|cri_loss: 0.001026153564453125|unsuper_loss: 0.0 +average reward score: 4.88671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.55%) |Training time=0.45s (20.81%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5717|ppo_ep: 1|act_loss: -0.029449462890625|cri_loss: -0.01311492919921875|unsuper_loss: 0.0 +average reward score: 5.26953125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.44%) |Training time=0.45s (20.79%) |Others=0.10 (4.77%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5718|ppo_ep: 1|act_loss: 0.0399169921875|cri_loss: 0.022247314453125|unsuper_loss: 0.0 +average reward score: 5.3984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.64%) |Training time=0.45s (20.72%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.51 +[2023-04-14 12:17:46,248] [INFO] [logging.py:96:log_dist] [Rank 0] step=5720, skipped=73, lr=[2.242459528597593e-06, 2.242459528597593e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:17:46,267] [INFO] [timer.py:199:stop] epoch=0/micro_step=5720/global_step=5720, RunningAvgSamplesPerSec=105.37837842461052, CurrSamplesPerSec=112.51268157248816, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:17:46,359] [INFO] [logging.py:96:log_dist] [Rank 0] step=5720, skipped=94, lr=[1.1790131160464974e-06, 1.1790131160464974e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5719|ppo_ep: 1|act_loss: 0.00589752197265625|cri_loss: 0.0037384033203125|unsuper_loss: 0.0 +average reward score: 4.78125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.59%) |Training time=0.45s (20.78%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5720|ppo_ep: 1|act_loss: 0.00400543212890625|cri_loss: 0.002696990966796875|unsuper_loss: 0.0 +average reward score: 5.6640625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.57%) |Training time=0.45s (20.78%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5721|ppo_ep: 1|act_loss: 0.0252227783203125|cri_loss: 0.0133514404296875|unsuper_loss: 0.0 +average reward score: 4.71875 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.61s (71.63%) |Training time=0.49s (21.60%) |Others=0.15 (6.77%)|CurSamplesPerSec=14.23 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5722|ppo_ep: 1|act_loss: 0.032135009765625|cri_loss: 0.0175628662109375|unsuper_loss: 0.0 +average reward score: 5.40625 +------------------------------------------------------------------------------------- +|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.63s (72.89%) |Training time=0.49s (21.99%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.28 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5723|ppo_ep: 1|act_loss: -0.041290283203125|cri_loss: -0.020355224609375|unsuper_loss: 0.0 +average reward score: 5.2734375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.11%) |Training time=0.49s (22.33%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5724|ppo_ep: 1|act_loss: -0.04010009765625|cri_loss: -0.01953125|unsuper_loss: 0.0 +average reward score: 5.6015625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.35%) |Training time=0.48s (22.04%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5725|ppo_ep: 1|act_loss: 0.017364501953125|cri_loss: 0.0089874267578125|unsuper_loss: 0.0 +average reward score: 5.3125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.02%) |Training time=0.47s (21.32%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5726|ppo_ep: 1|act_loss: -0.039947509765625|cri_loss: -0.0191802978515625|unsuper_loss: 0.0 +average reward score: 5.37109375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.67s (76.38%) |Training time=0.41s (18.92%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5727|ppo_ep: 1|act_loss: 0.016815185546875|cri_loss: 0.008697509765625|unsuper_loss: 0.0 +average reward score: 4.88671875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.92%) |Training time=0.47s (21.41%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5728|ppo_ep: 1|act_loss: 0.04681396484375|cri_loss: 0.0238494873046875|unsuper_loss: 0.0 +average reward score: 4.28515625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.19%) |Training time=0.46s (21.19%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.51 +[2023-04-14 12:18:08,163] [INFO] [logging.py:96:log_dist] [Rank 0] step=5730, skipped=73, lr=[2.226787403070044e-06, 2.226787403070044e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:18:08,181] [INFO] [timer.py:199:stop] epoch=0/micro_step=5730/global_step=5730, RunningAvgSamplesPerSec=105.37804869000095, CurrSamplesPerSec=104.9343332311753, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:18:08,274] [INFO] [logging.py:96:log_dist] [Rank 0] step=5730, skipped=94, lr=[1.1708513318739096e-06, 1.1708513318739096e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5729|ppo_ep: 1|act_loss: 0.0394287109375|cri_loss: 0.020660400390625|unsuper_loss: 0.0 +average reward score: 5.0625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.88%) |Training time=0.47s (21.50%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5730|ppo_ep: 1|act_loss: 0.024932861328125|cri_loss: 0.0137481689453125|unsuper_loss: 0.0 +average reward score: 5.29296875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.24%) |Training time=0.46s (21.13%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5731|ppo_ep: 1|act_loss: 0.0161590576171875|cri_loss: 0.009124755859375|unsuper_loss: 0.0 +average reward score: 5.16796875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.85%) |Training time=0.45s (20.51%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5732|ppo_ep: 1|act_loss: 0.04718017578125|cri_loss: 0.02490234375|unsuper_loss: 0.0 +average reward score: 4.2265625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.42%) |Training time=0.46s (20.95%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5733|ppo_ep: 1|act_loss: 0.00678253173828125|cri_loss: 0.0037174224853515625|unsuper_loss: 0.0 +average reward score: 4.76171875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.53%) |Training time=0.48s (21.84%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5734|ppo_ep: 1|act_loss: -0.0018100738525390625|cri_loss: -0.0007863044738769531|unsuper_loss: 0.0 +average reward score: 4.953125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.69%) |Training time=0.47s (21.61%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5735|ppo_ep: 1|act_loss: -0.0214691162109375|cri_loss: -0.0105133056640625|unsuper_loss: 0.0 +average reward score: 4.328125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.48%) |Training time=0.47s (21.51%) |Others=0.11 (5.01%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5736|ppo_ep: 1|act_loss: -0.0106353759765625|cri_loss: -0.00485992431640625|unsuper_loss: 0.0 +average reward score: 5.16015625 +------------------------------------------------------------------------------------- +|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.73s (73.44%) |Training time=0.52s (21.94%) |Others=0.11 (4.61%)|CurSamplesPerSec=13.59 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5737|ppo_ep: 1|act_loss: -0.01264190673828125|cri_loss: -0.00566864013671875|unsuper_loss: 0.0 +average reward score: 5.34375 +------------------------------------------------------------------------------------- +|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.64s (73.30%) |Training time=0.48s (21.40%) |Others=0.12 (5.29%)|CurSamplesPerSec=14.27 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5738|ppo_ep: 1|act_loss: -0.037322998046875|cri_loss: -0.017547607421875|unsuper_loss: 0.0 +average reward score: 5.81640625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.42%) |Training time=0.48s (21.97%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.51 +[2023-04-14 12:18:30,185] [INFO] [logging.py:96:log_dist] [Rank 0] step=5740, skipped=73, lr=[2.2111537893210277e-06, 2.2111537893210277e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:18:30,203] [INFO] [timer.py:199:stop] epoch=0/micro_step=5740/global_step=5740, RunningAvgSamplesPerSec=105.37475219460445, CurrSamplesPerSec=103.98614734176941, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:18:30,295] [INFO] [logging.py:96:log_dist] [Rank 0] step=5740, skipped=94, lr=[1.1627092488909802e-06, 1.1627092488909802e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5739|ppo_ep: 1|act_loss: -0.016815185546875|cri_loss: -0.0080108642578125|unsuper_loss: 0.0 +average reward score: 5.65625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.65%) |Training time=0.47s (21.74%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5740|ppo_ep: 1|act_loss: 0.002567291259765625|cri_loss: 0.0016937255859375|unsuper_loss: 0.0 +average reward score: 5.0703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.89%) |Training time=0.47s (21.48%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5741|ppo_ep: 1|act_loss: -0.0176544189453125|cri_loss: -0.00852203369140625|unsuper_loss: 0.0 +average reward score: 4.921875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.56%) |Training time=0.48s (21.83%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5742|ppo_ep: 1|act_loss: -0.019744873046875|cri_loss: -0.0095367431640625|unsuper_loss: 0.0 +average reward score: 5.0625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.05%) |Training time=0.46s (21.30%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5743|ppo_ep: 1|act_loss: -0.022613525390625|cri_loss: -0.01056671142578125|unsuper_loss: 0.0 +average reward score: 4.96484375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.02%) |Training time=0.49s (22.35%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5744|ppo_ep: 1|act_loss: -0.00809478759765625|cri_loss: -0.003993988037109375|unsuper_loss: 0.0 +average reward score: 6.26953125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.23%) |Training time=0.46s (21.17%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5745|ppo_ep: 1|act_loss: -0.0010538101196289062|cri_loss: 0.00023937225341796875|unsuper_loss: 0.0 +average reward score: 4.92578125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.23%) |Training time=0.48s (22.17%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5746|ppo_ep: 1|act_loss: 0.052093505859375|cri_loss: 0.0269927978515625|unsuper_loss: 0.0 +average reward score: 5.56640625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.11%) |Training time=0.48s (22.28%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5747|ppo_ep: 1|act_loss: 0.027069091796875|cri_loss: 0.01427459716796875|unsuper_loss: 0.0 +average reward score: 5.3046875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.22%) |Training time=0.48s (22.12%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5748|ppo_ep: 1|act_loss: 0.0090789794921875|cri_loss: 0.0047760009765625|unsuper_loss: 0.0 +average reward score: 4.8984375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.98%) |Training time=0.49s (22.41%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.51 +[2023-04-14 12:18:51,928] [INFO] [logging.py:96:log_dist] [Rank 0] step=5750, skipped=73, lr=[2.195558919078415e-06, 2.195558919078415e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:18:51,947] [INFO] [timer.py:199:stop] epoch=0/micro_step=5750/global_step=5750, RunningAvgSamplesPerSec=105.36822216331952, CurrSamplesPerSec=100.24642125720193, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:18:52,039] [INFO] [logging.py:96:log_dist] [Rank 0] step=5750, skipped=94, lr=[1.1545869877830212e-06, 1.1545869877830212e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5749|ppo_ep: 1|act_loss: -0.055419921875|cri_loss: -0.0255584716796875|unsuper_loss: 0.0 +average reward score: 5.57421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.21%) |Training time=0.48s (22.20%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5750|ppo_ep: 1|act_loss: -0.03875732421875|cri_loss: -0.01885986328125|unsuper_loss: 0.0 +average reward score: 4.62109375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.93%) |Training time=0.49s (22.47%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5751|ppo_ep: 1|act_loss: -0.0103912353515625|cri_loss: -0.00496673583984375|unsuper_loss: 0.0 +average reward score: 5.18359375 +------------------------------------------------------------------------------------- +|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.31%) |Training time=0.48s (20.42%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.59 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5752|ppo_ep: 1|act_loss: 0.0010557174682617188|cri_loss: 0.0008831024169921875|unsuper_loss: 0.0 +average reward score: 5.09765625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.32%) |Training time=0.48s (22.06%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5753|ppo_ep: 1|act_loss: -0.0428466796875|cri_loss: -0.0207977294921875|unsuper_loss: 0.0 +average reward score: 5.03515625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.11%) |Training time=0.48s (22.29%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5754|ppo_ep: 1|act_loss: 0.028411865234375|cri_loss: 0.0145263671875|unsuper_loss: 0.0 +average reward score: 5.21484375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.36%) |Training time=0.45s (21.00%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5755|ppo_ep: 1|act_loss: 0.01409149169921875|cri_loss: 0.007289886474609375|unsuper_loss: 0.0 +average reward score: 5.76171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.47%) |Training time=0.45s (20.89%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5756|ppo_ep: 1|act_loss: 0.01885986328125|cri_loss: 0.00994110107421875|unsuper_loss: 0.0 +average reward score: 4.9296875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.62%) |Training time=0.45s (20.76%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5757|ppo_ep: 1|act_loss: 0.014007568359375|cri_loss: 0.0072174072265625|unsuper_loss: 0.0 +average reward score: 5.28515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.83%) |Training time=0.44s (20.51%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5758|ppo_ep: 1|act_loss: 0.00698089599609375|cri_loss: 0.003719329833984375|unsuper_loss: 0.0 +average reward score: 5.10546875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.45%) |Training time=0.46s (20.94%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.51 +[2023-04-14 12:19:13,815] [INFO] [logging.py:96:log_dist] [Rank 0] step=5760, skipped=73, lr=[2.180003023495804e-06, 2.180003023495804e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:19:13,834] [INFO] [timer.py:199:stop] epoch=0/micro_step=5760/global_step=5760, RunningAvgSamplesPerSec=105.36992402704047, CurrSamplesPerSec=113.88001075862748, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:19:13,926] [INFO] [logging.py:96:log_dist] [Rank 0] step=5760, skipped=94, lr=[1.1464846689415345e-06, 1.1464846689415345e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5759|ppo_ep: 1|act_loss: 0.004638671875|cri_loss: 0.0025043487548828125|unsuper_loss: 0.0 +average reward score: 5.1015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.87%) |Training time=0.44s (20.50%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5760|ppo_ep: 1|act_loss: -0.00904083251953125|cri_loss: -0.004444122314453125|unsuper_loss: 0.0 +average reward score: 5.09375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.69%) |Training time=0.45s (20.68%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5761|ppo_ep: 1|act_loss: -0.0150146484375|cri_loss: -0.0073089599609375|unsuper_loss: 0.0 +average reward score: 5.109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.66%) |Training time=0.45s (20.71%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5762|ppo_ep: 1|act_loss: -0.001277923583984375|cri_loss: -2.956390380859375e-05|unsuper_loss: 0.0 +average reward score: 5.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.22%) |Training time=0.46s (21.17%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5763|ppo_ep: 1|act_loss: -0.00362396240234375|cri_loss: -0.001667022705078125|unsuper_loss: 0.0 +average reward score: 5.1875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.90%) |Training time=0.44s (20.44%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5764|ppo_ep: 1|act_loss: -0.0035037994384765625|cri_loss: -0.0008087158203125|unsuper_loss: 0.0 +average reward score: 5.171875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.45%) |Training time=0.45s (20.91%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5765|ppo_ep: 1|act_loss: 0.0462646484375|cri_loss: 0.0257720947265625|unsuper_loss: 0.0 +average reward score: 4.9765625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.23%) |Training time=0.46s (21.15%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5766|ppo_ep: 1|act_loss: 0.02264404296875|cri_loss: 0.011871337890625|unsuper_loss: 0.0 +average reward score: 5.42578125 +------------------------------------------------------------------------------------- +|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.77s (74.90%) |Training time=0.49s (20.81%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.55 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5767|ppo_ep: 1|act_loss: 0.0171661376953125|cri_loss: 0.00885772705078125|unsuper_loss: 0.0 +average reward score: 5.6171875 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.72s (75.51%) |Training time=0.45s (19.95%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.06 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5768|ppo_ep: 1|act_loss: 0.019622802734375|cri_loss: 0.01026153564453125|unsuper_loss: 0.0 +average reward score: 5.71484375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.16%) |Training time=0.46s (21.23%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.51 +[2023-04-14 12:19:35,783] [INFO] [logging.py:96:log_dist] [Rank 0] step=5770, skipped=73, lr=[2.164486333149091e-06, 2.164486333149091e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:19:35,801] [INFO] [timer.py:199:stop] epoch=0/micro_step=5770/global_step=5770, RunningAvgSamplesPerSec=105.3766256111742, CurrSamplesPerSec=108.46236360154154, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:19:35,894] [INFO] [logging.py:96:log_dist] [Rank 0] step=5770, skipped=94, lr=[1.1384024124624324e-06, 1.1384024124624324e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5769|ppo_ep: 1|act_loss: -0.0010747909545898438|cri_loss: -0.0003705024719238281|unsuper_loss: 0.0 +average reward score: 5.41796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.33%) |Training time=0.46s (21.08%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5770|ppo_ep: 1|act_loss: 0.00479888916015625|cri_loss: 0.0035762786865234375|unsuper_loss: 0.0 +average reward score: 5.5390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.19%) |Training time=0.46s (21.21%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5771|ppo_ep: 1|act_loss: -0.00518798828125|cri_loss: -0.002086639404296875|unsuper_loss: 0.0 +average reward score: 5.69140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.56%) |Training time=0.45s (20.81%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5772|ppo_ep: 1|act_loss: 0.003818511962890625|cri_loss: 0.0019702911376953125|unsuper_loss: 0.0 +average reward score: 4.8828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.15%) |Training time=0.46s (21.24%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5773|ppo_ep: 1|act_loss: 0.005664825439453125|cri_loss: 0.00347137451171875|unsuper_loss: 0.0 +average reward score: 5.15234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.38%) |Training time=0.46s (21.01%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5774|ppo_ep: 1|act_loss: 0.019561767578125|cri_loss: 0.01019287109375|unsuper_loss: 0.0 +average reward score: 5.02734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.57%) |Training time=0.45s (20.81%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5775|ppo_ep: 1|act_loss: -0.0223236083984375|cri_loss: -0.01052093505859375|unsuper_loss: 0.0 +average reward score: 5.05859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.36%) |Training time=0.46s (21.02%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5776|ppo_ep: 1|act_loss: 0.0012111663818359375|cri_loss: 0.0007171630859375|unsuper_loss: 0.0 +average reward score: 5.18359375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.46%) |Training time=0.45s (20.93%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5777|ppo_ep: 1|act_loss: -0.01100921630859375|cri_loss: -0.00514984130859375|unsuper_loss: 0.0 +average reward score: 5.515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.18%) |Training time=0.46s (21.23%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5778|ppo_ep: 1|act_loss: -0.02056884765625|cri_loss: -0.00988006591796875|unsuper_loss: 0.0 +average reward score: 4.4765625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.46%) |Training time=0.45s (20.92%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.51 +[2023-04-14 12:19:57,471] [INFO] [logging.py:96:log_dist] [Rank 0] step=5780, skipped=73, lr=[2.1490090780330607e-06, 2.1490090780330607e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:19:57,489] [INFO] [timer.py:199:stop] epoch=0/micro_step=5780/global_step=5780, RunningAvgSamplesPerSec=105.38259097764607, CurrSamplesPerSec=107.7067809395413, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:19:57,582] [INFO] [logging.py:96:log_dist] [Rank 0] step=5780, skipped=94, lr=[1.1303403381442527e-06, 1.1303403381442527e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5779|ppo_ep: 1|act_loss: -0.00388336181640625|cri_loss: -0.0016489028930664062|unsuper_loss: 0.0 +average reward score: 5.30859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.27%) |Training time=0.46s (21.14%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5780|ppo_ep: 1|act_loss: -0.0297088623046875|cri_loss: -0.01397705078125|unsuper_loss: 0.0 +average reward score: 5.09375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.71%) |Training time=0.45s (20.70%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.51 +[2023-04-14 12:20:01,926] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 5781|ppo_ep: 1|act_loss: -0.035430908203125|cri_loss: -0.0171051025390625|unsuper_loss: 0.0 +average reward score: 4.765625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.08%) |Training time=0.45s (20.76%) |Others=0.09 (4.16%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.51 +[2023-04-14 12:20:04,093] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 5782|ppo_ep: 1|act_loss: 0.004146575927734375|cri_loss: 0.0023784637451171875|unsuper_loss: 0.0 +average reward score: 4.2265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.77%) |Training time=0.46s (21.11%) |Others=0.09 (4.12%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.51 +[2023-04-14 12:20:06,309] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192 +epoch: 0|step: 5783|ppo_ep: 1|act_loss: -0.0152130126953125|cri_loss: -0.007354736328125|unsuper_loss: 0.0 +average reward score: 5.9140625 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.79s (77.39%) |Training time=0.42s (18.28%) |Others=0.10 (4.33%)|CurSamplesPerSec=13.85 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5784|ppo_ep: 1|act_loss: -0.04534912109375|cri_loss: -0.022186279296875|unsuper_loss: 0.0 +average reward score: 4.9296875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.48%) |Training time=0.46s (20.93%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5785|ppo_ep: 1|act_loss: -0.004711151123046875|cri_loss: -0.0017976760864257812|unsuper_loss: 0.0 +average reward score: 4.58984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.73%) |Training time=0.45s (20.65%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5786|ppo_ep: 1|act_loss: -0.027191162109375|cri_loss: -0.0132293701171875|unsuper_loss: 0.0 +average reward score: 4.75 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.45%) |Training time=0.45s (20.94%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5787|ppo_ep: 1|act_loss: -0.0107574462890625|cri_loss: -0.005168914794921875|unsuper_loss: 0.0 +average reward score: 4.640625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.18%) |Training time=0.46s (21.21%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5788|ppo_ep: 1|act_loss: 0.024444580078125|cri_loss: 0.0126953125|unsuper_loss: 0.0 +average reward score: 4.84765625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.28%) |Training time=0.46s (21.14%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.51 +[2023-04-14 12:20:19,343] [INFO] [logging.py:96:log_dist] [Rank 0] step=5790, skipped=74, lr=[2.1351134551705775e-06, 2.1351134551705775e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:20:19,361] [INFO] [timer.py:199:stop] epoch=0/micro_step=5790/global_step=5790, RunningAvgSamplesPerSec=105.3911187251698, CurrSamplesPerSec=108.43922913285121, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:20:19,453] [INFO] [logging.py:96:log_dist] [Rank 0] step=5790, skipped=96, lr=[1.12390529015926e-06, 1.12390529015926e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5789|ppo_ep: 1|act_loss: 0.0033512115478515625|cri_loss: 0.001781463623046875|unsuper_loss: 0.0 +average reward score: 4.64453125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.37%) |Training time=0.46s (21.05%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5790|ppo_ep: 1|act_loss: -0.012176513671875|cri_loss: -0.0050506591796875|unsuper_loss: 0.0 +average reward score: 4.91796875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.43%) |Training time=0.46s (21.00%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5791|ppo_ep: 1|act_loss: -0.0051422119140625|cri_loss: -0.002140045166015625|unsuper_loss: 0.0 +average reward score: 5.5703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.45%) |Training time=0.46s (20.93%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5792|ppo_ep: 1|act_loss: 0.0010499954223632812|cri_loss: 0.0006031990051269531|unsuper_loss: 0.0 +average reward score: 5.1484375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.38%) |Training time=0.46s (21.03%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5793|ppo_ep: 1|act_loss: -0.00574493408203125|cri_loss: -0.00251007080078125|unsuper_loss: 0.0 +average reward score: 5.171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.44%) |Training time=0.45s (20.96%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5794|ppo_ep: 1|act_loss: -0.0072784423828125|cri_loss: -0.0033245086669921875|unsuper_loss: 0.0 +average reward score: 5.765625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.44%) |Training time=0.46s (20.96%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5795|ppo_ep: 1|act_loss: -0.0041961669921875|cri_loss: -0.001972198486328125|unsuper_loss: 0.0 +average reward score: 5.5234375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.61%) |Training time=0.45s (20.77%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5796|ppo_ep: 1|act_loss: -0.0179443359375|cri_loss: -0.00862884521484375|unsuper_loss: 0.0 +average reward score: 5.3828125 +------------------------------------------------------------------------------------- +|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.65s (73.39%) |Training time=0.49s (21.64%) |Others=0.11 (4.97%)|CurSamplesPerSec=14.28 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5797|ppo_ep: 1|act_loss: 0.07708740234375|cri_loss: 0.03997802734375|unsuper_loss: 0.0 +average reward score: 4.8671875 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.30%) |Training time=0.44s (19.64%) |Others=0.16 (7.06%)|CurSamplesPerSec=14.45 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5798|ppo_ep: 1|act_loss: 0.0013828277587890625|cri_loss: 0.0012359619140625|unsuper_loss: 0.0 +average reward score: 5.9609375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.18%) |Training time=0.45s (20.87%) |Others=0.11 (4.95%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.51 +[2023-04-14 12:20:41,175] [INFO] [logging.py:96:log_dist] [Rank 0] step=5800, skipped=74, lr=[2.1197117585322546e-06, 2.1197117585322546e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:20:41,193] [INFO] [timer.py:199:stop] epoch=0/micro_step=5800/global_step=5800, RunningAvgSamplesPerSec=105.39739484592792, CurrSamplesPerSec=109.96594783124505, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:20:41,286] [INFO] [logging.py:96:log_dist] [Rank 0] step=5800, skipped=96, lr=[1.1158798446670462e-06, 1.1158798446670462e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5799|ppo_ep: 1|act_loss: -0.0104522705078125|cri_loss: -0.004947662353515625|unsuper_loss: 0.0 +average reward score: 5.1328125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.45s (21.12%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5800|ppo_ep: 1|act_loss: -0.02130126953125|cri_loss: -0.010406494140625|unsuper_loss: 0.0 +average reward score: 4.93359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.25%) |Training time=0.45s (21.09%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5801|ppo_ep: 1|act_loss: 0.005802154541015625|cri_loss: 0.0036106109619140625|unsuper_loss: 0.0 +average reward score: 4.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.24%) |Training time=0.45s (21.12%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5802|ppo_ep: 1|act_loss: -0.0009617805480957031|cri_loss: 2.384185791015625e-06|unsuper_loss: 0.0 +average reward score: 5.265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.74%) |Training time=0.46s (21.61%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5803|ppo_ep: 1|act_loss: -0.0011854171752929688|cri_loss: -0.0003643035888671875|unsuper_loss: 0.0 +average reward score: 5.640625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.84%) |Training time=0.46s (21.51%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5804|ppo_ep: 1|act_loss: 0.0267486572265625|cri_loss: 0.01435089111328125|unsuper_loss: 0.0 +average reward score: 4.953125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.72%) |Training time=0.47s (21.64%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5805|ppo_ep: 1|act_loss: 0.0136566162109375|cri_loss: 0.00754547119140625|unsuper_loss: 0.0 +average reward score: 5.7734375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.74%) |Training time=0.46s (21.62%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5806|ppo_ep: 1|act_loss: 0.0253448486328125|cri_loss: 0.01340484619140625|unsuper_loss: 0.0 +average reward score: 5.3203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.65%) |Training time=0.47s (21.70%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5807|ppo_ep: 1|act_loss: -0.0128173828125|cri_loss: -0.006130218505859375|unsuper_loss: 0.0 +average reward score: 5.78515625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.90%) |Training time=0.46s (21.45%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5808|ppo_ep: 1|act_loss: -0.0037994384765625|cri_loss: -0.0012502670288085938|unsuper_loss: 0.0 +average reward score: 6.140625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.65%) |Training time=0.47s (21.74%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.51 +[2023-04-14 12:21:02,690] [INFO] [logging.py:96:log_dist] [Rank 0] step=5810, skipped=74, lr=[2.1043501607918214e-06, 2.1043501607918214e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:21:02,709] [INFO] [timer.py:199:stop] epoch=0/micro_step=5810/global_step=5810, RunningAvgSamplesPerSec=105.39882852218719, CurrSamplesPerSec=102.81196321932057, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:21:02,804] [INFO] [logging.py:96:log_dist] [Rank 0] step=5810, skipped=96, lr=[1.107874915174542e-06, 1.107874915174542e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5809|ppo_ep: 1|act_loss: -0.031494140625|cri_loss: -0.0153961181640625|unsuper_loss: 0.0 +average reward score: 5.140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.47s (21.95%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5810|ppo_ep: 1|act_loss: -0.03619384765625|cri_loss: -0.0177154541015625|unsuper_loss: 0.0 +average reward score: 5.80078125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.66%) |Training time=0.47s (21.68%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5811|ppo_ep: 1|act_loss: -0.04034423828125|cri_loss: -0.01953125|unsuper_loss: 0.0 +average reward score: 5.77734375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.85%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5812|ppo_ep: 1|act_loss: 0.01007080078125|cri_loss: 0.005229949951171875|unsuper_loss: 0.0 +average reward score: 4.953125 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.59s (70.07%) |Training time=0.47s (20.86%) |Others=0.21 (9.07%)|CurSamplesPerSec=14.09 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5813|ppo_ep: 1|act_loss: -0.0201416015625|cri_loss: -0.0098114013671875|unsuper_loss: 0.0 +average reward score: 5.5390625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.36%) |Training time=0.47s (21.97%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5814|ppo_ep: 1|act_loss: 0.033935546875|cri_loss: 0.018096923828125|unsuper_loss: 0.0 +average reward score: 5.203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.49%) |Training time=0.47s (21.87%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5815|ppo_ep: 1|act_loss: 0.007904052734375|cri_loss: 0.004215240478515625|unsuper_loss: 0.0 +average reward score: 5.05078125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.91%) |Training time=0.46s (21.40%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5816|ppo_ep: 1|act_loss: 0.012664794921875|cri_loss: 0.006511688232421875|unsuper_loss: 0.0 +average reward score: 4.5234375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.70%) |Training time=0.46s (21.63%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5817|ppo_ep: 1|act_loss: -0.00014269351959228516|cri_loss: 1.1444091796875e-05|unsuper_loss: 0.0 +average reward score: 5.42578125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.82%) |Training time=0.46s (21.51%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5818|ppo_ep: 1|act_loss: 0.040313720703125|cri_loss: 0.0205841064453125|unsuper_loss: 0.0 +average reward score: 5.46484375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.64%) |Training time=0.46s (21.68%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.51 +[2023-04-14 12:21:24,289] [INFO] [logging.py:96:log_dist] [Rank 0] step=5820, skipped=74, lr=[2.0890288896452154e-06, 2.0890288896452154e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:21:24,307] [INFO] [timer.py:199:stop] epoch=0/micro_step=5820/global_step=5820, RunningAvgSamplesPerSec=105.39866420062243, CurrSamplesPerSec=108.02475065836494, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:21:24,400] [INFO] [logging.py:96:log_dist] [Rank 0] step=5820, skipped=96, lr=[1.0998906203341133e-06, 1.0998906203341133e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5819|ppo_ep: 1|act_loss: 0.0126953125|cri_loss: 0.006683349609375|unsuper_loss: 0.0 +average reward score: 4.859375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.89%) |Training time=0.46s (21.45%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5820|ppo_ep: 1|act_loss: 0.002716064453125|cri_loss: 0.0014963150024414062|unsuper_loss: 0.0 +average reward score: 5.1171875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.59%) |Training time=0.47s (21.75%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5821|ppo_ep: 1|act_loss: -0.0369873046875|cri_loss: -0.0176239013671875|unsuper_loss: 0.0 +average reward score: 5.16796875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.72%) |Training time=0.46s (21.58%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5822|ppo_ep: 1|act_loss: 0.00484466552734375|cri_loss: 0.00281524658203125|unsuper_loss: 0.0 +average reward score: 5.9140625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.88%) |Training time=0.46s (21.42%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5823|ppo_ep: 1|act_loss: -0.02099609375|cri_loss: -0.009765625|unsuper_loss: 0.0 +average reward score: 4.99609375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.56%) |Training time=0.47s (21.78%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5824|ppo_ep: 1|act_loss: -0.0311737060546875|cri_loss: -0.0151214599609375|unsuper_loss: 0.0 +average reward score: 5.1484375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.57%) |Training time=0.47s (21.74%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5825|ppo_ep: 1|act_loss: 0.03155517578125|cri_loss: 0.016632080078125|unsuper_loss: 0.0 +average reward score: 4.74609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.67%) |Training time=0.47s (21.67%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5826|ppo_ep: 1|act_loss: -0.00661468505859375|cri_loss: -0.0029697418212890625|unsuper_loss: 0.0 +average reward score: 5.01953125 +------------------------------------------------------------------------------------- +|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.37%) |Training time=0.48s (21.21%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.00 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5827|ppo_ep: 1|act_loss: 0.04132080078125|cri_loss: 0.021881103515625|unsuper_loss: 0.0 +average reward score: 5.0390625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.70%) |Training time=0.47s (21.68%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5828|ppo_ep: 1|act_loss: 0.0458984375|cri_loss: 0.02459716796875|unsuper_loss: 0.0 +average reward score: 5.20703125 +------------------------------------------------------------------------------------- +|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.93%) |Training time=0.49s (20.81%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.62 |AvgSamplesPerSec=14.51 +[2023-04-14 12:21:46,126] [INFO] [logging.py:96:log_dist] [Rank 0] step=5830, skipped=74, lr=[2.0737481721906358e-06, 2.0737481721906358e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:21:46,145] [INFO] [timer.py:199:stop] epoch=0/micro_step=5830/global_step=5830, RunningAvgSamplesPerSec=105.39656981851218, CurrSamplesPerSec=106.60348186749079, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:21:46,237] [INFO] [logging.py:96:log_dist] [Rank 0] step=5830, skipped=96, lr=[1.0919270784922725e-06, 1.0919270784922725e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5829|ppo_ep: 1|act_loss: -0.011474609375|cri_loss: -0.005504608154296875|unsuper_loss: 0.0 +average reward score: 5.28125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.46s (21.43%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5830|ppo_ep: 1|act_loss: 0.00536346435546875|cri_loss: 0.0030269622802734375|unsuper_loss: 0.0 +average reward score: 5.890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.86%) |Training time=0.47s (21.53%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5831|ppo_ep: 1|act_loss: 0.018096923828125|cri_loss: 0.0093841552734375|unsuper_loss: 0.0 +average reward score: 5.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.46s (21.46%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5832|ppo_ep: 1|act_loss: 0.015869140625|cri_loss: 0.0083160400390625|unsuper_loss: 0.0 +average reward score: 5.8671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.92%) |Training time=0.46s (21.45%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5833|ppo_ep: 1|act_loss: 0.09326171875|cri_loss: 0.048065185546875|unsuper_loss: 0.0 +average reward score: 5.65234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.66%) |Training time=0.47s (21.73%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5834|ppo_ep: 1|act_loss: -0.003528594970703125|cri_loss: -0.0014944076538085938|unsuper_loss: 0.0 +average reward score: 4.24609375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.88%) |Training time=0.46s (21.46%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5835|ppo_ep: 1|act_loss: -0.00787353515625|cri_loss: -0.00383758544921875|unsuper_loss: 0.0 +average reward score: 5.515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.94%) |Training time=0.46s (21.44%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5836|ppo_ep: 1|act_loss: -0.001445770263671875|cri_loss: -0.0004544258117675781|unsuper_loss: 0.0 +average reward score: 5.04296875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.88%) |Training time=0.47s (21.55%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5837|ppo_ep: 1|act_loss: -0.0098419189453125|cri_loss: -0.0033473968505859375|unsuper_loss: 0.0 +average reward score: 5.109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.46s (21.48%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5838|ppo_ep: 1|act_loss: -0.0123443603515625|cri_loss: -0.006011962890625|unsuper_loss: 0.0 +average reward score: 6.16796875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.85%) |Training time=0.46s (21.52%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.51 +[2023-04-14 12:22:07,741] [INFO] [logging.py:96:log_dist] [Rank 0] step=5840, skipped=74, lr=[2.05850823492518e-06, 2.05850823492518e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:22:07,759] [INFO] [timer.py:199:stop] epoch=0/micro_step=5840/global_step=5840, RunningAvgSamplesPerSec=105.39736005809708, CurrSamplesPerSec=107.77095193664996, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:22:07,851] [INFO] [logging.py:96:log_dist] [Rank 0] step=5840, skipped=96, lr=[1.0839844076879186e-06, 1.0839844076879186e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5839|ppo_ep: 1|act_loss: -0.055328369140625|cri_loss: -0.0242919921875|unsuper_loss: 0.0 +average reward score: 5.67578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.32%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5840|ppo_ep: 1|act_loss: -0.0052490234375|cri_loss: -0.0023136138916015625|unsuper_loss: 0.0 +average reward score: 5.1953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.82%) |Training time=0.47s (21.55%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5841|ppo_ep: 1|act_loss: 0.0347900390625|cri_loss: 0.018707275390625|unsuper_loss: 0.0 +average reward score: 5.20703125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.13%) |Training time=0.46s (21.21%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5842|ppo_ep: 1|act_loss: -0.0010709762573242188|cri_loss: -0.0003044605255126953|unsuper_loss: 0.0 +average reward score: 5.765625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.91%) |Training time=0.47s (21.47%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5843|ppo_ep: 1|act_loss: 0.002651214599609375|cri_loss: 0.0016536712646484375|unsuper_loss: 0.0 +average reward score: 4.9296875 +------------------------------------------------------------------------------------- +|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.11%) |Training time=0.49s (20.63%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.56 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5844|ppo_ep: 1|act_loss: 0.00412750244140625|cri_loss: 0.002384185791015625|unsuper_loss: 0.0 +average reward score: 5.02734375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.01%) |Training time=0.44s (20.31%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5845|ppo_ep: 1|act_loss: 0.0032215118408203125|cri_loss: 0.001739501953125|unsuper_loss: 0.0 +average reward score: 5.1484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.67%) |Training time=0.45s (20.69%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5846|ppo_ep: 1|act_loss: -0.004993438720703125|cri_loss: -0.002056121826171875|unsuper_loss: 0.0 +average reward score: 5.01953125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.83%) |Training time=0.44s (20.35%) |Others=0.10 (4.81%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5847|ppo_ep: 1|act_loss: 0.0643310546875|cri_loss: 0.03436279296875|unsuper_loss: 0.0 +average reward score: 6.1640625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.05%) |Training time=0.43s (20.26%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5848|ppo_ep: 1|act_loss: -0.023712158203125|cri_loss: -0.0104217529296875|unsuper_loss: 0.0 +average reward score: 5.67578125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.91%) |Training time=0.44s (20.43%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.51 +[2023-04-14 12:22:29,465] [INFO] [logging.py:96:log_dist] [Rank 0] step=5850, skipped=74, lr=[2.0433093037414786e-06, 2.0433093037414786e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:22:29,484] [INFO] [timer.py:199:stop] epoch=0/micro_step=5850/global_step=5850, RunningAvgSamplesPerSec=105.40682625158138, CurrSamplesPerSec=117.7084163556254, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:22:29,576] [INFO] [logging.py:96:log_dist] [Rank 0] step=5850, skipped=96, lr=[1.076062725650594e-06, 1.076062725650594e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5849|ppo_ep: 1|act_loss: -0.00316619873046875|cri_loss: -0.0006608963012695312|unsuper_loss: 0.0 +average reward score: 5.28125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.03%) |Training time=0.43s (20.31%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5850|ppo_ep: 1|act_loss: -0.00543975830078125|cri_loss: -0.00247955322265625|unsuper_loss: 0.0 +average reward score: 5.6640625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.09%) |Training time=0.43s (20.23%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5851|ppo_ep: 1|act_loss: -0.03326416015625|cri_loss: -0.016357421875|unsuper_loss: 0.0 +average reward score: 5.7265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.98%) |Training time=0.44s (20.35%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5852|ppo_ep: 1|act_loss: -0.03924560546875|cri_loss: -0.0193328857421875|unsuper_loss: 0.0 +average reward score: 6.23828125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.84%) |Training time=0.44s (20.48%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5853|ppo_ep: 1|act_loss: 0.0015001296997070312|cri_loss: 0.003116607666015625|unsuper_loss: 0.0 +average reward score: 6.23828125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.76%) |Training time=0.44s (20.55%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5854|ppo_ep: 1|act_loss: -0.028717041015625|cri_loss: -0.01385498046875|unsuper_loss: 0.0 +average reward score: 5.43359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.34%) |Training time=0.45s (21.02%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5855|ppo_ep: 1|act_loss: -0.00753021240234375|cri_loss: -0.0033626556396484375|unsuper_loss: 0.0 +average reward score: 5.2421875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.40%) |Training time=0.44s (20.00%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5856|ppo_ep: 1|act_loss: 0.032745361328125|cri_loss: 0.016998291015625|unsuper_loss: 0.0 +average reward score: 5.10546875 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.71s (76.06%) |Training time=0.44s (19.47%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.25 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5857|ppo_ep: 1|act_loss: -0.00849151611328125|cri_loss: -0.0033931732177734375|unsuper_loss: 0.0 +average reward score: 4.5859375 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.61s (72.74%) |Training time=0.46s (20.72%) |Others=0.14 (6.54%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5858|ppo_ep: 1|act_loss: 0.0016469955444335938|cri_loss: 0.0010538101196289062|unsuper_loss: 0.0 +average reward score: 5.21875 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.66s (74.10%) |Training time=0.48s (21.43%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.25 |AvgSamplesPerSec=14.51 +[2023-04-14 12:22:51,232] [INFO] [logging.py:96:log_dist] [Rank 0] step=5860, skipped=74, lr=[2.028151603924359e-06, 2.028151603924359e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:22:51,250] [INFO] [timer.py:199:stop] epoch=0/micro_step=5860/global_step=5860, RunningAvgSamplesPerSec=105.41776771499981, CurrSamplesPerSec=105.08171960509524, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:22:51,343] [INFO] [logging.py:96:log_dist] [Rank 0] step=5860, skipped=96, lr=[1.068162149798737e-06, 1.068162149798737e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5859|ppo_ep: 1|act_loss: 0.0809326171875|cri_loss: 0.04217529296875|unsuper_loss: 0.0 +average reward score: 4.53125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.74%) |Training time=0.47s (21.65%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5860|ppo_ep: 1|act_loss: -0.0221710205078125|cri_loss: -0.01047515869140625|unsuper_loss: 0.0 +average reward score: 5.0078125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.80%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5861|ppo_ep: 1|act_loss: -0.00592041015625|cri_loss: -0.0028438568115234375|unsuper_loss: 0.0 +average reward score: 6.1953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.86%) |Training time=0.46s (21.52%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5862|ppo_ep: 1|act_loss: 0.00959014892578125|cri_loss: 0.00543975830078125|unsuper_loss: 0.0 +average reward score: 5.10546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.62%) |Training time=0.47s (21.76%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5863|ppo_ep: 1|act_loss: -0.008148193359375|cri_loss: -0.002391815185546875|unsuper_loss: 0.0 +average reward score: 4.66796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.48s (22.02%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5864|ppo_ep: 1|act_loss: -0.003368377685546875|cri_loss: -0.0016183853149414062|unsuper_loss: 0.0 +average reward score: 4.703125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.53%) |Training time=0.47s (21.88%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5865|ppo_ep: 1|act_loss: -0.0250701904296875|cri_loss: -0.012115478515625|unsuper_loss: 0.0 +average reward score: 4.56640625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.83%) |Training time=0.46s (21.55%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5866|ppo_ep: 1|act_loss: -0.015777587890625|cri_loss: -0.00771331787109375|unsuper_loss: 0.0 +average reward score: 4.5625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.81%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5867|ppo_ep: 1|act_loss: -0.0160980224609375|cri_loss: -0.007701873779296875|unsuper_loss: 0.0 +average reward score: 4.48046875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.80%) |Training time=0.46s (21.55%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5868|ppo_ep: 1|act_loss: 0.00677490234375|cri_loss: 0.0037555694580078125|unsuper_loss: 0.0 +average reward score: 4.53515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.81%) |Training time=0.47s (21.57%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.51 +[2023-04-14 12:23:12,850] [INFO] [logging.py:96:log_dist] [Rank 0] step=5870, skipped=74, lr=[2.013035360147499e-06, 2.013035360147499e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:23:12,868] [INFO] [timer.py:199:stop] epoch=0/micro_step=5870/global_step=5870, RunningAvgSamplesPerSec=105.4160718237638, CurrSamplesPerSec=106.12563552319669, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:23:12,961] [INFO] [logging.py:96:log_dist] [Rank 0] step=5870, skipped=96, lr=[1.0602827972379417e-06, 1.0602827972379417e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5869|ppo_ep: 1|act_loss: 0.020050048828125|cri_loss: 0.0104827880859375|unsuper_loss: 0.0 +average reward score: 4.8828125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.92%) |Training time=0.46s (21.46%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5870|ppo_ep: 1|act_loss: 0.0234375|cri_loss: 0.01206207275390625|unsuper_loss: 0.0 +average reward score: 5.0625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.85%) |Training time=0.46s (21.52%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5871|ppo_ep: 1|act_loss: -0.0171356201171875|cri_loss: -0.00823974609375|unsuper_loss: 0.0 +average reward score: 5.43359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.85%) |Training time=0.46s (21.50%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5872|ppo_ep: 1|act_loss: -0.01873779296875|cri_loss: -0.00913238525390625|unsuper_loss: 0.0 +average reward score: 5.94921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.09%) |Training time=0.46s (21.27%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5873|ppo_ep: 1|act_loss: 0.0076446533203125|cri_loss: 0.004428863525390625|unsuper_loss: 0.0 +average reward score: 5.64453125 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.64%) |Training time=0.47s (20.04%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5874|ppo_ep: 1|act_loss: 0.00121307373046875|cri_loss: 0.0009002685546875|unsuper_loss: 0.0 +average reward score: 5.98828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.28%) |Training time=0.48s (22.14%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5875|ppo_ep: 1|act_loss: -0.0013256072998046875|cri_loss: -0.0005035400390625|unsuper_loss: 0.0 +average reward score: 6.109375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.58%) |Training time=0.47s (21.77%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5876|ppo_ep: 1|act_loss: -0.0012073516845703125|cri_loss: -0.00045490264892578125|unsuper_loss: 0.0 +average reward score: 6.01953125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.39%) |Training time=0.48s (21.99%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5877|ppo_ep: 1|act_loss: 0.0103302001953125|cri_loss: 0.005706787109375|unsuper_loss: 0.0 +average reward score: 4.75390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.48s (21.96%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5878|ppo_ep: 1|act_loss: -0.007904052734375|cri_loss: -0.003818511962890625|unsuper_loss: 0.0 +average reward score: 4.80859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.47s (21.87%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.51 +[2023-04-14 12:23:34,691] [INFO] [logging.py:96:log_dist] [Rank 0] step=5880, skipped=74, lr=[1.9979607964700985e-06, 1.9979607964700985e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:23:34,710] [INFO] [timer.py:199:stop] epoch=0/micro_step=5880/global_step=5880, RunningAvgSamplesPerSec=105.41309467540493, CurrSamplesPerSec=104.21140785612961, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:23:34,803] [INFO] [logging.py:96:log_dist] [Rank 0] step=5880, skipped=96, lr=[1.0524247847592191e-06, 1.0524247847592191e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5879|ppo_ep: 1|act_loss: -0.00569915771484375|cri_loss: -0.0027313232421875|unsuper_loss: 0.0 +average reward score: 5.43359375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.68%) |Training time=0.47s (21.68%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5880|ppo_ep: 1|act_loss: 0.03271484375|cri_loss: 0.016754150390625|unsuper_loss: 0.0 +average reward score: 5.734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.42%) |Training time=0.48s (21.98%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5881|ppo_ep: 1|act_loss: -0.01360321044921875|cri_loss: -0.006427764892578125|unsuper_loss: 0.0 +average reward score: 4.65625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.11%) |Training time=0.48s (22.21%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5882|ppo_ep: 1|act_loss: 0.0108489990234375|cri_loss: 0.005596160888671875|unsuper_loss: 0.0 +average reward score: 5.2421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.37%) |Training time=0.48s (21.99%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.51 +[2023-04-14 12:23:43,477] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 5883|ppo_ep: 1|act_loss: 0.0019989013671875|cri_loss: 0.0011072158813476562|unsuper_loss: 0.0 +average reward score: 6.0 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.73%) |Training time=0.48s (22.14%) |Others=0.09 (4.14%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.51 +[2023-04-14 12:23:45,644] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 5884|ppo_ep: 1|act_loss: 0.025115966796875|cri_loss: 0.01308441162109375|unsuper_loss: 0.0 +average reward score: 5.87109375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.76%) |Training time=0.48s (22.02%) |Others=0.09 (4.22%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5885|ppo_ep: 1|act_loss: 0.004608154296875|cri_loss: 0.0024166107177734375|unsuper_loss: 0.0 +average reward score: 5.42578125 +------------------------------------------------------------------------------------- +|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.62s (72.49%) |Training time=0.51s (23.01%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.33 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5886|ppo_ep: 1|act_loss: -0.02362060546875|cri_loss: -0.0113067626953125|unsuper_loss: 0.0 +average reward score: 4.69921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.11%) |Training time=0.48s (22.31%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5887|ppo_ep: 1|act_loss: 0.013458251953125|cri_loss: 0.00859832763671875|unsuper_loss: 0.0 +average reward score: 4.7890625 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.50%) |Training time=0.47s (20.16%) |Others=0.10 (4.34%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5888|ppo_ep: 1|act_loss: 0.0234375|cri_loss: 0.01216888427734375|unsuper_loss: 0.0 +average reward score: 5.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.30%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.51 +[2023-04-14 12:23:56,602] [INFO] [logging.py:96:log_dist] [Rank 0] step=5890, skipped=74, lr=[1.982928136333553e-06, 1.982928136333553e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:23:56,621] [INFO] [timer.py:199:stop] epoch=0/micro_step=5890/global_step=5890, RunningAvgSamplesPerSec=105.40534437177286, CurrSamplesPerSec=102.15519602149695, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:23:56,713] [INFO] [logging.py:96:log_dist] [Rank 0] step=5890, skipped=98, lr=[1.0461538179170056e-06, 1.0461538179170056e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5889|ppo_ep: 1|act_loss: -0.00814056396484375|cri_loss: -0.00372314453125|unsuper_loss: 0.0 +average reward score: 5.33984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.48s (21.97%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5890|ppo_ep: 1|act_loss: 0.031341552734375|cri_loss: 0.0159759521484375|unsuper_loss: 0.0 +average reward score: 5.60546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.47s (21.95%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5891|ppo_ep: 1|act_loss: 0.031219482421875|cri_loss: 0.01629638671875|unsuper_loss: 0.0 +average reward score: 5.31640625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.54%) |Training time=0.48s (21.88%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5892|ppo_ep: 1|act_loss: 0.0266265869140625|cri_loss: 0.0140533447265625|unsuper_loss: 0.0 +average reward score: 5.12890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.47s (21.94%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5893|ppo_ep: 1|act_loss: 0.0418701171875|cri_loss: 0.0213165283203125|unsuper_loss: 0.0 +average reward score: 5.39453125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.18%) |Training time=0.48s (22.22%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5894|ppo_ep: 1|act_loss: 0.00766754150390625|cri_loss: 0.004138946533203125|unsuper_loss: 0.0 +average reward score: 4.8203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.28%) |Training time=0.48s (22.09%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5895|ppo_ep: 1|act_loss: -0.00994873046875|cri_loss: -0.00438690185546875|unsuper_loss: 0.0 +average reward score: 5.36328125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.81%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5896|ppo_ep: 1|act_loss: -0.0102691650390625|cri_loss: -0.004901885986328125|unsuper_loss: 0.0 +average reward score: 4.81640625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.33%) |Training time=0.48s (22.06%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5897|ppo_ep: 1|act_loss: 0.01910400390625|cri_loss: 0.0099334716796875|unsuper_loss: 0.0 +average reward score: 5.21484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.46%) |Training time=0.47s (21.91%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5898|ppo_ep: 1|act_loss: -0.032958984375|cri_loss: -0.0158843994140625|unsuper_loss: 0.0 +average reward score: 4.57421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.47s (21.88%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.51 +[2023-04-14 12:24:18,277] [INFO] [logging.py:96:log_dist] [Rank 0] step=5900, skipped=74, lr=[1.9679376025581524e-06, 1.9679376025581524e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:24:18,295] [INFO] [timer.py:199:stop] epoch=0/micro_step=5900/global_step=5900, RunningAvgSamplesPerSec=105.39940940116666, CurrSamplesPerSec=103.16481053464135, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:24:18,393] [INFO] [logging.py:96:log_dist] [Rank 0] step=5900, skipped=98, lr=[1.0383345108885993e-06, 1.0383345108885993e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5899|ppo_ep: 1|act_loss: -0.025238037109375|cri_loss: -0.01222991943359375|unsuper_loss: 0.0 +average reward score: 5.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.42%) |Training time=0.47s (21.78%) |Others=0.10 (4.80%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5900|ppo_ep: 1|act_loss: -0.004428863525390625|cri_loss: -0.0018815994262695312|unsuper_loss: 0.0 +average reward score: 4.9375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.27%) |Training time=0.48s (22.11%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5901|ppo_ep: 1|act_loss: -0.01324462890625|cri_loss: -0.00623321533203125|unsuper_loss: 0.0 +average reward score: 5.29296875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.86%) |Training time=0.47s (21.51%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5902|ppo_ep: 1|act_loss: -0.00580596923828125|cri_loss: -0.0019092559814453125|unsuper_loss: 0.0 +average reward score: 5.48828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.52%) |Training time=0.47s (21.86%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5903|ppo_ep: 1|act_loss: 0.0159912109375|cri_loss: 0.008148193359375|unsuper_loss: 0.0 +average reward score: 5.6328125 +------------------------------------------------------------------------------------- +|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.77s (74.42%) |Training time=0.51s (21.33%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.48 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5904|ppo_ep: 1|act_loss: -0.0110626220703125|cri_loss: -0.005344390869140625|unsuper_loss: 0.0 +average reward score: 5.62109375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.63%) |Training time=0.47s (21.74%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.51 +[2023-04-14 12:24:31,500] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 5905|ppo_ep: 1|act_loss: 0.025054931640625|cri_loss: 0.0130462646484375|unsuper_loss: 0.0 +average reward score: 5.08984375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.39%) |Training time=0.45s (20.93%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5906|ppo_ep: 1|act_loss: 0.0015077590942382812|cri_loss: 0.0011386871337890625|unsuper_loss: 0.0 +average reward score: 5.28125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.94%) |Training time=0.49s (22.46%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5907|ppo_ep: 1|act_loss: 0.00643157958984375|cri_loss: 0.0035247802734375|unsuper_loss: 0.0 +average reward score: 5.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.94%) |Training time=0.49s (22.45%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5908|ppo_ep: 1|act_loss: 0.006465911865234375|cri_loss: 0.0035915374755859375|unsuper_loss: 0.0 +average reward score: 5.828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.39%) |Training time=0.48s (22.02%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.51 +[2023-04-14 12:24:40,173] [INFO] [logging.py:96:log_dist] [Rank 0] step=5910, skipped=75, lr=[1.9544823238569153e-06, 1.9544823238569153e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:24:40,191] [INFO] [timer.py:199:stop] epoch=0/micro_step=5910/global_step=5910, RunningAvgSamplesPerSec=105.39236435819984, CurrSamplesPerSec=102.31007903210069, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:24:40,284] [INFO] [logging.py:96:log_dist] [Rank 0] step=5910, skipped=98, lr=[1.0305368692688175e-06, 1.0305368692688175e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5909|ppo_ep: 1|act_loss: -0.00597381591796875|cri_loss: -0.0027923583984375|unsuper_loss: 0.0 +average reward score: 5.86328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.46%) |Training time=0.48s (21.93%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.51 +[2023-04-14 12:24:42,333] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192 +epoch: 0|step: 5910|ppo_ep: 1|act_loss: 0.0025787353515625|cri_loss: 0.0016918182373046875|unsuper_loss: 0.0 +average reward score: 5.3359375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.17%) |Training time=0.45s (21.15%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5911|ppo_ep: 1|act_loss: -0.002712249755859375|cri_loss: -0.001117706298828125|unsuper_loss: 0.0 +average reward score: 5.37890625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.89%) |Training time=0.49s (22.52%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5912|ppo_ep: 1|act_loss: 0.0108795166015625|cri_loss: 0.00567626953125|unsuper_loss: 0.0 +average reward score: 4.9921875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.80%) |Training time=0.49s (22.62%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5913|ppo_ep: 1|act_loss: -0.01776123046875|cri_loss: -0.00823974609375|unsuper_loss: 0.0 +average reward score: 6.1640625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.50%) |Training time=0.50s (22.93%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5914|ppo_ep: 1|act_loss: -0.003353118896484375|cri_loss: -0.0009851455688476562|unsuper_loss: 0.0 +average reward score: 4.6328125 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.53%) |Training time=0.48s (21.92%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5915|ppo_ep: 1|act_loss: -0.0023899078369140625|cri_loss: -0.0008401870727539062|unsuper_loss: 0.0 +average reward score: 4.99609375 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.43%) |Training time=0.48s (21.18%) |Others=0.10 (4.39%)|CurSamplesPerSec=14.06 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5916|ppo_ep: 1|act_loss: 0.01297760009765625|cri_loss: 0.0069427490234375|unsuper_loss: 0.0 +average reward score: 5.5 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.15%) |Training time=0.48s (22.20%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5917|ppo_ep: 1|act_loss: -0.0005397796630859375|cri_loss: 0.00010776519775390625|unsuper_loss: 0.0 +average reward score: 5.2265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.26%) |Training time=0.48s (22.12%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5918|ppo_ep: 1|act_loss: -0.0025806427001953125|cri_loss: -0.0009760856628417969|unsuper_loss: 0.0 +average reward score: 5.4375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.20%) |Training time=0.47s (21.63%) |Others=0.11 (5.17%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.51 +[2023-04-14 12:25:02,201] [INFO] [logging.py:96:log_dist] [Rank 0] step=5920, skipped=76, lr=[1.9410615090410984e-06, 1.9410615090410984e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:25:02,220] [INFO] [timer.py:199:stop] epoch=0/micro_step=5920/global_step=5920, RunningAvgSamplesPerSec=105.38332702712113, CurrSamplesPerSec=103.77662056098468, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:25:02,313] [INFO] [logging.py:96:log_dist] [Rank 0] step=5920, skipped=98, lr=[1.0227610086375208e-06, 1.0227610086375208e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5919|ppo_ep: 1|act_loss: -0.0345458984375|cri_loss: -0.0159912109375|unsuper_loss: 0.0 +average reward score: 5.0234375 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.74s (75.05%) |Training time=0.47s (20.33%) |Others=0.11 (4.62%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5920|ppo_ep: 1|act_loss: -0.0001728534698486328|cri_loss: 6.532669067382812e-05|unsuper_loss: 0.0 +average reward score: 5.6484375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.25%) |Training time=0.48s (22.12%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5921|ppo_ep: 1|act_loss: -0.027099609375|cri_loss: -0.0131988525390625|unsuper_loss: 0.0 +average reward score: 5.16796875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.42%) |Training time=0.47s (21.93%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5922|ppo_ep: 1|act_loss: 0.04107666015625|cri_loss: 0.02093505859375|unsuper_loss: 0.0 +average reward score: 4.640625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.21%) |Training time=0.48s (22.19%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5923|ppo_ep: 1|act_loss: -0.01739501953125|cri_loss: -0.00836181640625|unsuper_loss: 0.0 +average reward score: 6.3359375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.48s (21.94%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5924|ppo_ep: 1|act_loss: -0.0078277587890625|cri_loss: -0.003665924072265625|unsuper_loss: 0.0 +average reward score: 5.00390625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.68%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5925|ppo_ep: 1|act_loss: -0.03497314453125|cri_loss: -0.0165863037109375|unsuper_loss: 0.0 +average reward score: 4.30078125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.42%) |Training time=0.48s (21.94%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5926|ppo_ep: 1|act_loss: -0.026092529296875|cri_loss: -0.01259613037109375|unsuper_loss: 0.0 +average reward score: 5.875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.77%) |Training time=0.47s (21.59%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5927|ppo_ep: 1|act_loss: 0.0107879638671875|cri_loss: 0.005474090576171875|unsuper_loss: 0.0 +average reward score: 5.51953125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.73%) |Training time=0.47s (21.64%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5928|ppo_ep: 1|act_loss: 0.05828857421875|cri_loss: 0.030517578125|unsuper_loss: 0.0 +average reward score: 5.65625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.98%) |Training time=0.47s (21.41%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.51 +[2023-04-14 12:25:23,865] [INFO] [logging.py:96:log_dist] [Rank 0] step=5930, skipped=76, lr=[1.9261901091532087e-06, 1.9261901091532087e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:25:23,884] [INFO] [timer.py:199:stop] epoch=0/micro_step=5930/global_step=5930, RunningAvgSamplesPerSec=105.37955887751461, CurrSamplesPerSec=105.48167242600165, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:25:23,976] [INFO] [logging.py:96:log_dist] [Rank 0] step=5930, skipped=98, lr=[1.0150070442517201e-06, 1.0150070442517201e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5929|ppo_ep: 1|act_loss: 0.220703125|cri_loss: 0.143310546875|unsuper_loss: 0.0 +average reward score: 5.23828125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.79%) |Training time=0.47s (21.57%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5930|ppo_ep: 1|act_loss: -0.004055023193359375|cri_loss: -0.0017652511596679688|unsuper_loss: 0.0 +average reward score: 5.2265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.47s (21.58%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5931|ppo_ep: 1|act_loss: 0.0077362060546875|cri_loss: 0.004604339599609375|unsuper_loss: 0.0 +average reward score: 5.19140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.65%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5932|ppo_ep: 1|act_loss: -0.0015506744384765625|cri_loss: -0.000553131103515625|unsuper_loss: 0.0 +average reward score: 5.72265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.79%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5933|ppo_ep: 1|act_loss: 0.006988525390625|cri_loss: 0.003902435302734375|unsuper_loss: 0.0 +average reward score: 5.46484375 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.61s (72.52%) |Training time=0.46s (20.62%) |Others=0.15 (6.86%)|CurSamplesPerSec=14.38 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5934|ppo_ep: 1|act_loss: -0.014007568359375|cri_loss: -0.0065155029296875|unsuper_loss: 0.0 +average reward score: 5.140625 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.25%) |Training time=0.46s (20.30%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.20 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5935|ppo_ep: 1|act_loss: -0.0095367431640625|cri_loss: -0.00386810302734375|unsuper_loss: 0.0 +average reward score: 5.2578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.01%) |Training time=0.46s (21.36%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5936|ppo_ep: 1|act_loss: 0.007232666015625|cri_loss: 0.003726959228515625|unsuper_loss: 0.0 +average reward score: 6.13671875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.20%) |Training time=0.45s (21.14%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5937|ppo_ep: 1|act_loss: -0.00814056396484375|cri_loss: -0.0037689208984375|unsuper_loss: 0.0 +average reward score: 4.77734375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.14%) |Training time=0.46s (21.20%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5938|ppo_ep: 1|act_loss: -0.009490966796875|cri_loss: -0.0043487548828125|unsuper_loss: 0.0 +average reward score: 5.55859375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.15%) |Training time=0.45s (21.17%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.51 +[2023-04-14 12:25:45,564] [INFO] [logging.py:96:log_dist] [Rank 0] step=5940, skipped=76, lr=[1.911361676620899e-06, 1.911361676620899e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:25:45,583] [INFO] [timer.py:199:stop] epoch=0/micro_step=5940/global_step=5940, RunningAvgSamplesPerSec=105.38352468920138, CurrSamplesPerSec=110.74535974698605, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:25:45,675] [INFO] [logging.py:96:log_dist] [Rank 0] step=5940, skipped=98, lr=[1.0072750910438734e-06, 1.0072750910438734e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5939|ppo_ep: 1|act_loss: 0.01202392578125|cri_loss: 0.006191253662109375|unsuper_loss: 0.0 +average reward score: 6.58203125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.28%) |Training time=0.45s (21.07%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5940|ppo_ep: 1|act_loss: -0.003948211669921875|cri_loss: -0.0017728805541992188|unsuper_loss: 0.0 +average reward score: 5.39453125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.02%) |Training time=0.46s (21.32%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5941|ppo_ep: 1|act_loss: 0.029876708984375|cri_loss: 0.01517486572265625|unsuper_loss: 0.0 +average reward score: 5.10546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.46s (21.46%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5942|ppo_ep: 1|act_loss: -0.0004661083221435547|cri_loss: -0.000164031982421875|unsuper_loss: 0.0 +average reward score: 4.7734375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.96%) |Training time=0.46s (21.39%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5943|ppo_ep: 1|act_loss: 0.019561767578125|cri_loss: 0.01006317138671875|unsuper_loss: 0.0 +average reward score: 4.6796875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.12%) |Training time=0.46s (21.23%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5944|ppo_ep: 1|act_loss: 0.0049285888671875|cri_loss: 0.0025653839111328125|unsuper_loss: 0.0 +average reward score: 5.7578125 +------------------------------------------------------------------------------------- +|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.63s (72.69%) |Training time=0.51s (22.85%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.26 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5945|ppo_ep: 1|act_loss: -0.007572174072265625|cri_loss: -0.003124237060546875|unsuper_loss: 0.0 +average reward score: 5.34375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.10%) |Training time=0.46s (21.18%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5946|ppo_ep: 1|act_loss: -0.01294708251953125|cri_loss: -0.006168365478515625|unsuper_loss: 0.0 +average reward score: 5.77734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.24%) |Training time=0.46s (21.15%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5947|ppo_ep: 1|act_loss: -0.0242156982421875|cri_loss: -0.0118408203125|unsuper_loss: 0.0 +average reward score: 4.359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.92%) |Training time=0.46s (21.46%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5948|ppo_ep: 1|act_loss: -0.00698089599609375|cri_loss: -0.0033702850341796875|unsuper_loss: 0.0 +average reward score: 5.2421875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.18%) |Training time=0.46s (21.15%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.51 +[2023-04-14 12:26:07,375] [INFO] [logging.py:96:log_dist] [Rank 0] step=5950, skipped=76, lr=[1.8965764312373112e-06, 1.8965764312373112e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:26:07,393] [INFO] [timer.py:199:stop] epoch=0/micro_step=5950/global_step=5950, RunningAvgSamplesPerSec=105.38668188338777, CurrSamplesPerSec=107.2714592506216, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:26:07,485] [INFO] [logging.py:96:log_dist] [Rank 0] step=5950, skipped=98, lr=[9.995652636201819e-07, 9.995652636201819e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5949|ppo_ep: 1|act_loss: -0.00499725341796875|cri_loss: -0.002071380615234375|unsuper_loss: 0.0 +average reward score: 5.37109375 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.02%) |Training time=0.46s (19.71%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5950|ppo_ep: 1|act_loss: 0.01093292236328125|cri_loss: 0.00567626953125|unsuper_loss: 0.0 +average reward score: 6.38671875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.89%) |Training time=0.46s (21.40%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5951|ppo_ep: 1|act_loss: 0.019287109375|cri_loss: 0.0101776123046875|unsuper_loss: 0.0 +average reward score: 5.6484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.01%) |Training time=0.46s (21.34%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5952|ppo_ep: 1|act_loss: -0.00455474853515625|cri_loss: -0.0020961761474609375|unsuper_loss: 0.0 +average reward score: 5.41796875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.08%) |Training time=0.46s (21.26%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5953|ppo_ep: 1|act_loss: -0.0208282470703125|cri_loss: -0.010009765625|unsuper_loss: 0.0 +average reward score: 5.6484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.99%) |Training time=0.46s (21.37%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5954|ppo_ep: 1|act_loss: -0.028045654296875|cri_loss: -0.0135040283203125|unsuper_loss: 0.0 +average reward score: 4.62109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.46s (21.58%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5955|ppo_ep: 1|act_loss: 0.0128021240234375|cri_loss: 0.006572723388671875|unsuper_loss: 0.0 +average reward score: 5.17578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.16%) |Training time=0.45s (21.16%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5956|ppo_ep: 1|act_loss: 0.002628326416015625|cri_loss: 0.00140380859375|unsuper_loss: 0.0 +average reward score: 5.6875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.07%) |Training time=0.46s (21.25%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5957|ppo_ep: 1|act_loss: -0.003032684326171875|cri_loss: -0.0013885498046875|unsuper_loss: 0.0 +average reward score: 6.2265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.45s (21.17%) |Others=0.10 (4.80%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5958|ppo_ep: 1|act_loss: 0.0238037109375|cri_loss: 0.0124359130859375|unsuper_loss: 0.0 +average reward score: 5.20703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.96%) |Training time=0.46s (21.35%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.51 +[2023-04-14 12:26:28,860] [INFO] [logging.py:96:log_dist] [Rank 0] step=5960, skipped=76, lr=[1.8818345921554516e-06, 1.8818345921554516e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:26:28,878] [INFO] [timer.py:199:stop] epoch=0/micro_step=5960/global_step=5960, RunningAvgSamplesPerSec=105.39163032103932, CurrSamplesPerSec=110.05142543688827, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:26:28,971] [INFO] [logging.py:96:log_dist] [Rank 0] step=5960, skipped=98, lr=[9.91877676258884e-07, 9.91877676258884e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5959|ppo_ep: 1|act_loss: 0.0390625|cri_loss: 0.0201568603515625|unsuper_loss: 0.0 +average reward score: 5.359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.23%) |Training time=0.45s (21.11%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5960|ppo_ep: 1|act_loss: 0.01190948486328125|cri_loss: 0.006134033203125|unsuper_loss: 0.0 +average reward score: 6.1484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.34%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5961|ppo_ep: 1|act_loss: -0.0223541259765625|cri_loss: -0.010711669921875|unsuper_loss: 0.0 +average reward score: 4.9765625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.46s (21.44%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5962|ppo_ep: 1|act_loss: 0.01100921630859375|cri_loss: 0.005786895751953125|unsuper_loss: 0.0 +average reward score: 5.0859375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.45s (21.07%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5963|ppo_ep: 1|act_loss: -0.0018167495727539062|cri_loss: -0.0007228851318359375|unsuper_loss: 0.0 +average reward score: 5.3828125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.16%) |Training time=0.45s (21.16%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5964|ppo_ep: 1|act_loss: -0.0391845703125|cri_loss: -0.0184478759765625|unsuper_loss: 0.0 +average reward score: 5.8515625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.66%) |Training time=0.46s (21.59%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5965|ppo_ep: 1|act_loss: 0.0067138671875|cri_loss: 0.004070281982421875|unsuper_loss: 0.0 +average reward score: 5.40625 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.66s (71.69%) |Training time=0.55s (23.59%) |Others=0.11 (4.72%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5966|ppo_ep: 1|act_loss: -0.030609130859375|cri_loss: -0.014984130859375|unsuper_loss: 0.0 +average reward score: 5.03515625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.67%) |Training time=0.46s (21.67%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5967|ppo_ep: 1|act_loss: -0.02716064453125|cri_loss: -0.0134735107421875|unsuper_loss: 0.0 +average reward score: 4.7421875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.93%) |Training time=0.46s (21.40%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5968|ppo_ep: 1|act_loss: 0.034423828125|cri_loss: 0.018524169921875|unsuper_loss: 0.0 +average reward score: 5.640625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.46s (21.25%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.51 +[2023-04-14 12:26:50,497] [INFO] [logging.py:96:log_dist] [Rank 0] step=5970, skipped=76, lr=[1.8671363778849347e-06, 1.8671363778849347e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:26:50,516] [INFO] [timer.py:199:stop] epoch=0/micro_step=5970/global_step=5970, RunningAvgSamplesPerSec=105.39215717659776, CurrSamplesPerSec=107.45963430119632, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:26:50,608] [INFO] [logging.py:96:log_dist] [Rank 0] step=5970, skipped=98, lr=[9.842124429085714e-07, 9.842124429085714e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5969|ppo_ep: 1|act_loss: 0.00429534912109375|cri_loss: 0.002338409423828125|unsuper_loss: 0.0 +average reward score: 4.8046875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.87%) |Training time=0.46s (21.48%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5970|ppo_ep: 1|act_loss: -0.0014858245849609375|cri_loss: -0.0004582405090332031|unsuper_loss: 0.0 +average reward score: 5.1875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.75%) |Training time=0.44s (20.59%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5971|ppo_ep: 1|act_loss: -0.0072021484375|cri_loss: -0.00341796875|unsuper_loss: 0.0 +average reward score: 5.09375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.94%) |Training time=0.44s (20.39%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5972|ppo_ep: 1|act_loss: 0.02154541015625|cri_loss: 0.01145172119140625|unsuper_loss: 0.0 +average reward score: 4.19140625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.35%) |Training time=0.45s (21.00%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5973|ppo_ep: 1|act_loss: 0.022430419921875|cri_loss: 0.01153564453125|unsuper_loss: 0.0 +average reward score: 4.9453125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.83%) |Training time=0.44s (20.44%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5974|ppo_ep: 1|act_loss: -0.018157958984375|cri_loss: -0.0088348388671875|unsuper_loss: 0.0 +average reward score: 4.859375 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.72s (75.98%) |Training time=0.44s (19.59%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.11 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5975|ppo_ep: 1|act_loss: 0.016845703125|cri_loss: 0.0087127685546875|unsuper_loss: 0.0 +average reward score: 5.0625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.76%) |Training time=0.44s (20.57%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5976|ppo_ep: 1|act_loss: -0.0218505859375|cri_loss: -0.01079559326171875|unsuper_loss: 0.0 +average reward score: 5.20703125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.82%) |Training time=0.44s (20.53%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5977|ppo_ep: 1|act_loss: -0.02777099609375|cri_loss: -0.01334381103515625|unsuper_loss: 0.0 +average reward score: 5.23046875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.65%) |Training time=0.45s (20.70%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5978|ppo_ep: 1|act_loss: -0.013427734375|cri_loss: -0.006366729736328125|unsuper_loss: 0.0 +average reward score: 5.15234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.79%) |Training time=0.44s (20.53%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.51 +[2023-04-14 12:27:12,106] [INFO] [logging.py:96:log_dist] [Rank 0] step=5980, skipped=76, lr=[1.8524820062887546e-06, 1.8524820062887546e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:27:12,124] [INFO] [timer.py:199:stop] epoch=0/micro_step=5980/global_step=5980, RunningAvgSamplesPerSec=105.40602482628336, CurrSamplesPerSec=113.2016207116422, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:27:12,218] [INFO] [logging.py:96:log_dist] [Rank 0] step=5980, skipped=98, lr=[9.765696771864933e-07, 9.765696771864933e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5979|ppo_ep: 1|act_loss: 0.0231781005859375|cri_loss: 0.01198577880859375|unsuper_loss: 0.0 +average reward score: 4.875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.55%) |Training time=0.45s (20.73%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52 +epoch: 0|step: 5980|ppo_ep: 1|act_loss: 0.021240234375|cri_loss: 0.01136016845703125|unsuper_loss: 0.0 +average reward score: 5.29296875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.74%) |Training time=0.46s (21.57%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.52 +epoch: 0|step: 5981|ppo_ep: 1|act_loss: 0.01788330078125|cri_loss: 0.009185791015625|unsuper_loss: 0.0 +average reward score: 5.640625 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.74s (75.18%) |Training time=0.47s (20.50%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.82 |AvgSamplesPerSec=14.51 +epoch: 0|step: 5982|ppo_ep: 1|act_loss: 0.034881591796875|cri_loss: 0.0184783935546875|unsuper_loss: 0.0 +average reward score: 5.31640625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.13%) |Training time=0.48s (22.23%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52 +epoch: 0|step: 5983|ppo_ep: 1|act_loss: -0.0008478164672851562|cri_loss: -0.00020265579223632812|unsuper_loss: 0.0 +average reward score: 5.8046875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.40%) |Training time=0.47s (21.95%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.52 +epoch: 0|step: 5984|ppo_ep: 1|act_loss: -0.024932861328125|cri_loss: -0.01213836669921875|unsuper_loss: 0.0 +average reward score: 6.08984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.51%) |Training time=0.47s (21.80%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52 +[2023-04-14 12:27:25,255] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 5985|ppo_ep: 1|act_loss: -0.00849151611328125|cri_loss: -0.0028514862060546875|unsuper_loss: 0.0 +average reward score: 5.6953125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.93%) |Training time=0.47s (21.88%) |Others=0.09 (4.19%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.52 +[2023-04-14 12:27:27,391] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 5986|ppo_ep: 1|act_loss: -0.0246734619140625|cri_loss: -0.01187896728515625|unsuper_loss: 0.0 +average reward score: 4.84765625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.76%) |Training time=0.47s (22.04%) |Others=0.09 (4.20%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.52 +epoch: 0|step: 5987|ppo_ep: 1|act_loss: 0.0214080810546875|cri_loss: 0.0121612548828125|unsuper_loss: 0.0 +average reward score: 5.44921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.84%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52 +epoch: 0|step: 5988|ppo_ep: 1|act_loss: -0.01424407958984375|cri_loss: -0.005828857421875|unsuper_loss: 0.0 +average reward score: 5.1875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.42%) |Training time=0.47s (21.92%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52 +[2023-04-14 12:27:33,716] [INFO] [logging.py:96:log_dist] [Rank 0] step=5990, skipped=76, lr=[1.8378716945800507e-06, 1.8378716945800507e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:27:33,734] [INFO] [timer.py:199:stop] epoch=0/micro_step=5990/global_step=5990, RunningAvgSamplesPerSec=105.40360050801567, CurrSamplesPerSec=103.72817841981488, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:27:33,827] [INFO] [logging.py:96:log_dist] [Rank 0] step=5990, skipped=100, lr=[9.704717174393912e-07, 9.704717174393912e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5989|ppo_ep: 1|act_loss: -0.00201416015625|cri_loss: -0.0007715225219726562|unsuper_loss: 0.0 +average reward score: 5.1875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.40%) |Training time=0.47s (21.93%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52 +epoch: 0|step: 5990|ppo_ep: 1|act_loss: -0.023345947265625|cri_loss: -0.01153564453125|unsuper_loss: 0.0 +average reward score: 5.45703125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.58%) |Training time=0.47s (21.75%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52 +epoch: 0|step: 5991|ppo_ep: 1|act_loss: 0.0107879638671875|cri_loss: 0.005706787109375|unsuper_loss: 0.0 +average reward score: 5.2421875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.41%) |Training time=0.47s (21.93%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52 +epoch: 0|step: 5992|ppo_ep: 1|act_loss: 0.0079498291015625|cri_loss: 0.0045623779296875|unsuper_loss: 0.0 +average reward score: 5.38671875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.40%) |Training time=0.47s (21.92%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.52 +epoch: 0|step: 5993|ppo_ep: 1|act_loss: 0.0242919921875|cri_loss: 0.01251220703125|unsuper_loss: 0.0 +average reward score: 5.09765625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.46s (21.28%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52 +epoch: 0|step: 5994|ppo_ep: 1|act_loss: -0.0028228759765625|cri_loss: -0.0012311935424804688|unsuper_loss: 0.0 +average reward score: 5.796875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.64%) |Training time=0.47s (21.69%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52 +epoch: 0|step: 5995|ppo_ep: 1|act_loss: -0.00330352783203125|cri_loss: -0.0015287399291992188|unsuper_loss: 0.0 +average reward score: 4.609375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.84%) |Training time=0.46s (21.50%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52 +epoch: 0|step: 5996|ppo_ep: 1|act_loss: 0.0069732666015625|cri_loss: 0.0037631988525390625|unsuper_loss: 0.0 +average reward score: 6.5546875 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.96%) |Training time=0.45s (20.29%) |Others=0.17 (7.74%)|CurSamplesPerSec=14.44 |AvgSamplesPerSec=14.52 +epoch: 0|step: 5997|ppo_ep: 1|act_loss: -0.005279541015625|cri_loss: -0.0024662017822265625|unsuper_loss: 0.0 +average reward score: 5.30078125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.22%) |Training time=0.45s (21.12%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52 +epoch: 0|step: 5998|ppo_ep: 1|act_loss: -0.0168304443359375|cri_loss: -0.00815582275390625|unsuper_loss: 0.0 +average reward score: 6.7109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.05%) |Training time=0.46s (21.30%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52 +[2023-04-14 12:27:55,249] [INFO] [logging.py:96:log_dist] [Rank 0] step=6000, skipped=76, lr=[1.8233056593188891e-06, 1.8233056593188891e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:27:55,267] [INFO] [timer.py:199:stop] epoch=0/micro_step=6000/global_step=6000, RunningAvgSamplesPerSec=105.40675121024026, CurrSamplesPerSec=109.96774978513238, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:27:55,360] [INFO] [logging.py:96:log_dist] [Rank 0] step=6000, skipped=100, lr=[9.62869678699519e-07, 9.62869678699519e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 5999|ppo_ep: 1|act_loss: -0.0345458984375|cri_loss: -0.0167236328125|unsuper_loss: 0.0 +average reward score: 5.58984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.19%) |Training time=0.45s (21.15%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6000|ppo_ep: 1|act_loss: -0.0182952880859375|cri_loss: -0.008880615234375|unsuper_loss: 0.0 +average reward score: 5.69140625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.99%) |Training time=0.46s (21.37%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6001|ppo_ep: 1|act_loss: -0.002777099609375|cri_loss: -0.0012273788452148438|unsuper_loss: 0.0 +average reward score: 5.46875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.46s (21.32%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6002|ppo_ep: 1|act_loss: 0.0582275390625|cri_loss: 0.03076171875|unsuper_loss: 0.0 +average reward score: 6.01171875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.87%) |Training time=0.47s (21.53%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6003|ppo_ep: 1|act_loss: -0.0167999267578125|cri_loss: -0.0081329345703125|unsuper_loss: 0.0 +average reward score: 4.9765625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.45%) |Training time=0.46s (20.97%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6004|ppo_ep: 1|act_loss: -0.0114898681640625|cri_loss: -0.00524139404296875|unsuper_loss: 0.0 +average reward score: 4.78125 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.89%) |Training time=0.47s (20.71%) |Others=0.10 (4.41%)|CurSamplesPerSec=14.17 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6005|ppo_ep: 1|act_loss: -0.0033092498779296875|cri_loss: -0.0012836456298828125|unsuper_loss: 0.0 +average reward score: 4.9453125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.10%) |Training time=0.46s (21.22%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6006|ppo_ep: 1|act_loss: 0.0024547576904296875|cri_loss: 0.001491546630859375|unsuper_loss: 0.0 +average reward score: 5.1640625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.20%) |Training time=0.45s (21.14%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6007|ppo_ep: 1|act_loss: -0.001834869384765625|cri_loss: -0.0005602836608886719|unsuper_loss: 0.0 +average reward score: 5.73828125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.16%) |Training time=0.46s (21.20%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6008|ppo_ep: 1|act_loss: -0.028045654296875|cri_loss: -0.0137939453125|unsuper_loss: 0.0 +average reward score: 4.78125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.08%) |Training time=0.46s (21.25%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52 +[2023-04-14 12:28:16,897] [INFO] [logging.py:96:log_dist] [Rank 0] step=6010, skipped=76, lr=[1.8087841164090489e-06, 1.8087841164090489e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:28:16,915] [INFO] [timer.py:199:stop] epoch=0/micro_step=6010/global_step=6010, RunningAvgSamplesPerSec=105.41120763475524, CurrSamplesPerSec=110.29632922803589, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:28:17,008] [INFO] [logging.py:96:log_dist] [Rank 0] step=6010, skipped=100, lr=[9.552904239391322e-07, 9.552904239391322e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6009|ppo_ep: 1|act_loss: 0.01910400390625|cri_loss: 0.00994110107421875|unsuper_loss: 0.0 +average reward score: 5.140625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.25%) |Training time=0.45s (21.09%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6010|ppo_ep: 1|act_loss: -0.01312255859375|cri_loss: -0.006427764892578125|unsuper_loss: 0.0 +average reward score: 4.97265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.30%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6011|ppo_ep: 1|act_loss: -0.0002875328063964844|cri_loss: 5.936622619628906e-05|unsuper_loss: 0.0 +average reward score: 5.35546875 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.72s (74.56%) |Training time=0.48s (20.78%) |Others=0.11 (4.66%)|CurSamplesPerSec=13.84 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6012|ppo_ep: 1|act_loss: -0.028472900390625|cri_loss: -0.0135345458984375|unsuper_loss: 0.0 +average reward score: 5.046875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.06%) |Training time=0.46s (21.29%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6013|ppo_ep: 1|act_loss: -0.027679443359375|cri_loss: -0.013671875|unsuper_loss: 0.0 +average reward score: 5.8828125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.00%) |Training time=0.46s (21.35%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6014|ppo_ep: 1|act_loss: -0.01953125|cri_loss: -0.0093841552734375|unsuper_loss: 0.0 +average reward score: 5.3203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.13%) |Training time=0.45s (21.19%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6015|ppo_ep: 1|act_loss: -0.02752685546875|cri_loss: -0.01348114013671875|unsuper_loss: 0.0 +average reward score: 5.9609375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.36%) |Training time=0.45s (20.97%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6016|ppo_ep: 1|act_loss: 0.017852783203125|cri_loss: 0.0092010498046875|unsuper_loss: 0.0 +average reward score: 4.97265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.18%) |Training time=0.45s (21.05%) |Others=0.10 (4.78%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6017|ppo_ep: 1|act_loss: -0.01593017578125|cri_loss: -0.007625579833984375|unsuper_loss: 0.0 +average reward score: 5.453125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.05%) |Training time=0.45s (21.13%) |Others=0.10 (4.82%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6018|ppo_ep: 1|act_loss: 0.02801513671875|cri_loss: 0.014251708984375|unsuper_loss: 0.0 +average reward score: 5.14453125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.34%) |Training time=0.47s (21.98%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52 +[2023-04-14 12:28:38,536] [INFO] [logging.py:96:log_dist] [Rank 0] step=6020, skipped=76, lr=[1.7943072810948272e-06, 1.7943072810948272e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:28:38,555] [INFO] [timer.py:199:stop] epoch=0/micro_step=6020/global_step=6020, RunningAvgSamplesPerSec=105.41476522670784, CurrSamplesPerSec=105.24371287159768, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:28:38,648] [INFO] [logging.py:96:log_dist] [Rank 0] step=6020, skipped=100, lr=[9.477340655010717e-07, 9.477340655010717e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6019|ppo_ep: 1|act_loss: -0.04486083984375|cri_loss: -0.021087646484375|unsuper_loss: 0.0 +average reward score: 5.32421875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.57%) |Training time=0.47s (21.75%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6020|ppo_ep: 1|act_loss: -0.005889892578125|cri_loss: -0.0028514862060546875|unsuper_loss: 0.0 +average reward score: 5.58203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.41%) |Training time=0.47s (21.90%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6021|ppo_ep: 1|act_loss: 0.00341033935546875|cri_loss: 0.002422332763671875|unsuper_loss: 0.0 +average reward score: 5.5234375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.56%) |Training time=0.46s (21.62%) |Others=0.10 (4.82%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6022|ppo_ep: 1|act_loss: 0.0144805908203125|cri_loss: 0.007442474365234375|unsuper_loss: 0.0 +average reward score: 4.9453125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.58%) |Training time=0.47s (21.75%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6023|ppo_ep: 1|act_loss: 0.013458251953125|cri_loss: 0.006992340087890625|unsuper_loss: 0.0 +average reward score: 4.98046875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.13%) |Training time=0.48s (22.22%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6024|ppo_ep: 1|act_loss: -0.00662994384765625|cri_loss: -0.0031528472900390625|unsuper_loss: 0.0 +average reward score: 5.96484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.48%) |Training time=0.47s (21.83%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6025|ppo_ep: 1|act_loss: 0.01381683349609375|cri_loss: 0.007251739501953125|unsuper_loss: 0.0 +average reward score: 4.8203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.25%) |Training time=0.45s (21.08%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6026|ppo_ep: 1|act_loss: 0.056182861328125|cri_loss: 0.0298004150390625|unsuper_loss: 0.0 +average reward score: 5.30078125 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.23%) |Training time=0.45s (19.42%) |Others=0.10 (4.35%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6027|ppo_ep: 1|act_loss: 0.0283355712890625|cri_loss: 0.0158233642578125|unsuper_loss: 0.0 +average reward score: 5.421875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.15%) |Training time=0.45s (21.18%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6028|ppo_ep: 1|act_loss: 0.0100860595703125|cri_loss: 0.00598907470703125|unsuper_loss: 0.0 +average reward score: 5.4375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.31%) |Training time=0.45s (21.01%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52 +[2023-04-14 12:29:00,177] [INFO] [logging.py:96:log_dist] [Rank 0] step=6030, skipped=76, lr=[1.7798753679578482e-06, 1.7798753679578482e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:29:00,195] [INFO] [timer.py:199:stop] epoch=0/micro_step=6030/global_step=6030, RunningAvgSamplesPerSec=105.41748877018956, CurrSamplesPerSec=108.73361352979491, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:29:00,288] [INFO] [logging.py:96:log_dist] [Rank 0] step=6030, skipped=100, lr=[9.402007153887982e-07, 9.402007153887982e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6029|ppo_ep: 1|act_loss: -0.027130126953125|cri_loss: -0.0133209228515625|unsuper_loss: 0.0 +average reward score: 5.92578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.09%) |Training time=0.46s (21.28%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6030|ppo_ep: 1|act_loss: -0.023773193359375|cri_loss: -0.0114898681640625|unsuper_loss: 0.0 +average reward score: 5.6640625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.26%) |Training time=0.46s (21.12%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6031|ppo_ep: 1|act_loss: 0.0023746490478515625|cri_loss: 0.0015802383422851562|unsuper_loss: 0.0 +average reward score: 4.90625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.11%) |Training time=0.46s (21.23%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6032|ppo_ep: 1|act_loss: -0.03857421875|cri_loss: -0.0188446044921875|unsuper_loss: 0.0 +average reward score: 5.2421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.22%) |Training time=0.46s (21.14%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6033|ppo_ep: 1|act_loss: -0.0029544830322265625|cri_loss: -0.0013036727905273438|unsuper_loss: 0.0 +average reward score: 5.45703125 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.36%) |Training time=0.47s (21.04%) |Others=0.12 (5.60%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6034|ppo_ep: 1|act_loss: -0.012664794921875|cri_loss: -0.006099700927734375|unsuper_loss: 0.0 +average reward score: 5.4609375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.84%) |Training time=0.44s (20.21%) |Others=0.11 (4.94%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6035|ppo_ep: 1|act_loss: 0.019073486328125|cri_loss: 0.00981903076171875|unsuper_loss: 0.0 +average reward score: 5.4609375 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.75%) |Training time=0.44s (20.55%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6036|ppo_ep: 1|act_loss: 0.00531005859375|cri_loss: 0.00304412841796875|unsuper_loss: 0.0 +average reward score: 6.26953125 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.33%) |Training time=0.45s (21.00%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6037|ppo_ep: 1|act_loss: -0.00218963623046875|cri_loss: -0.0005550384521484375|unsuper_loss: 0.0 +average reward score: 5.1015625 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.52%) |Training time=0.44s (20.79%) |Others=0.10 (4.68%)|CurSamplesPerSec=15.03 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6038|ppo_ep: 1|act_loss: -0.0223236083984375|cri_loss: -0.01055908203125|unsuper_loss: 0.0 +average reward score: 5.62890625 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.35%) |Training time=0.45s (20.95%) |Others=0.10 (4.70%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.52 +[2023-04-14 12:29:21,698] [INFO] [logging.py:96:log_dist] [Rank 0] step=6040, skipped=76, lr=[1.765488590913881e-06, 1.765488590913881e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:29:21,716] [INFO] [timer.py:199:stop] epoch=0/micro_step=6040/global_step=6040, RunningAvgSamplesPerSec=105.42680483904256, CurrSamplesPerSec=113.44426751053996, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:29:21,809] [INFO] [logging.py:96:log_dist] [Rank 0] step=6040, skipped=100, lr=[9.326904852647345e-07, 9.326904852647345e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6039|ppo_ep: 1|act_loss: -0.0015201568603515625|cri_loss: -0.0006833076477050781|unsuper_loss: 0.0 +average reward score: 5.03515625 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.47%) |Training time=0.44s (20.87%) |Others=0.10 (4.67%)|CurSamplesPerSec=15.02 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6040|ppo_ep: 1|act_loss: -0.014862060546875|cri_loss: -0.007099151611328125|unsuper_loss: 0.0 +average reward score: 4.80078125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.08%) |Training time=0.43s (20.07%) |Others=0.10 (4.85%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6041|ppo_ep: 1|act_loss: 0.0177154541015625|cri_loss: 0.009063720703125|unsuper_loss: 0.0 +average reward score: 5.4375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.54%) |Training time=0.45s (20.79%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6042|ppo_ep: 1|act_loss: -0.003208160400390625|cri_loss: -0.0014791488647460938|unsuper_loss: 0.0 +average reward score: 5.3828125 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.76s (76.04%) |Training time=0.46s (19.65%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.79 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6043|ppo_ep: 1|act_loss: -0.0123443603515625|cri_loss: -0.006061553955078125|unsuper_loss: 0.0 +average reward score: 5.890625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.47%) |Training time=0.45s (20.86%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6044|ppo_ep: 1|act_loss: 0.029998779296875|cri_loss: 0.0153350830078125|unsuper_loss: 0.0 +average reward score: 5.40625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.40%) |Training time=0.45s (20.92%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6045|ppo_ep: 1|act_loss: -0.00530242919921875|cri_loss: -0.002552032470703125|unsuper_loss: 0.0 +average reward score: 5.62890625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.28%) |Training time=0.45s (21.05%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6046|ppo_ep: 1|act_loss: 0.0306396484375|cri_loss: 0.016571044921875|unsuper_loss: 0.0 +average reward score: 5.4609375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.33%) |Training time=0.45s (21.01%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6047|ppo_ep: 1|act_loss: 0.002735137939453125|cri_loss: 0.001861572265625|unsuper_loss: 0.0 +average reward score: 4.98828125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.51%) |Training time=0.45s (20.81%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6048|ppo_ep: 1|act_loss: 0.006122589111328125|cri_loss: 0.0031719207763671875|unsuper_loss: 0.0 +average reward score: 4.75390625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.31%) |Training time=0.45s (21.03%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.52 +[2023-04-14 12:29:43,343] [INFO] [logging.py:96:log_dist] [Rank 0] step=6050, skipped=76, lr=[1.751147163209664e-06, 1.751147163209664e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:29:43,361] [INFO] [timer.py:199:stop] epoch=0/micro_step=6050/global_step=6050, RunningAvgSamplesPerSec=105.43758064958905, CurrSamplesPerSec=114.32953392591709, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:29:43,454] [INFO] [logging.py:96:log_dist] [Rank 0] step=6050, skipped=100, lr=[9.252034864486062e-07, 9.252034864486062e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6049|ppo_ep: 1|act_loss: -0.04718017578125|cri_loss: -0.023223876953125|unsuper_loss: 0.0 +average reward score: 5.21875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.72%) |Training time=0.44s (20.62%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6050|ppo_ep: 1|act_loss: -0.021270751953125|cri_loss: -0.01024627685546875|unsuper_loss: 0.0 +average reward score: 4.71484375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.55%) |Training time=0.45s (20.78%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6051|ppo_ep: 1|act_loss: 0.01218414306640625|cri_loss: 0.00667572021484375|unsuper_loss: 0.0 +average reward score: 4.984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.46%) |Training time=0.45s (20.85%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6052|ppo_ep: 1|act_loss: -0.0228271484375|cri_loss: -0.0111236572265625|unsuper_loss: 0.0 +average reward score: 4.578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.20%) |Training time=0.45s (21.14%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6053|ppo_ep: 1|act_loss: -0.0243682861328125|cri_loss: -0.011871337890625|unsuper_loss: 0.0 +average reward score: 5.25390625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.46%) |Training time=0.45s (20.86%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6054|ppo_ep: 1|act_loss: 0.0071868896484375|cri_loss: 0.003875732421875|unsuper_loss: 0.0 +average reward score: 5.2578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.42%) |Training time=0.45s (20.91%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6055|ppo_ep: 1|act_loss: 0.035552978515625|cri_loss: 0.0187225341796875|unsuper_loss: 0.0 +average reward score: 5.453125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.50%) |Training time=0.44s (20.61%) |Others=0.11 (4.88%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6056|ppo_ep: 1|act_loss: 0.043182373046875|cri_loss: 0.0221099853515625|unsuper_loss: 0.0 +average reward score: 5.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.96%) |Training time=0.44s (20.37%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6057|ppo_ep: 1|act_loss: 0.0064544677734375|cri_loss: 0.003429412841796875|unsuper_loss: 0.0 +average reward score: 4.2578125 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.60s (71.96%) |Training time=0.47s (21.10%) |Others=0.15 (6.94%)|CurSamplesPerSec=14.42 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6058|ppo_ep: 1|act_loss: 0.0033817291259765625|cri_loss: 0.002197265625|unsuper_loss: 0.0 +average reward score: 4.84375 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.35%) |Training time=0.46s (20.20%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.19 |AvgSamplesPerSec=14.52 +[2023-04-14 12:30:05,009] [INFO] [logging.py:96:log_dist] [Rank 0] step=6060, skipped=76, lr=[1.7368512974197543e-06, 1.7368512974197543e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:30:05,028] [INFO] [timer.py:199:stop] epoch=0/micro_step=6060/global_step=6060, RunningAvgSamplesPerSec=105.4466785255788, CurrSamplesPerSec=110.93914526975647, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:30:05,120] [INFO] [logging.py:96:log_dist] [Rank 0] step=6060, skipped=100, lr=[9.177398299157989e-07, 9.177398299157989e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6059|ppo_ep: 1|act_loss: 0.00881195068359375|cri_loss: 0.004749298095703125|unsuper_loss: 0.0 +average reward score: 4.671875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.34%) |Training time=0.45s (21.02%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6060|ppo_ep: 1|act_loss: 0.02923583984375|cri_loss: 0.01485443115234375|unsuper_loss: 0.0 +average reward score: 5.3359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.95%) |Training time=0.46s (21.39%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6061|ppo_ep: 1|act_loss: -0.0119171142578125|cri_loss: -0.005054473876953125|unsuper_loss: 0.0 +average reward score: 4.85546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.10%) |Training time=0.46s (21.25%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6062|ppo_ep: 1|act_loss: -0.020172119140625|cri_loss: -0.00948333740234375|unsuper_loss: 0.0 +average reward score: 4.79296875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.22%) |Training time=0.47s (21.92%) |Others=0.11 (4.87%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6063|ppo_ep: 1|act_loss: -0.0472412109375|cri_loss: -0.023162841796875|unsuper_loss: 0.0 +average reward score: 5.0703125 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.65s (72.69%) |Training time=0.52s (22.88%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.09 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6064|ppo_ep: 1|act_loss: 0.00022161006927490234|cri_loss: 0.0004191398620605469|unsuper_loss: 0.0 +average reward score: 5.671875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.06%) |Training time=0.48s (22.30%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6065|ppo_ep: 1|act_loss: -0.0066070556640625|cri_loss: -0.002780914306640625|unsuper_loss: 0.0 +average reward score: 5.5 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.17%) |Training time=0.48s (22.18%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6066|ppo_ep: 1|act_loss: -0.01184844970703125|cri_loss: -0.005641937255859375|unsuper_loss: 0.0 +average reward score: 6.296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.26%) |Training time=0.48s (22.08%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6067|ppo_ep: 1|act_loss: 0.00704193115234375|cri_loss: 0.0038299560546875|unsuper_loss: 0.0 +average reward score: 6.09375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.62%) |Training time=0.47s (21.76%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6068|ppo_ep: 1|act_loss: 0.01067352294921875|cri_loss: 0.005550384521484375|unsuper_loss: 0.0 +average reward score: 5.70703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.10%) |Training time=0.48s (22.25%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.52 +[2023-04-14 12:30:26,681] [INFO] [logging.py:96:log_dist] [Rank 0] step=6070, skipped=76, lr=[1.7226012054433704e-06, 1.7226012054433704e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:30:26,699] [INFO] [timer.py:199:stop] epoch=0/micro_step=6070/global_step=6070, RunningAvgSamplesPerSec=105.44069020433436, CurrSamplesPerSec=101.47307651888987, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:30:26,792] [INFO] [logging.py:96:log_dist] [Rank 0] step=6070, skipped=100, lr=[9.102996262957067e-07, 9.102996262957067e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6069|ppo_ep: 1|act_loss: 0.0227508544921875|cri_loss: 0.0118408203125|unsuper_loss: 0.0 +average reward score: 5.07421875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.48s (22.18%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6070|ppo_ep: 1|act_loss: 0.015289306640625|cri_loss: 0.00815582275390625|unsuper_loss: 0.0 +average reward score: 4.36328125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.43%) |Training time=0.47s (21.86%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6071|ppo_ep: 1|act_loss: 0.0294189453125|cri_loss: 0.01490020751953125|unsuper_loss: 0.0 +average reward score: 4.921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.87%) |Training time=0.44s (20.45%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6072|ppo_ep: 1|act_loss: -0.010040283203125|cri_loss: -0.004913330078125|unsuper_loss: 0.0 +average reward score: 5.921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.25%) |Training time=0.48s (22.08%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6073|ppo_ep: 1|act_loss: 0.0377197265625|cri_loss: 0.01934814453125|unsuper_loss: 0.0 +average reward score: 5.796875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.13%) |Training time=0.45s (21.18%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6074|ppo_ep: 1|act_loss: 0.00595855712890625|cri_loss: 0.003124237060546875|unsuper_loss: 0.0 +average reward score: 5.44140625 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.44%) |Training time=0.47s (20.24%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.80 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6075|ppo_ep: 1|act_loss: -0.0601806640625|cri_loss: -0.0290985107421875|unsuper_loss: 0.0 +average reward score: 4.6640625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.94%) |Training time=0.44s (20.38%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6076|ppo_ep: 1|act_loss: 0.04913330078125|cri_loss: 0.02716064453125|unsuper_loss: 0.0 +average reward score: 4.61328125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.77%) |Training time=0.44s (20.57%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6077|ppo_ep: 1|act_loss: 0.01459503173828125|cri_loss: 0.00791168212890625|unsuper_loss: 0.0 +average reward score: 5.140625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.64%) |Training time=0.44s (20.70%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6078|ppo_ep: 1|act_loss: -0.014251708984375|cri_loss: -0.006336212158203125|unsuper_loss: 0.0 +average reward score: 5.8359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.94%) |Training time=0.44s (20.39%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52 +[2023-04-14 12:30:48,347] [INFO] [logging.py:96:log_dist] [Rank 0] step=6080, skipped=76, lr=[1.7083970985012522e-06, 1.7083970985012522e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:30:48,365] [INFO] [timer.py:199:stop] epoch=0/micro_step=6080/global_step=6080, RunningAvgSamplesPerSec=105.44891692173641, CurrSamplesPerSec=114.66817942601718, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:30:48,458] [INFO] [logging.py:96:log_dist] [Rank 0] step=6080, skipped=100, lr=[9.028829858700974e-07, 9.028829858700974e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6079|ppo_ep: 1|act_loss: -0.020721435546875|cri_loss: -0.01010894775390625|unsuper_loss: 0.0 +average reward score: 5.296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.84%) |Training time=0.44s (20.52%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6080|ppo_ep: 1|act_loss: -0.01166534423828125|cri_loss: -0.005489349365234375|unsuper_loss: 0.0 +average reward score: 5.03515625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.84%) |Training time=0.44s (20.50%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6081|ppo_ep: 1|act_loss: 0.0248565673828125|cri_loss: 0.012939453125|unsuper_loss: 0.0 +average reward score: 6.3203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.44%) |Training time=0.45s (20.91%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6082|ppo_ep: 1|act_loss: 0.0131683349609375|cri_loss: 0.0073394775390625|unsuper_loss: 0.0 +average reward score: 4.7109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.86%) |Training time=0.44s (20.47%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6083|ppo_ep: 1|act_loss: 0.021148681640625|cri_loss: 0.010986328125|unsuper_loss: 0.0 +average reward score: 6.01953125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.68%) |Training time=0.44s (20.66%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6084|ppo_ep: 1|act_loss: -0.002887725830078125|cri_loss: -0.0010995864868164062|unsuper_loss: 0.0 +average reward score: 5.5859375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.76%) |Training time=0.44s (20.56%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6085|ppo_ep: 1|act_loss: 0.02294921875|cri_loss: 0.012542724609375|unsuper_loss: 0.0 +average reward score: 5.28125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.83%) |Training time=0.44s (20.47%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6086|ppo_ep: 1|act_loss: -0.003551483154296875|cri_loss: -0.0016345977783203125|unsuper_loss: 0.0 +average reward score: 6.015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.91%) |Training time=0.44s (20.40%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.52 +[2023-04-14 12:31:05,617] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 6087|ppo_ep: 1|act_loss: 0.0240478515625|cri_loss: 0.0125885009765625|unsuper_loss: 0.0 +average reward score: 5.265625 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.28%) |Training time=0.44s (20.50%) |Others=0.09 (4.22%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.52 +[2023-04-14 12:31:07,750] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 6088|ppo_ep: 1|act_loss: 0.035614013671875|cri_loss: 0.01824951171875|unsuper_loss: 0.0 +average reward score: 5.05859375 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.12%) |Training time=0.44s (20.68%) |Others=0.09 (4.20%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.52 +[2023-04-14 12:31:09,949] [INFO] [logging.py:96:log_dist] [Rank 0] step=6090, skipped=76, lr=[1.6942391871325284e-06, 1.6942391871325284e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:31:09,968] [INFO] [timer.py:199:stop] epoch=0/micro_step=6090/global_step=6090, RunningAvgSamplesPerSec=105.46141074976802, CurrSamplesPerSec=103.6843468130931, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:31:10,060] [INFO] [logging.py:96:log_dist] [Rank 0] step=6090, skipped=102, lr=[8.969667129160547e-07, 8.969667129160547e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6089|ppo_ep: 1|act_loss: -0.011199951171875|cri_loss: -0.005390167236328125|unsuper_loss: 0.0 +average reward score: 5.703125 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.73s (74.93%) |Training time=0.47s (20.43%) |Others=0.11 (4.64%)|CurSamplesPerSec=13.84 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6090|ppo_ep: 1|act_loss: -0.021484375|cri_loss: -0.01032257080078125|unsuper_loss: 0.0 +average reward score: 5.59375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.71%) |Training time=0.44s (20.63%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6091|ppo_ep: 1|act_loss: -0.01059722900390625|cri_loss: -0.0050811767578125|unsuper_loss: 0.0 +average reward score: 4.9921875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.90%) |Training time=0.44s (20.43%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6092|ppo_ep: 1|act_loss: -0.02288818359375|cri_loss: -0.01100921630859375|unsuper_loss: 0.0 +average reward score: 4.60546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.33%) |Training time=0.43s (20.02%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6093|ppo_ep: 1|act_loss: 0.01059722900390625|cri_loss: 0.00543975830078125|unsuper_loss: 0.0 +average reward score: 5.87890625 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.77%) |Training time=0.45s (19.79%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.23 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6094|ppo_ep: 1|act_loss: 0.0088653564453125|cri_loss: 0.0054931640625|unsuper_loss: 0.0 +average reward score: 5.05859375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.66%) |Training time=0.44s (20.68%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6095|ppo_ep: 1|act_loss: 0.002613067626953125|cri_loss: 0.0014209747314453125|unsuper_loss: 0.0 +average reward score: 5.1796875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.98%) |Training time=0.44s (20.33%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6096|ppo_ep: 1|act_loss: -0.00624847412109375|cri_loss: -0.0026302337646484375|unsuper_loss: 0.0 +average reward score: 4.890625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.66%) |Training time=0.45s (20.70%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6097|ppo_ep: 1|act_loss: 0.03790283203125|cri_loss: 0.0205535888671875|unsuper_loss: 0.0 +average reward score: 5.671875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.72%) |Training time=0.44s (20.63%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6098|ppo_ep: 1|act_loss: -0.00748443603515625|cri_loss: -0.00344085693359375|unsuper_loss: 0.0 +average reward score: 5.6015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.73%) |Training time=0.44s (20.59%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52 +[2023-04-14 12:31:31,534] [INFO] [logging.py:96:log_dist] [Rank 0] step=6100, skipped=76, lr=[1.6801276811916005e-06, 1.6801276811916005e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:31:31,552] [INFO] [timer.py:199:stop] epoch=0/micro_step=6100/global_step=6100, RunningAvgSamplesPerSec=105.47637239879195, CurrSamplesPerSec=115.30227455669898, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:31:31,645] [INFO] [logging.py:96:log_dist] [Rank 0] step=6100, skipped=102, lr=[8.895927630346729e-07, 8.895927630346729e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6099|ppo_ep: 1|act_loss: -0.0009832382202148438|cri_loss: -2.765655517578125e-05|unsuper_loss: 0.0 +average reward score: 5.52734375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.79%) |Training time=0.44s (20.57%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6100|ppo_ep: 1|act_loss: 0.0250396728515625|cri_loss: 0.012725830078125|unsuper_loss: 0.0 +average reward score: 4.625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.43%) |Training time=0.45s (20.94%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6101|ppo_ep: 1|act_loss: 0.025787353515625|cri_loss: 0.01318359375|unsuper_loss: 0.0 +average reward score: 5.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.52%) |Training time=0.43s (19.81%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6102|ppo_ep: 1|act_loss: 0.00839996337890625|cri_loss: 0.0043182373046875|unsuper_loss: 0.0 +average reward score: 5.15234375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.46%) |Training time=0.43s (19.83%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6103|ppo_ep: 1|act_loss: -0.0005388259887695312|cri_loss: 0.00017642974853515625|unsuper_loss: 0.0 +average reward score: 5.0546875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.88%) |Training time=0.44s (20.47%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6104|ppo_ep: 1|act_loss: 0.0017223358154296875|cri_loss: 0.001003265380859375|unsuper_loss: 0.0 +average reward score: 4.6953125 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.70s (73.49%) |Training time=0.51s (22.14%) |Others=0.10 (4.37%)|CurSamplesPerSec=13.86 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6105|ppo_ep: 1|act_loss: 0.0120697021484375|cri_loss: 0.00655364990234375|unsuper_loss: 0.0 +average reward score: 5.53515625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.03%) |Training time=0.44s (20.32%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6106|ppo_ep: 1|act_loss: -0.000453948974609375|cri_loss: 0.0004172325134277344|unsuper_loss: 0.0 +average reward score: 5.02734375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.64s (76.35%) |Training time=0.41s (18.98%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6107|ppo_ep: 1|act_loss: 0.0012788772583007812|cri_loss: 0.0009832382202148438|unsuper_loss: 0.0 +average reward score: 5.0625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.86%) |Training time=0.42s (19.38%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6108|ppo_ep: 1|act_loss: -0.0181121826171875|cri_loss: -0.0089263916015625|unsuper_loss: 0.0 +average reward score: 5.05859375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.97%) |Training time=0.44s (20.36%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52 +[2023-04-14 12:31:53,178] [INFO] [logging.py:96:log_dist] [Rank 0] step=6110, skipped=76, lr=[1.666062789845028e-06, 1.666062789845028e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:31:53,197] [INFO] [timer.py:199:stop] epoch=0/micro_step=6110/global_step=6110, RunningAvgSamplesPerSec=105.49268024048469, CurrSamplesPerSec=120.01878555601061, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:31:53,289] [INFO] [logging.py:96:log_dist] [Rank 0] step=6110, skipped=102, lr=[8.822426832734388e-07, 8.822426832734388e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6109|ppo_ep: 1|act_loss: 0.03955078125|cri_loss: 0.0203399658203125|unsuper_loss: 0.0 +average reward score: 5.234375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.27%) |Training time=0.43s (20.06%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6110|ppo_ep: 1|act_loss: -0.0210723876953125|cri_loss: -0.01020050048828125|unsuper_loss: 0.0 +average reward score: 6.0546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.69%) |Training time=0.44s (20.64%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6111|ppo_ep: 1|act_loss: -0.01251220703125|cri_loss: -0.005100250244140625|unsuper_loss: 0.0 +average reward score: 5.03125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.78%) |Training time=0.44s (20.57%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6112|ppo_ep: 1|act_loss: -0.00618743896484375|cri_loss: -0.002872467041015625|unsuper_loss: 0.0 +average reward score: 5.04296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.10%) |Training time=0.43s (20.21%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6113|ppo_ep: 1|act_loss: 0.021453857421875|cri_loss: 0.0109710693359375|unsuper_loss: 0.0 +average reward score: 4.69921875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.97%) |Training time=0.44s (20.36%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6114|ppo_ep: 1|act_loss: -0.00036072731018066406|cri_loss: 3.933906555175781e-05|unsuper_loss: 0.0 +average reward score: 5.62890625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.72%) |Training time=0.44s (20.60%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6115|ppo_ep: 1|act_loss: -0.0131378173828125|cri_loss: -0.006378173828125|unsuper_loss: 0.0 +average reward score: 5.27734375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.92%) |Training time=0.44s (20.41%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6116|ppo_ep: 1|act_loss: 0.027069091796875|cri_loss: 0.013885498046875|unsuper_loss: 0.0 +average reward score: 4.9609375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.90%) |Training time=0.44s (20.42%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6117|ppo_ep: 1|act_loss: 0.03656005859375|cri_loss: 0.019195556640625|unsuper_loss: 0.0 +average reward score: 5.3828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.26%) |Training time=0.46s (21.11%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6118|ppo_ep: 1|act_loss: -0.01161956787109375|cri_loss: -0.00482177734375|unsuper_loss: 0.0 +average reward score: 5.4453125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.63%) |Training time=0.45s (20.71%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.52 +[2023-04-14 12:32:14,701] [INFO] [logging.py:96:log_dist] [Rank 0] step=6120, skipped=76, lr=[1.6520447215684334e-06, 1.6520447215684334e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:32:14,705] [INFO] [timer.py:199:stop] epoch=0/micro_step=6120/global_step=6120, RunningAvgSamplesPerSec=105.50570028757365, CurrSamplesPerSec=111.2033127913316, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:32:14,868] [INFO] [logging.py:96:log_dist] [Rank 0] step=6120, skipped=102, lr=[8.749165825782657e-07, 8.749165825782657e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6119|ppo_ep: 1|act_loss: -0.0025386810302734375|cri_loss: -0.0011510848999023438|unsuper_loss: 0.0 +average reward score: 5.0234375 +------------------------------------------------------------------------------------- +|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.61s (72.14%) |Training time=0.48s (21.48%) |Others=0.14 (6.38%)|CurSamplesPerSec=14.36 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6120|ppo_ep: 1|act_loss: -0.005218505859375|cri_loss: -0.0017795562744140625|unsuper_loss: 0.0 +average reward score: 5.49609375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.63%) |Training time=0.45s (20.72%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6121|ppo_ep: 1|act_loss: 0.0121917724609375|cri_loss: 0.00702667236328125|unsuper_loss: 0.0 +average reward score: 5.09375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.87%) |Training time=0.44s (20.49%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6122|ppo_ep: 1|act_loss: -0.012176513671875|cri_loss: -0.00586700439453125|unsuper_loss: 0.0 +average reward score: 5.0390625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.87%) |Training time=0.45s (20.53%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6123|ppo_ep: 1|act_loss: -0.003131866455078125|cri_loss: -0.0015077590942382812|unsuper_loss: 0.0 +average reward score: 5.08203125 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.78%) |Training time=0.45s (19.78%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.22 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6124|ppo_ep: 1|act_loss: 0.003070831298828125|cri_loss: 0.002010345458984375|unsuper_loss: 0.0 +average reward score: 5.22265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.59%) |Training time=0.45s (20.78%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6125|ppo_ep: 1|act_loss: -0.0011262893676757812|cri_loss: -7.152557373046875e-05|unsuper_loss: 0.0 +average reward score: 5.19921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.95%) |Training time=0.44s (20.38%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6126|ppo_ep: 1|act_loss: 0.0059051513671875|cri_loss: 0.003509521484375|unsuper_loss: 0.0 +average reward score: 4.73828125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.75%) |Training time=0.44s (20.61%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6127|ppo_ep: 1|act_loss: -0.0007395744323730469|cri_loss: -0.0002562999725341797|unsuper_loss: 0.0 +average reward score: 4.9921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.55%) |Training time=0.45s (20.82%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6128|ppo_ep: 1|act_loss: 0.0227203369140625|cri_loss: 0.0115966796875|unsuper_loss: 0.0 +average reward score: 6.3515625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.71%) |Training time=0.44s (20.63%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.52 +[2023-04-14 12:32:36,432] [INFO] [logging.py:96:log_dist] [Rank 0] step=6130, skipped=76, lr=[1.6380736841434013e-06, 1.6380736841434013e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:32:36,450] [INFO] [timer.py:199:stop] epoch=0/micro_step=6130/global_step=6130, RunningAvgSamplesPerSec=105.5171589615916, CurrSamplesPerSec=112.04278099865182, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:32:36,542] [INFO] [logging.py:96:log_dist] [Rank 0] step=6130, skipped=102, lr=[8.676145695396399e-07, 8.676145695396399e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6129|ppo_ep: 1|act_loss: -0.01308441162109375|cri_loss: -0.006229400634765625|unsuper_loss: 0.0 +average reward score: 5.44921875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.58%) |Training time=0.45s (20.81%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6130|ppo_ep: 1|act_loss: 0.003566741943359375|cri_loss: 0.00501251220703125|unsuper_loss: 0.0 +average reward score: 4.9453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.52%) |Training time=0.45s (20.82%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6131|ppo_ep: 1|act_loss: -0.010467529296875|cri_loss: -0.005153656005859375|unsuper_loss: 0.0 +average reward score: 6.0703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.16%) |Training time=0.43s (20.14%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6132|ppo_ep: 1|act_loss: 0.0159759521484375|cri_loss: 0.00836181640625|unsuper_loss: 0.0 +average reward score: 5.53125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.47s (21.90%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6133|ppo_ep: 1|act_loss: 0.01029205322265625|cri_loss: 0.00539398193359375|unsuper_loss: 0.0 +average reward score: 4.8046875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.15%) |Training time=0.45s (21.18%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6134|ppo_ep: 1|act_loss: 0.028656005859375|cri_loss: 0.01476287841796875|unsuper_loss: 0.0 +average reward score: 5.22265625 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.57s (69.15%) |Training time=0.60s (26.43%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.06 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6135|ppo_ep: 1|act_loss: 0.00274658203125|cri_loss: 0.0015954971313476562|unsuper_loss: 0.0 +average reward score: 5.375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.51%) |Training time=0.47s (21.83%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6136|ppo_ep: 1|act_loss: -0.020721435546875|cri_loss: -0.00540924072265625|unsuper_loss: 0.0 +average reward score: 5.40234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.09%) |Training time=0.48s (22.28%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6137|ppo_ep: 1|act_loss: -0.032318115234375|cri_loss: -0.015716552734375|unsuper_loss: 0.0 +average reward score: 4.9296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.13%) |Training time=0.48s (22.21%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6138|ppo_ep: 1|act_loss: 0.0096435546875|cri_loss: 0.005260467529296875|unsuper_loss: 0.0 +average reward score: 5.15625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.47s (21.95%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52 +[2023-04-14 12:32:58,076] [INFO] [logging.py:96:log_dist] [Rank 0] step=6140, skipped=76, lr=[1.6241498846544112e-06, 1.6241498846544112e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:32:58,094] [INFO] [timer.py:199:stop] epoch=0/micro_step=6140/global_step=6140, RunningAvgSamplesPerSec=105.51132018702846, CurrSamplesPerSec=102.04126119112495, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:32:58,188] [INFO] [logging.py:96:log_dist] [Rank 0] step=6140, skipped=102, lr=[8.60336752391008e-07, 8.60336752391008e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6139|ppo_ep: 1|act_loss: 0.007755279541015625|cri_loss: 0.004730224609375|unsuper_loss: 0.0 +average reward score: 5.875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.57s (73.17%) |Training time=0.48s (22.20%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6140|ppo_ep: 1|act_loss: 0.006702423095703125|cri_loss: 0.0040435791015625|unsuper_loss: 0.0 +average reward score: 5.20703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.24%) |Training time=0.48s (22.09%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6141|ppo_ep: 1|act_loss: -0.02569580078125|cri_loss: -0.012451171875|unsuper_loss: 0.0 +average reward score: 5.828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.66%) |Training time=0.47s (21.73%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6142|ppo_ep: 1|act_loss: -0.0069122314453125|cri_loss: -0.00293731689453125|unsuper_loss: 0.0 +average reward score: 5.015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.48s (22.14%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6143|ppo_ep: 1|act_loss: 0.0230712890625|cri_loss: 0.0119781494140625|unsuper_loss: 0.0 +average reward score: 5.76171875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.46%) |Training time=0.47s (21.88%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6144|ppo_ep: 1|act_loss: 0.0186614990234375|cri_loss: 0.0100250244140625|unsuper_loss: 0.0 +average reward score: 6.359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.11%) |Training time=0.48s (22.25%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6145|ppo_ep: 1|act_loss: -0.0276031494140625|cri_loss: -0.01342010498046875|unsuper_loss: 0.0 +average reward score: 5.58984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.87%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6146|ppo_ep: 1|act_loss: -0.0023174285888671875|cri_loss: -0.0010881423950195312|unsuper_loss: 0.0 +average reward score: 5.1015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.28%) |Training time=0.48s (22.05%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6147|ppo_ep: 1|act_loss: -0.05145263671875|cri_loss: -0.0251922607421875|unsuper_loss: 0.0 +average reward score: 5.57421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.59%) |Training time=0.47s (21.74%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6148|ppo_ep: 1|act_loss: 0.0002200603485107422|cri_loss: 0.0001798868179321289|unsuper_loss: 0.0 +average reward score: 5.83203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.25%) |Training time=0.48s (22.12%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.52 +[2023-04-14 12:33:19,694] [INFO] [logging.py:96:log_dist] [Rank 0] step=6150, skipped=76, lr=[1.6102735294857612e-06, 1.6102735294857612e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:33:19,711] [INFO] [timer.py:199:stop] epoch=0/micro_step=6150/global_step=6150, RunningAvgSamplesPerSec=105.50358234499453, CurrSamplesPerSec=88.18394502043337, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:33:19,859] [INFO] [logging.py:96:log_dist] [Rank 0] step=6150, skipped=102, lr=[8.530832390071778e-07, 8.530832390071778e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6149|ppo_ep: 1|act_loss: 0.0155792236328125|cri_loss: 0.008056640625|unsuper_loss: 0.0 +average reward score: 5.6640625 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.58s (69.94%) |Training time=0.53s (23.28%) |Others=0.15 (6.79%)|CurSamplesPerSec=14.17 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6150|ppo_ep: 1|act_loss: -0.014556884765625|cri_loss: -0.00690460205078125|unsuper_loss: 0.0 +average reward score: 5.390625 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.53%) |Training time=0.48s (21.80%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6151|ppo_ep: 1|act_loss: -0.001575469970703125|cri_loss: -0.0005412101745605469|unsuper_loss: 0.0 +average reward score: 5.90625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.22%) |Training time=0.42s (19.07%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6152|ppo_ep: 1|act_loss: 0.0008678436279296875|cri_loss: 0.0005321502685546875|unsuper_loss: 0.0 +average reward score: 4.9375 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.69s (75.80%) |Training time=0.43s (19.49%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.38 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6153|ppo_ep: 1|act_loss: 0.0113983154296875|cri_loss: 0.0059967041015625|unsuper_loss: 0.0 +average reward score: 5.484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.21%) |Training time=0.43s (20.03%) |Others=0.10 (4.76%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6154|ppo_ep: 1|act_loss: 0.027252197265625|cri_loss: 0.01384735107421875|unsuper_loss: 0.0 +average reward score: 5.7890625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.07%) |Training time=0.46s (21.25%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6155|ppo_ep: 1|act_loss: 0.01102447509765625|cri_loss: 0.00586700439453125|unsuper_loss: 0.0 +average reward score: 5.109375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.58%) |Training time=0.45s (20.70%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6156|ppo_ep: 1|act_loss: -0.01934814453125|cri_loss: -0.0091705322265625|unsuper_loss: 0.0 +average reward score: 5.5859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.83%) |Training time=0.44s (20.44%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6157|ppo_ep: 1|act_loss: -0.01885986328125|cri_loss: -0.00922393798828125|unsuper_loss: 0.0 +average reward score: 5.7734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.70%) |Training time=0.43s (19.58%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6158|ppo_ep: 1|act_loss: -0.0258026123046875|cri_loss: -0.01213836669921875|unsuper_loss: 0.0 +average reward score: 5.41796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.72%) |Training time=0.47s (21.64%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.52 +[2023-04-14 12:33:41,552] [INFO] [logging.py:96:log_dist] [Rank 0] step=6160, skipped=76, lr=[1.5964448243185107e-06, 1.5964448243185107e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:33:41,570] [INFO] [timer.py:199:stop] epoch=0/micro_step=6160/global_step=6160, RunningAvgSamplesPerSec=105.51397045659475, CurrSamplesPerSec=105.2155795695511, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:33:41,663] [INFO] [logging.py:96:log_dist] [Rank 0] step=6160, skipped=102, lr=[8.458541369027159e-07, 8.458541369027159e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6159|ppo_ep: 1|act_loss: 0.0109405517578125|cri_loss: 0.005832672119140625|unsuper_loss: 0.0 +average reward score: 4.83984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.83%) |Training time=0.47s (21.55%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6160|ppo_ep: 1|act_loss: 0.0102691650390625|cri_loss: 0.005405426025390625|unsuper_loss: 0.0 +average reward score: 5.4453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.32%) |Training time=0.48s (22.04%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6161|ppo_ep: 1|act_loss: -0.024322509765625|cri_loss: -0.01200103759765625|unsuper_loss: 0.0 +average reward score: 5.23046875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.73%) |Training time=0.47s (21.62%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6162|ppo_ep: 1|act_loss: -0.0107574462890625|cri_loss: -0.00484466552734375|unsuper_loss: 0.0 +average reward score: 6.30078125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.00%) |Training time=0.44s (20.34%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6163|ppo_ep: 1|act_loss: 0.029083251953125|cri_loss: 0.015167236328125|unsuper_loss: 0.0 +average reward score: 5.578125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.81%) |Training time=0.42s (19.54%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6164|ppo_ep: 1|act_loss: 0.0117950439453125|cri_loss: 0.00669097900390625|unsuper_loss: 0.0 +average reward score: 5.7734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.53%) |Training time=0.45s (20.84%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6165|ppo_ep: 1|act_loss: 0.02691650390625|cri_loss: 0.01380157470703125|unsuper_loss: 0.0 +average reward score: 5.77734375 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.74s (74.71%) |Training time=0.49s (20.97%) |Others=0.10 (4.33%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6166|ppo_ep: 1|act_loss: 0.00437164306640625|cri_loss: 0.00235748291015625|unsuper_loss: 0.0 +average reward score: 5.08203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.49%) |Training time=0.45s (20.88%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6167|ppo_ep: 1|act_loss: -0.00687408447265625|cri_loss: -0.0029582977294921875|unsuper_loss: 0.0 +average reward score: 5.45703125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.76%) |Training time=0.45s (20.59%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6168|ppo_ep: 1|act_loss: 0.017120361328125|cri_loss: 0.0088958740234375|unsuper_loss: 0.0 +average reward score: 5.484375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.53%) |Training time=0.45s (20.81%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.52 +[2023-04-14 12:34:03,411] [INFO] [logging.py:96:log_dist] [Rank 0] step=6170, skipped=76, lr=[1.5826639741274258e-06, 1.5826639741274258e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:34:03,430] [INFO] [timer.py:199:stop] epoch=0/micro_step=6170/global_step=6170, RunningAvgSamplesPerSec=105.5204895216476, CurrSamplesPerSec=110.71758783026316, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:34:03,522] [INFO] [logging.py:96:log_dist] [Rank 0] step=6170, skipped=102, lr=[8.386495532303557e-07, 8.386495532303557e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6169|ppo_ep: 1|act_loss: -0.0137786865234375|cri_loss: -0.006626129150390625|unsuper_loss: 0.0 +average reward score: 5.09375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.70%) |Training time=0.45s (20.70%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6170|ppo_ep: 1|act_loss: -0.0162811279296875|cri_loss: -0.00791168212890625|unsuper_loss: 0.0 +average reward score: 5.17578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.03%) |Training time=0.44s (20.29%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6171|ppo_ep: 1|act_loss: -0.020263671875|cri_loss: -0.0099029541015625|unsuper_loss: 0.0 +average reward score: 5.26953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.14%) |Training time=0.44s (20.18%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6172|ppo_ep: 1|act_loss: -0.026458740234375|cri_loss: -0.00772857666015625|unsuper_loss: 0.0 +average reward score: 5.36328125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.13%) |Training time=0.44s (20.22%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6173|ppo_ep: 1|act_loss: -0.0079498291015625|cri_loss: -0.003582000732421875|unsuper_loss: 0.0 +average reward score: 5.44921875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.18%) |Training time=0.44s (20.17%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6174|ppo_ep: 1|act_loss: -0.00720977783203125|cri_loss: -0.003360748291015625|unsuper_loss: 0.0 +average reward score: 4.85546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.95%) |Training time=0.44s (20.40%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6175|ppo_ep: 1|act_loss: 0.0032806396484375|cri_loss: 0.0018138885498046875|unsuper_loss: 0.0 +average reward score: 6.015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.95%) |Training time=0.44s (20.39%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6176|ppo_ep: 1|act_loss: 0.010406494140625|cri_loss: 0.005847930908203125|unsuper_loss: 0.0 +average reward score: 5.75 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.63%) |Training time=0.45s (20.71%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6177|ppo_ep: 1|act_loss: 0.001491546630859375|cri_loss: 0.0009002685546875|unsuper_loss: 0.0 +average reward score: 5.765625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.40%) |Training time=0.45s (20.66%) |Others=0.11 (4.94%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6178|ppo_ep: 1|act_loss: 0.00827789306640625|cri_loss: 0.004497528076171875|unsuper_loss: 0.0 +average reward score: 5.4296875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.27%) |Training time=0.46s (21.11%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.52 +[2023-04-14 12:34:25,037] [INFO] [logging.py:96:log_dist] [Rank 0] step=6180, skipped=76, lr=[1.5689311831779562e-06, 1.5689311831779562e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:34:25,056] [INFO] [timer.py:199:stop] epoch=0/micro_step=6180/global_step=6180, RunningAvgSamplesPerSec=105.53333925269833, CurrSamplesPerSec=109.81793903372979, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:34:25,148] [INFO] [logging.py:96:log_dist] [Rank 0] step=6180, skipped=102, lr=[8.314695947794054e-07, 8.314695947794054e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6179|ppo_ep: 1|act_loss: 0.0247955322265625|cri_loss: 0.012725830078125|unsuper_loss: 0.0 +average reward score: 4.7109375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.36%) |Training time=0.46s (21.04%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6180|ppo_ep: 1|act_loss: -0.02691650390625|cri_loss: -0.0132904052734375|unsuper_loss: 0.0 +average reward score: 5.65234375 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.63s (71.82%) |Training time=0.53s (23.42%) |Others=0.11 (4.76%)|CurSamplesPerSec=14.07 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6181|ppo_ep: 1|act_loss: -0.01049041748046875|cri_loss: -0.004871368408203125|unsuper_loss: 0.0 +average reward score: 5.95703125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.51%) |Training time=0.46s (20.91%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6182|ppo_ep: 1|act_loss: -8.463859558105469e-05|cri_loss: 0.00012814998626708984|unsuper_loss: 0.0 +average reward score: 5.5625 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.72%) |Training time=0.45s (19.84%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.14 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6183|ppo_ep: 1|act_loss: -0.0137481689453125|cri_loss: -0.00630950927734375|unsuper_loss: 0.0 +average reward score: 4.7421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.68%) |Training time=0.45s (20.69%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6184|ppo_ep: 1|act_loss: -0.0121002197265625|cri_loss: -0.00557708740234375|unsuper_loss: 0.0 +average reward score: 4.9453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.58%) |Training time=0.45s (20.78%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6185|ppo_ep: 1|act_loss: 0.01291656494140625|cri_loss: 0.00736236572265625|unsuper_loss: 0.0 +average reward score: 5.94921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.87%) |Training time=0.44s (20.50%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6186|ppo_ep: 1|act_loss: 0.007572174072265625|cri_loss: 0.00428009033203125|unsuper_loss: 0.0 +average reward score: 5.45703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.82%) |Training time=0.44s (20.51%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6187|ppo_ep: 1|act_loss: 0.01485443115234375|cri_loss: 0.0076904296875|unsuper_loss: 0.0 +average reward score: 5.8828125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.86%) |Training time=0.44s (20.49%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6188|ppo_ep: 1|act_loss: 0.02923583984375|cri_loss: 0.01490020751953125|unsuper_loss: 0.0 +average reward score: 5.69921875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.55%) |Training time=0.45s (20.82%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.52 +[2023-04-14 12:34:46,845] [INFO] [logging.py:96:log_dist] [Rank 0] step=6190, skipped=76, lr=[1.5552466550231917e-06, 1.5552466550231917e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:34:46,863] [INFO] [timer.py:199:stop] epoch=0/micro_step=6190/global_step=6190, RunningAvgSamplesPerSec=105.54164914219182, CurrSamplesPerSec=115.26355799601869, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:34:46,947] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +[2023-04-14 12:34:46,947] [INFO] [logging.py:96:log_dist] [Rank 0] step=6190, skipped=103, lr=[8.25028774705112e-07, 8.25028774705112e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6189|ppo_ep: 1|act_loss: 0.0374755859375|cri_loss: 0.0200653076171875|unsuper_loss: 0.0 +average reward score: 4.6484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.31%) |Training time=0.44s (20.52%) |Others=0.09 (4.16%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52 +[2023-04-14 12:34:49,092] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 6190|ppo_ep: 1|act_loss: 0.0126953125|cri_loss: 0.00658416748046875|unsuper_loss: 0.0 +average reward score: 5.40625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.20%) |Training time=0.44s (20.63%) |Others=0.09 (4.17%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6191|ppo_ep: 1|act_loss: 0.0193328857421875|cri_loss: 0.01024627685546875|unsuper_loss: 0.0 +average reward score: 5.68359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.85%) |Training time=0.44s (20.51%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6192|ppo_ep: 1|act_loss: -0.034759521484375|cri_loss: -0.0170745849609375|unsuper_loss: 0.0 +average reward score: 5.2109375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.49%) |Training time=0.45s (20.85%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6193|ppo_ep: 1|act_loss: 0.015655517578125|cri_loss: 0.00811004638671875|unsuper_loss: 0.0 +average reward score: 4.49609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.89%) |Training time=0.44s (20.47%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6194|ppo_ep: 1|act_loss: -0.0287933349609375|cri_loss: -0.0138702392578125|unsuper_loss: 0.0 +average reward score: 4.96484375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.39%) |Training time=0.43s (19.91%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6195|ppo_ep: 1|act_loss: 0.029052734375|cri_loss: 0.018585205078125|unsuper_loss: 0.0 +average reward score: 5.20703125 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.08%) |Training time=0.45s (20.21%) |Others=0.15 (6.72%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6196|ppo_ep: 1|act_loss: -0.021270751953125|cri_loss: -0.0104522705078125|unsuper_loss: 0.0 +average reward score: 5.13671875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.92%) |Training time=0.44s (20.37%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6197|ppo_ep: 1|act_loss: 0.0007162094116210938|cri_loss: 0.0006031990051269531|unsuper_loss: 0.0 +average reward score: 5.33984375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.91%) |Training time=0.44s (20.40%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6198|ppo_ep: 1|act_loss: -0.003345489501953125|cri_loss: -0.0012874603271484375|unsuper_loss: 0.0 +average reward score: 5.40234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.81%) |Training time=0.44s (20.51%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52 +[2023-04-14 12:35:08,385] [INFO] [logging.py:96:log_dist] [Rank 0] step=6200, skipped=76, lr=[1.5416105925008481e-06, 1.5416105925008481e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:35:08,403] [INFO] [timer.py:199:stop] epoch=0/micro_step=6200/global_step=6200, RunningAvgSamplesPerSec=105.5565439468875, CurrSamplesPerSec=118.04580662921714, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:35:08,496] [INFO] [logging.py:96:log_dist] [Rank 0] step=6200, skipped=104, lr=[8.186080645980449e-07, 8.186080645980449e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6199|ppo_ep: 1|act_loss: 0.0003528594970703125|cri_loss: 0.0007967948913574219|unsuper_loss: 0.0 +average reward score: 5.265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.09%) |Training time=0.43s (20.24%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6200|ppo_ep: 1|act_loss: -0.003871917724609375|cri_loss: -0.00183868408203125|unsuper_loss: 0.0 +average reward score: 5.421875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.73%) |Training time=0.44s (20.62%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6201|ppo_ep: 1|act_loss: 0.004852294921875|cri_loss: 0.0026836395263671875|unsuper_loss: 0.0 +average reward score: 5.0859375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.91%) |Training time=0.44s (20.42%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6202|ppo_ep: 1|act_loss: -0.002704620361328125|cri_loss: -0.00090789794921875|unsuper_loss: 0.0 +average reward score: 5.484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.78%) |Training time=0.44s (20.55%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6203|ppo_ep: 1|act_loss: -9.34600830078125e-05|cri_loss: 5.0902366638183594e-05|unsuper_loss: 0.0 +average reward score: 5.6015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.49%) |Training time=0.45s (20.88%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6204|ppo_ep: 1|act_loss: -0.00811004638671875|cri_loss: -0.003986358642578125|unsuper_loss: 0.0 +average reward score: 6.2578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.71%) |Training time=0.44s (20.62%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6205|ppo_ep: 1|act_loss: -0.014251708984375|cri_loss: -0.006908416748046875|unsuper_loss: 0.0 +average reward score: 5.078125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.14%) |Training time=0.45s (20.65%) |Others=0.11 (5.21%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6206|ppo_ep: 1|act_loss: -0.0167236328125|cri_loss: -0.00799560546875|unsuper_loss: 0.0 +average reward score: 5.50390625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.64%) |Training time=0.44s (20.64%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6207|ppo_ep: 1|act_loss: 0.020660400390625|cri_loss: 0.01129913330078125|unsuper_loss: 0.0 +average reward score: 5.0234375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.60%) |Training time=0.47s (21.28%) |Others=0.11 (5.12%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6208|ppo_ep: 1|act_loss: -0.0110626220703125|cri_loss: -0.005298614501953125|unsuper_loss: 0.0 +average reward score: 4.8359375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.67s (76.20%) |Training time=0.42s (19.07%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.52 +[2023-04-14 12:35:30,005] [INFO] [logging.py:96:log_dist] [Rank 0] step=6210, skipped=76, lr=[1.5280231977302697e-06, 1.5280231977302697e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:35:30,023] [INFO] [timer.py:199:stop] epoch=0/micro_step=6210/global_step=6210, RunningAvgSamplesPerSec=105.569428626769, CurrSamplesPerSec=115.34894721463071, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:35:30,115] [INFO] [logging.py:96:log_dist] [Rank 0] step=6210, skipped=104, lr=[8.114976217732126e-07, 8.114976217732126e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6209|ppo_ep: 1|act_loss: -0.007843017578125|cri_loss: -0.0037384033203125|unsuper_loss: 0.0 +average reward score: 4.86328125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.97%) |Training time=0.44s (20.37%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6210|ppo_ep: 1|act_loss: 0.001140594482421875|cri_loss: 0.0009160041809082031|unsuper_loss: 0.0 +average reward score: 5.4375 +------------------------------------------------------------------------------------- +|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.79s (75.64%) |Training time=0.48s (20.07%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.50 |AvgSamplesPerSec=14.52 +[2023-04-14 12:35:34,546] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 6211|ppo_ep: 1|act_loss: 0.00507354736328125|cri_loss: 0.0026645660400390625|unsuper_loss: 0.0 +average reward score: 5.3515625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.66s (77.26%) |Training time=0.39s (18.05%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.52 +[2023-04-14 12:35:36,778] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 6212|ppo_ep: 1|act_loss: 0.0276031494140625|cri_loss: 0.0142974853515625|unsuper_loss: 0.0 +average reward score: 5.4375 +------------------------------------------------------------------------------------- +|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.75s (78.19%) |Training time=0.39s (17.28%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.34 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6213|ppo_ep: 1|act_loss: -0.02130126953125|cri_loss: -0.01049041748046875|unsuper_loss: 0.0 +average reward score: 5.25390625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.42%) |Training time=0.45s (20.84%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6214|ppo_ep: 1|act_loss: -0.0183563232421875|cri_loss: -0.00872039794921875|unsuper_loss: 0.0 +average reward score: 5.140625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.28%) |Training time=0.43s (19.99%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6215|ppo_ep: 1|act_loss: 0.0140380859375|cri_loss: 0.007381439208984375|unsuper_loss: 0.0 +average reward score: 5.203125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.41%) |Training time=0.45s (20.87%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6216|ppo_ep: 1|act_loss: 0.002777099609375|cri_loss: 0.0017156600952148438|unsuper_loss: 0.0 +average reward score: 5.0703125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.76%) |Training time=0.44s (20.52%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6217|ppo_ep: 1|act_loss: 0.0430908203125|cri_loss: 0.0218505859375|unsuper_loss: 0.0 +average reward score: 5.90625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.66%) |Training time=0.42s (19.60%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.52 +epoch: 0|step: 6218|ppo_ep: 1|act_loss: -0.029815673828125|cri_loss: -0.01462554931640625|unsuper_loss: 0.0 +average reward score: 5.37109375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.43%) |Training time=0.42s (19.85%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.52 +[2023-04-14 12:35:51,756] [INFO] [logging.py:96:log_dist] [Rank 0] step=6220, skipped=78, lr=[1.517188458058827e-06, 1.517188458058827e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:35:51,774] [INFO] [timer.py:199:stop] epoch=0/micro_step=6220/global_step=6220, RunningAvgSamplesPerSec=105.5899408595616, CurrSamplesPerSec=112.21889660419636, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:35:51,867] [INFO] [logging.py:96:log_dist] [Rank 0] step=6220, skipped=104, lr=[8.04412206626915e-07, 8.04412206626915e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6219|ppo_ep: 1|act_loss: 0.0148468017578125|cri_loss: 0.007663726806640625|unsuper_loss: 0.0 +average reward score: 5.0625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.39%) |Training time=0.45s (20.92%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6220|ppo_ep: 1|act_loss: -0.00608062744140625|cri_loss: -0.0027713775634765625|unsuper_loss: 0.0 +average reward score: 5.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.70%) |Training time=0.44s (20.59%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6221|ppo_ep: 1|act_loss: -0.0115509033203125|cri_loss: -0.0054931640625|unsuper_loss: 0.0 +average reward score: 5.71875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.80%) |Training time=0.44s (20.50%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6222|ppo_ep: 1|act_loss: -0.00766754150390625|cri_loss: -0.003414154052734375|unsuper_loss: 0.0 +average reward score: 5.7421875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.42%) |Training time=0.45s (20.86%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6223|ppo_ep: 1|act_loss: -0.0185546875|cri_loss: -0.00916290283203125|unsuper_loss: 0.0 +average reward score: 5.73046875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.17%) |Training time=0.46s (21.15%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6224|ppo_ep: 1|act_loss: 0.0041961669921875|cri_loss: 0.0024700164794921875|unsuper_loss: 0.0 +average reward score: 5.71875 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.25%) |Training time=0.51s (23.20%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6225|ppo_ep: 1|act_loss: -0.0008344650268554688|cri_loss: 7.200241088867188e-05|unsuper_loss: 0.0 +average reward score: 5.640625 +------------------------------------------------------------------------------------- +|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.59s (69.27%) |Training time=0.60s (26.34%) |Others=0.10 (4.39%)|CurSamplesPerSec=13.96 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6226|ppo_ep: 1|act_loss: 0.0102691650390625|cri_loss: 0.00655364990234375|unsuper_loss: 0.0 +average reward score: 5.6328125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.72%) |Training time=0.47s (21.62%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6227|ppo_ep: 1|act_loss: 0.0199127197265625|cri_loss: 0.0105438232421875|unsuper_loss: 0.0 +average reward score: 5.640625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.47s (21.90%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6228|ppo_ep: 1|act_loss: -0.008087158203125|cri_loss: -0.003635406494140625|unsuper_loss: 0.0 +average reward score: 5.31640625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.69%) |Training time=0.47s (21.51%) |Others=0.10 (4.80%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53 +[2023-04-14 12:36:13,489] [INFO] [logging.py:96:log_dist] [Rank 0] step=6230, skipped=78, lr=[1.5036891722776364e-06, 1.5036891722776364e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:36:13,507] [INFO] [timer.py:199:stop] epoch=0/micro_step=6230/global_step=6230, RunningAvgSamplesPerSec=105.58477727357665, CurrSamplesPerSec=102.88509972863999, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:36:13,600] [INFO] [logging.py:96:log_dist] [Rank 0] step=6230, skipped=104, lr=[7.973519241820982e-07, 7.973519241820982e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6229|ppo_ep: 1|act_loss: -0.00170135498046875|cri_loss: -0.0007963180541992188|unsuper_loss: 0.0 +average reward score: 5.5 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.88%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6230|ppo_ep: 1|act_loss: 0.00777435302734375|cri_loss: 0.004039764404296875|unsuper_loss: 0.0 +average reward score: 5.296875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.57%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6231|ppo_ep: 1|act_loss: -0.0211181640625|cri_loss: -0.0089569091796875|unsuper_loss: 0.0 +average reward score: 5.6328125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.86%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6232|ppo_ep: 1|act_loss: -0.026275634765625|cri_loss: -0.01280975341796875|unsuper_loss: 0.0 +average reward score: 5.24609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.24%) |Training time=0.48s (22.12%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6233|ppo_ep: 1|act_loss: -0.01097869873046875|cri_loss: -0.005405426025390625|unsuper_loss: 0.0 +average reward score: 5.98828125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.24%) |Training time=0.48s (22.13%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6234|ppo_ep: 1|act_loss: 0.0022563934326171875|cri_loss: 0.0018453598022460938|unsuper_loss: 0.0 +average reward score: 5.6875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.23%) |Training time=0.48s (22.13%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6235|ppo_ep: 1|act_loss: -0.00632476806640625|cri_loss: -0.002765655517578125|unsuper_loss: 0.0 +average reward score: 4.84765625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.40%) |Training time=0.47s (21.96%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6236|ppo_ep: 1|act_loss: -0.025146484375|cri_loss: -0.0122833251953125|unsuper_loss: 0.0 +average reward score: 5.30078125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.48s (21.98%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6237|ppo_ep: 1|act_loss: -0.003505706787109375|cri_loss: -0.0015869140625|unsuper_loss: 0.0 +average reward score: 4.921875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.25%) |Training time=0.48s (22.11%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6238|ppo_ep: 1|act_loss: -0.01464080810546875|cri_loss: -0.00714874267578125|unsuper_loss: 0.0 +average reward score: 5.1171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.57%) |Training time=0.47s (21.78%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53 +[2023-04-14 12:36:35,111] [INFO] [logging.py:96:log_dist] [Rank 0] step=6240, skipped=78, lr=[1.4902391163351402e-06, 1.4902391163351402e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:36:35,130] [INFO] [timer.py:199:stop] epoch=0/micro_step=6240/global_step=6240, RunningAvgSamplesPerSec=105.58013996430111, CurrSamplesPerSec=104.96740574710576, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:36:35,222] [INFO] [logging.py:96:log_dist] [Rank 0] step=6240, skipped=104, lr=[7.903168790891797e-07, 7.903168790891797e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6239|ppo_ep: 1|act_loss: 0.0259552001953125|cri_loss: 0.0141448974609375|unsuper_loss: 0.0 +average reward score: 4.9921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.81%) |Training time=0.47s (21.58%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6240|ppo_ep: 1|act_loss: 0.01824951171875|cri_loss: 0.0101776123046875|unsuper_loss: 0.0 +average reward score: 5.96875 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.60s (70.23%) |Training time=0.58s (25.37%) |Others=0.10 (4.40%)|CurSamplesPerSec=14.05 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6241|ppo_ep: 1|act_loss: 0.02484130859375|cri_loss: 0.012664794921875|unsuper_loss: 0.0 +average reward score: 5.375 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.25%) |Training time=0.48s (21.74%) |Others=0.11 (5.01%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6242|ppo_ep: 1|act_loss: 0.01416015625|cri_loss: 0.007274627685546875|unsuper_loss: 0.0 +average reward score: 5.67578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.17%) |Training time=0.46s (21.18%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6243|ppo_ep: 1|act_loss: 0.028594970703125|cri_loss: 0.01468658447265625|unsuper_loss: 0.0 +average reward score: 5.66015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.65%) |Training time=0.44s (20.67%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6244|ppo_ep: 1|act_loss: 0.004192352294921875|cri_loss: 0.002834320068359375|unsuper_loss: 0.0 +average reward score: 5.1171875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.38%) |Training time=0.45s (20.84%) |Others=0.10 (4.77%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6245|ppo_ep: 1|act_loss: -0.0104217529296875|cri_loss: -0.004413604736328125|unsuper_loss: 0.0 +average reward score: 5.36328125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.79%) |Training time=0.44s (20.56%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6246|ppo_ep: 1|act_loss: -0.004180908203125|cri_loss: -0.0017414093017578125|unsuper_loss: 0.0 +average reward score: 5.09375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.40%) |Training time=0.45s (20.97%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6247|ppo_ep: 1|act_loss: -0.00423431396484375|cri_loss: -0.001888275146484375|unsuper_loss: 0.0 +average reward score: 5.04296875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.69%) |Training time=0.45s (20.67%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6248|ppo_ep: 1|act_loss: -0.008544921875|cri_loss: -0.004192352294921875|unsuper_loss: 0.0 +average reward score: 5.0703125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.72%) |Training time=0.45s (20.64%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53 +[2023-04-14 12:36:56,876] [INFO] [logging.py:96:log_dist] [Rank 0] step=6250, skipped=78, lr=[1.4768384895936108e-06, 1.4768384895936108e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:36:56,894] [INFO] [timer.py:199:stop] epoch=0/micro_step=6250/global_step=6250, RunningAvgSamplesPerSec=105.58190779052778, CurrSamplesPerSec=111.68467475926893, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:36:56,986] [INFO] [logging.py:96:log_dist] [Rank 0] step=6250, skipped=104, lr=[7.833071756244998e-07, 7.833071756244998e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6249|ppo_ep: 1|act_loss: 0.006694793701171875|cri_loss: 0.0036468505859375|unsuper_loss: 0.0 +average reward score: 5.09375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.61%) |Training time=0.45s (20.78%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6250|ppo_ep: 1|act_loss: -0.013153076171875|cri_loss: -0.006481170654296875|unsuper_loss: 0.0 +average reward score: 5.21484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.69%) |Training time=0.45s (20.66%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6251|ppo_ep: 1|act_loss: -0.00647735595703125|cri_loss: -0.0031337738037109375|unsuper_loss: 0.0 +average reward score: 5.203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.30%) |Training time=0.46s (21.08%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6252|ppo_ep: 1|act_loss: 0.02947998046875|cri_loss: 0.01508331298828125|unsuper_loss: 0.0 +average reward score: 5.20703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.86%) |Training time=0.45s (20.54%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6253|ppo_ep: 1|act_loss: 0.004730224609375|cri_loss: 0.002590179443359375|unsuper_loss: 0.0 +average reward score: 5.40625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.08%) |Training time=0.44s (20.25%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6254|ppo_ep: 1|act_loss: 0.01178741455078125|cri_loss: 0.006591796875|unsuper_loss: 0.0 +average reward score: 5.28515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.62%) |Training time=0.45s (20.75%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6255|ppo_ep: 1|act_loss: 0.02813720703125|cri_loss: 0.01495361328125|unsuper_loss: 0.0 +average reward score: 5.734375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.92%) |Training time=0.44s (20.41%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6256|ppo_ep: 1|act_loss: 0.0218048095703125|cri_loss: 0.01131439208984375|unsuper_loss: 0.0 +average reward score: 6.625 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.11%) |Training time=0.46s (19.58%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.75 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6257|ppo_ep: 1|act_loss: 0.02520751953125|cri_loss: 0.01290130615234375|unsuper_loss: 0.0 +average reward score: 5.40234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.64%) |Training time=0.45s (20.71%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6258|ppo_ep: 1|act_loss: 0.001255035400390625|cri_loss: 0.000720977783203125|unsuper_loss: 0.0 +average reward score: 5.44140625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.91%) |Training time=0.44s (20.41%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.53 +[2023-04-14 12:37:18,637] [INFO] [logging.py:96:log_dist] [Rank 0] step=6260, skipped=78, lr=[1.4634874906826658e-06, 1.4634874906826658e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:37:18,654] [INFO] [timer.py:199:stop] epoch=0/micro_step=6260/global_step=6260, RunningAvgSamplesPerSec=105.59232439178967, CurrSamplesPerSec=109.13119463360097, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:37:18,747] [INFO] [logging.py:96:log_dist] [Rank 0] step=6260, skipped=104, lr=[7.763229176887724e-07, 7.763229176887724e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6259|ppo_ep: 1|act_loss: 0.00228118896484375|cri_loss: 0.0017452239990234375|unsuper_loss: 0.0 +average reward score: 5.4453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.32%) |Training time=0.46s (21.07%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6260|ppo_ep: 1|act_loss: 0.00464630126953125|cri_loss: 0.0027523040771484375|unsuper_loss: 0.0 +average reward score: 5.72265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.58%) |Training time=0.45s (20.78%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6261|ppo_ep: 1|act_loss: -0.013946533203125|cri_loss: -0.0068206787109375|unsuper_loss: 0.0 +average reward score: 5.02734375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.50%) |Training time=0.45s (20.86%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6262|ppo_ep: 1|act_loss: 0.0018863677978515625|cri_loss: 0.001041412353515625|unsuper_loss: 0.0 +average reward score: 5.125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.55%) |Training time=0.45s (20.79%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6263|ppo_ep: 1|act_loss: -0.0026092529296875|cri_loss: -0.000988006591796875|unsuper_loss: 0.0 +average reward score: 5.7265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.44%) |Training time=0.45s (20.92%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6264|ppo_ep: 1|act_loss: 0.00653076171875|cri_loss: 0.003620147705078125|unsuper_loss: 0.0 +average reward score: 5.6796875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.71%) |Training time=0.44s (20.62%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6265|ppo_ep: 1|act_loss: -0.00452423095703125|cri_loss: -0.0022068023681640625|unsuper_loss: 0.0 +average reward score: 6.4296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.61%) |Training time=0.45s (20.75%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6266|ppo_ep: 1|act_loss: -0.0116119384765625|cri_loss: -0.005718231201171875|unsuper_loss: 0.0 +average reward score: 5.76953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.49%) |Training time=0.45s (20.87%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6267|ppo_ep: 1|act_loss: -0.0115966796875|cri_loss: -0.005588531494140625|unsuper_loss: 0.0 +average reward score: 5.59375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.69%) |Training time=0.44s (20.67%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6268|ppo_ep: 1|act_loss: -0.00019741058349609375|cri_loss: 0.00042819976806640625|unsuper_loss: 0.0 +average reward score: 6.16015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.41%) |Training time=0.45s (20.83%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53 +[2023-04-14 12:37:40,204] [INFO] [logging.py:96:log_dist] [Rank 0] step=6270, skipped=78, lr=[1.4501863174963161e-06, 1.4501863174963161e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:37:40,222] [INFO] [timer.py:199:stop] epoch=0/micro_step=6270/global_step=6270, RunningAvgSamplesPerSec=105.6018430806941, CurrSamplesPerSec=111.38955300678872, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:37:40,326] [INFO] [logging.py:96:log_dist] [Rank 0] step=6270, skipped=104, lr=[7.693642088055492e-07, 7.693642088055492e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6269|ppo_ep: 1|act_loss: 0.0145263671875|cri_loss: 0.007442474365234375|unsuper_loss: 0.0 +average reward score: 5.6796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.27%) |Training time=0.45s (20.72%) |Others=0.11 (5.01%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6270|ppo_ep: 1|act_loss: -0.026397705078125|cri_loss: -0.01293182373046875|unsuper_loss: 0.0 +average reward score: 5.27734375 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.56%) |Training time=0.48s (21.89%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6271|ppo_ep: 1|act_loss: 0.002685546875|cri_loss: 0.0016155242919921875|unsuper_loss: 0.0 +average reward score: 5.8125 +------------------------------------------------------------------------------------- +|E2E latency=2.45s |Gather latency=0.00s (0.00%) |Generate time=1.87s (76.54%) |Training time=0.47s (19.35%) |Others=0.10 (4.11%)|CurSamplesPerSec=13.08 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6272|ppo_ep: 1|act_loss: -0.01654052734375|cri_loss: -0.0077056884765625|unsuper_loss: 0.0 +average reward score: 5.28125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.10%) |Training time=0.46s (21.28%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6273|ppo_ep: 1|act_loss: -0.0284576416015625|cri_loss: -0.01349639892578125|unsuper_loss: 0.0 +average reward score: 5.84375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.22%) |Training time=0.46s (21.17%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6274|ppo_ep: 1|act_loss: -0.025360107421875|cri_loss: -0.01253509521484375|unsuper_loss: 0.0 +average reward score: 6.06640625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.41%) |Training time=0.45s (20.97%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6275|ppo_ep: 1|act_loss: 0.0177001953125|cri_loss: 0.009307861328125|unsuper_loss: 0.0 +average reward score: 5.69921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.19%) |Training time=0.46s (21.19%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6276|ppo_ep: 1|act_loss: 0.0011444091796875|cri_loss: 0.0008845329284667969|unsuper_loss: 0.0 +average reward score: 5.859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.13%) |Training time=0.46s (21.26%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6277|ppo_ep: 1|act_loss: 0.042236328125|cri_loss: 0.021392822265625|unsuper_loss: 0.0 +average reward score: 5.07421875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.88%) |Training time=0.47s (21.51%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6278|ppo_ep: 1|act_loss: -0.0009446144104003906|cri_loss: -0.00019884109497070312|unsuper_loss: 0.0 +average reward score: 5.7421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.17%) |Training time=0.46s (21.22%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53 +[2023-04-14 12:38:02,230] [INFO] [logging.py:96:log_dist] [Rank 0] step=6280, skipped=78, lr=[1.4369351671900373e-06, 1.4369351671900373e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:38:02,249] [INFO] [timer.py:199:stop] epoch=0/micro_step=6280/global_step=6280, RunningAvgSamplesPerSec=105.6027195287297, CurrSamplesPerSec=109.6824271427334, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:38:02,342] [INFO] [logging.py:96:log_dist] [Rank 0] step=6280, skipped=104, lr=[7.624311521196831e-07, 7.624311521196831e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6279|ppo_ep: 1|act_loss: 0.0102691650390625|cri_loss: 0.00617218017578125|unsuper_loss: 0.0 +average reward score: 5.51171875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.30%) |Training time=0.45s (21.06%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6280|ppo_ep: 1|act_loss: 0.00545501708984375|cri_loss: 0.003635406494140625|unsuper_loss: 0.0 +average reward score: 6.10546875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.88%) |Training time=0.45s (20.48%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6281|ppo_ep: 1|act_loss: -0.0016050338745117188|cri_loss: -0.00028705596923828125|unsuper_loss: 0.0 +average reward score: 6.0390625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.49%) |Training time=0.45s (20.88%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6282|ppo_ep: 1|act_loss: 0.011962890625|cri_loss: 0.006378173828125|unsuper_loss: 0.0 +average reward score: 5.6328125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.45%) |Training time=0.45s (20.91%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6283|ppo_ep: 1|act_loss: -0.0033092498779296875|cri_loss: -0.0013580322265625|unsuper_loss: 0.0 +average reward score: 5.05078125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.20%) |Training time=0.46s (21.16%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6284|ppo_ep: 1|act_loss: 0.0046234130859375|cri_loss: 0.002899169921875|unsuper_loss: 0.0 +average reward score: 5.61328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.01%) |Training time=0.46s (21.26%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6285|ppo_ep: 1|act_loss: -0.03997802734375|cri_loss: -0.0195159912109375|unsuper_loss: 0.0 +average reward score: 4.953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.50%) |Training time=0.45s (20.85%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6286|ppo_ep: 1|act_loss: 0.00927734375|cri_loss: 0.004932403564453125|unsuper_loss: 0.0 +average reward score: 5.30859375 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.74s (74.91%) |Training time=0.47s (20.44%) |Others=0.11 (4.64%)|CurSamplesPerSec=13.78 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6287|ppo_ep: 1|act_loss: -0.0032196044921875|cri_loss: -0.0010728836059570312|unsuper_loss: 0.0 +average reward score: 5.1796875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.68%) |Training time=0.45s (20.68%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6288|ppo_ep: 1|act_loss: -0.00197601318359375|cri_loss: -0.0006690025329589844|unsuper_loss: 0.0 +average reward score: 5.46875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.30%) |Training time=0.46s (21.07%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53 +[2023-04-14 12:38:24,029] [INFO] [logging.py:96:log_dist] [Rank 0] step=6290, skipped=78, lr=[1.4237342361778406e-06, 1.4237342361778406e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:38:24,048] [INFO] [timer.py:199:stop] epoch=0/micro_step=6290/global_step=6290, RunningAvgSamplesPerSec=105.6091977124146, CurrSamplesPerSec=111.14943981290935, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:38:24,140] [INFO] [logging.py:96:log_dist] [Rank 0] step=6290, skipped=104, lr=[7.555238503958001e-07, 7.555238503958001e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6289|ppo_ep: 1|act_loss: 0.033782958984375|cri_loss: 0.01788330078125|unsuper_loss: 0.0 +average reward score: 6.03125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.53%) |Training time=0.45s (20.86%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6290|ppo_ep: 1|act_loss: -0.0182647705078125|cri_loss: -0.00897979736328125|unsuper_loss: 0.0 +average reward score: 6.25 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.26%) |Training time=0.46s (21.12%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53 +[2023-04-14 12:38:28,458] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 6291|ppo_ep: 1|act_loss: 0.013763427734375|cri_loss: 0.007251739501953125|unsuper_loss: 0.0 +average reward score: 6.2109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.90%) |Training time=0.45s (20.95%) |Others=0.09 (4.14%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.53 +[2023-04-14 12:38:30,610] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 6292|ppo_ep: 1|act_loss: -0.0005426406860351562|cri_loss: -1.621246337890625e-05|unsuper_loss: 0.0 +average reward score: 5.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.67%) |Training time=0.46s (21.17%) |Others=0.09 (4.16%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6293|ppo_ep: 1|act_loss: 0.0010776519775390625|cri_loss: 0.0010042190551757812|unsuper_loss: 0.0 +average reward score: 5.796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.28%) |Training time=0.46s (21.10%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6294|ppo_ep: 1|act_loss: 0.04022216796875|cri_loss: 0.02130126953125|unsuper_loss: 0.0 +average reward score: 4.84375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.41%) |Training time=0.45s (20.97%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6295|ppo_ep: 1|act_loss: -0.014068603515625|cri_loss: -0.006320953369140625|unsuper_loss: 0.0 +average reward score: 5.421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.44%) |Training time=0.45s (20.93%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6296|ppo_ep: 1|act_loss: 0.002857208251953125|cri_loss: 0.001953125|unsuper_loss: 0.0 +average reward score: 5.296875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.60%) |Training time=0.45s (20.76%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6297|ppo_ep: 1|act_loss: -0.0020904541015625|cri_loss: -0.0003910064697265625|unsuper_loss: 0.0 +average reward score: 5.08984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.35%) |Training time=0.46s (21.03%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6298|ppo_ep: 1|act_loss: -0.003864288330078125|cri_loss: 0.001556396484375|unsuper_loss: 0.0 +average reward score: 5.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.35%) |Training time=0.45s (20.94%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53 +[2023-04-14 12:38:45,675] [INFO] [logging.py:96:log_dist] [Rank 0] step=6300, skipped=78, lr=[1.4105837201293704e-06, 1.4105837201293704e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:38:45,694] [INFO] [timer.py:199:stop] epoch=0/micro_step=6300/global_step=6300, RunningAvgSamplesPerSec=105.61516707150545, CurrSamplesPerSec=107.56875275999228, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:38:45,786] [INFO] [logging.py:96:log_dist] [Rank 0] step=6300, skipped=106, lr=[7.500166214034776e-07, 7.500166214034776e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6299|ppo_ep: 1|act_loss: 0.009979248046875|cri_loss: 0.00518798828125|unsuper_loss: 0.0 +average reward score: 5.9375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.32%) |Training time=0.46s (21.09%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6300|ppo_ep: 1|act_loss: 0.019439697265625|cri_loss: 0.010284423828125|unsuper_loss: 0.0 +average reward score: 4.953125 +------------------------------------------------------------------------------------- +|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.65s (73.93%) |Training time=0.45s (20.32%) |Others=0.13 (5.75%)|CurSamplesPerSec=14.35 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6301|ppo_ep: 1|act_loss: -0.00232696533203125|cri_loss: -0.00095367431640625|unsuper_loss: 0.0 +average reward score: 5.69921875 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.66s (73.72%) |Training time=0.49s (21.79%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.22 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6302|ppo_ep: 1|act_loss: 0.0011081695556640625|cri_loss: 0.0012903213500976562|unsuper_loss: 0.0 +average reward score: 5.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.54%) |Training time=0.45s (20.82%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6303|ppo_ep: 1|act_loss: -0.022674560546875|cri_loss: -0.0111541748046875|unsuper_loss: 0.0 +average reward score: 5.45703125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.61%) |Training time=0.45s (20.74%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6304|ppo_ep: 1|act_loss: -0.00984954833984375|cri_loss: -0.004364013671875|unsuper_loss: 0.0 +average reward score: 6.0546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.52%) |Training time=0.45s (20.85%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6305|ppo_ep: 1|act_loss: -0.0477294921875|cri_loss: -0.02325439453125|unsuper_loss: 0.0 +average reward score: 5.34765625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.43%) |Training time=0.45s (20.94%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6306|ppo_ep: 1|act_loss: -0.0142974853515625|cri_loss: -0.007049560546875|unsuper_loss: 0.0 +average reward score: 5.67578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.67%) |Training time=0.45s (20.69%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6307|ppo_ep: 1|act_loss: 0.00475311279296875|cri_loss: 0.002620697021484375|unsuper_loss: 0.0 +average reward score: 5.78515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.35%) |Training time=0.46s (21.01%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6308|ppo_ep: 1|act_loss: 0.0255126953125|cri_loss: 0.01300811767578125|unsuper_loss: 0.0 +average reward score: 5.3046875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.33%) |Training time=0.45s (20.99%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53 +[2023-04-14 12:39:07,456] [INFO] [logging.py:96:log_dist] [Rank 0] step=6310, skipped=78, lr=[1.3974838139670003e-06, 1.3974838139670003e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:39:07,475] [INFO] [timer.py:199:stop] epoch=0/micro_step=6310/global_step=6310, RunningAvgSamplesPerSec=105.62331246396326, CurrSamplesPerSec=111.24064007134373, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:39:07,567] [INFO] [logging.py:96:log_dist] [Rank 0] step=6310, skipped=106, lr=[7.431559363585e-07, 7.431559363585e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6309|ppo_ep: 1|act_loss: 0.0016880035400390625|cri_loss: 0.0015850067138671875|unsuper_loss: 0.0 +average reward score: 5.26953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.52%) |Training time=0.45s (20.83%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6310|ppo_ep: 1|act_loss: 0.017578125|cri_loss: 0.0090484619140625|unsuper_loss: 0.0 +average reward score: 5.4765625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.51%) |Training time=0.45s (20.83%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6311|ppo_ep: 1|act_loss: 0.087646484375|cri_loss: 0.044891357421875|unsuper_loss: 0.0 +average reward score: 5.66796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.30%) |Training time=0.45s (21.00%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6312|ppo_ep: 1|act_loss: 0.0124053955078125|cri_loss: 0.006786346435546875|unsuper_loss: 0.0 +average reward score: 5.42578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.37%) |Training time=0.45s (20.98%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53 +[2023-04-14 12:39:16,100] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 6313|ppo_ep: 1|act_loss: 0.0188751220703125|cri_loss: 0.00971221923828125|unsuper_loss: 0.0 +average reward score: 5.96875 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.45%) |Training time=0.42s (19.87%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.53 +[2023-04-14 12:39:18,241] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 6314|ppo_ep: 1|act_loss: 0.03717041015625|cri_loss: 0.019134521484375|unsuper_loss: 0.0 +average reward score: 6.0390625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.14%) |Training time=0.43s (19.97%) |Others=0.10 (4.89%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6315|ppo_ep: 1|act_loss: 0.008331298828125|cri_loss: 0.004657745361328125|unsuper_loss: 0.0 +average reward score: 6.171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.86%) |Training time=0.44s (20.43%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.53 +[2023-04-14 12:39:22,730] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384 +epoch: 0|step: 6316|ppo_ep: 1|act_loss: 0.00103759765625|cri_loss: 0.0007176399230957031|unsuper_loss: 0.0 +average reward score: 5.828125 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.84s (79.41%) |Training time=0.37s (16.09%) |Others=0.10 (4.50%)|CurSamplesPerSec=13.83 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6317|ppo_ep: 1|act_loss: 0.035888671875|cri_loss: 0.01885986328125|unsuper_loss: 0.0 +average reward score: 5.78515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.67s (76.86%) |Training time=0.40s (18.42%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6318|ppo_ep: 1|act_loss: 0.0038299560546875|cri_loss: 0.0024509429931640625|unsuper_loss: 0.0 +average reward score: 4.94140625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.94%) |Training time=0.46s (21.41%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53 +[2023-04-14 12:39:29,229] [INFO] [logging.py:96:log_dist] [Rank 0] step=6320, skipped=81, lr=[1.3883440965450009e-06, 1.3883440965450009e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:39:29,247] [INFO] [timer.py:199:stop] epoch=0/micro_step=6320/global_step=6320, RunningAvgSamplesPerSec=105.64005873369146, CurrSamplesPerSec=108.1665674061908, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:39:29,341] [INFO] [logging.py:96:log_dist] [Rank 0] step=6320, skipped=106, lr=[7.363212919807011e-07, 7.363212919807011e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6319|ppo_ep: 1|act_loss: -0.0096893310546875|cri_loss: -0.004680633544921875|unsuper_loss: 0.0 +average reward score: 5.80078125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.25%) |Training time=0.46s (21.10%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6320|ppo_ep: 1|act_loss: -0.06494140625|cri_loss: -0.031707763671875|unsuper_loss: 0.0 +average reward score: 5.1640625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.05%) |Training time=0.46s (21.31%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6321|ppo_ep: 1|act_loss: -0.02008056640625|cri_loss: -0.0095672607421875|unsuper_loss: 0.0 +average reward score: 4.859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.56%) |Training time=0.45s (20.77%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6322|ppo_ep: 1|act_loss: -0.02349853515625|cri_loss: -0.0114288330078125|unsuper_loss: 0.0 +average reward score: 5.22265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.68%) |Training time=0.44s (20.51%) |Others=0.10 (4.80%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6323|ppo_ep: 1|act_loss: -0.0170745849609375|cri_loss: -0.00827789306640625|unsuper_loss: 0.0 +average reward score: 5.53125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.03%) |Training time=0.46s (21.33%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6324|ppo_ep: 1|act_loss: -0.0194549560546875|cri_loss: -0.00948333740234375|unsuper_loss: 0.0 +average reward score: 5.41796875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.99%) |Training time=0.47s (21.37%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6325|ppo_ep: 1|act_loss: -0.0080718994140625|cri_loss: -0.00390625|unsuper_loss: 0.0 +average reward score: 5.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.29%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6326|ppo_ep: 1|act_loss: 0.024169921875|cri_loss: 0.0124053955078125|unsuper_loss: 0.0 +average reward score: 5.6640625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.81%) |Training time=0.47s (21.53%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6327|ppo_ep: 1|act_loss: 0.0140838623046875|cri_loss: 0.007419586181640625|unsuper_loss: 0.0 +average reward score: 5.3828125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.46s (21.36%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6328|ppo_ep: 1|act_loss: 0.037841796875|cri_loss: 0.0194091796875|unsuper_loss: 0.0 +average reward score: 5.9453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.82%) |Training time=0.47s (21.52%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53 +[2023-04-14 12:39:50,932] [INFO] [logging.py:96:log_dist] [Rank 0] step=6330, skipped=81, lr=[1.3753306724110857e-06, 1.3753306724110857e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:39:50,951] [INFO] [timer.py:199:stop] epoch=0/micro_step=6330/global_step=6330, RunningAvgSamplesPerSec=105.64268490460414, CurrSamplesPerSec=102.43962470224307, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:39:51,059] [INFO] [logging.py:96:log_dist] [Rank 0] step=6330, skipped=106, lr=[7.29512789575999e-07, 7.29512789575999e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6329|ppo_ep: 1|act_loss: 0.0162200927734375|cri_loss: 0.00836944580078125|unsuper_loss: 0.0 +average reward score: 5.578125 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.32%) |Training time=0.48s (21.44%) |Others=0.12 (5.24%)|CurSamplesPerSec=14.43 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6330|ppo_ep: 1|act_loss: 0.0054931640625|cri_loss: 0.0031185150146484375|unsuper_loss: 0.0 +average reward score: 5.328125 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.42%) |Training time=0.45s (20.12%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.15 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6331|ppo_ep: 1|act_loss: -0.003711700439453125|cri_loss: -0.0015411376953125|unsuper_loss: 0.0 +average reward score: 5.39453125 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.31%) |Training time=0.45s (19.34%) |Others=0.10 (4.35%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6332|ppo_ep: 1|act_loss: 0.01558685302734375|cri_loss: 0.00820159912109375|unsuper_loss: 0.0 +average reward score: 5.1015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.20%) |Training time=0.46s (21.13%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6333|ppo_ep: 1|act_loss: 0.0157623291015625|cri_loss: 0.00823974609375|unsuper_loss: 0.0 +average reward score: 5.14453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.05%) |Training time=0.46s (21.31%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6334|ppo_ep: 1|act_loss: -0.005840301513671875|cri_loss: -0.002635955810546875|unsuper_loss: 0.0 +average reward score: 5.44921875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.77%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6335|ppo_ep: 1|act_loss: -0.0248260498046875|cri_loss: -0.01216888427734375|unsuper_loss: 0.0 +average reward score: 5.03125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.16%) |Training time=0.46s (21.21%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6336|ppo_ep: 1|act_loss: -0.03643798828125|cri_loss: -0.0167236328125|unsuper_loss: 0.0 +average reward score: 5.16015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.65%) |Training time=0.47s (21.70%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6337|ppo_ep: 1|act_loss: -0.00493621826171875|cri_loss: -0.0005397796630859375|unsuper_loss: 0.0 +average reward score: 5.70703125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.68%) |Training time=0.47s (21.67%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53 +[2023-04-14 12:40:10,666] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192 +epoch: 0|step: 6338|ppo_ep: 1|act_loss: 0.005001068115234375|cri_loss: 0.0028247833251953125|unsuper_loss: 0.0 +average reward score: 5.953125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.45%) |Training time=0.45s (20.89%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.53 +[2023-04-14 12:40:12,806] [INFO] [logging.py:96:log_dist] [Rank 0] step=6340, skipped=82, lr=[1.3636623034290526e-06, 1.3636623034290526e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:40:12,824] [INFO] [timer.py:199:stop] epoch=0/micro_step=6340/global_step=6340, RunningAvgSamplesPerSec=105.64557854690106, CurrSamplesPerSec=105.26971448381123, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:40:12,917] [INFO] [logging.py:96:log_dist] [Rank 0] step=6340, skipped=106, lr=[7.227305300628223e-07, 7.227305300628223e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6339|ppo_ep: 1|act_loss: -0.0091400146484375|cri_loss: -0.004283905029296875|unsuper_loss: 0.0 +average reward score: 5.25390625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.63%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6340|ppo_ep: 1|act_loss: -0.0142822265625|cri_loss: -0.007076263427734375|unsuper_loss: 0.0 +average reward score: 5.7734375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.47s (21.87%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6341|ppo_ep: 1|act_loss: 0.003917694091796875|cri_loss: 0.00240325927734375|unsuper_loss: 0.0 +average reward score: 4.93359375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.12%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6342|ppo_ep: 1|act_loss: -0.0396728515625|cri_loss: -0.0185089111328125|unsuper_loss: 0.0 +average reward score: 5.23828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.14%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6343|ppo_ep: 1|act_loss: -0.007843017578125|cri_loss: -0.003810882568359375|unsuper_loss: 0.0 +average reward score: 5.45703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.18%) |Training time=0.48s (22.19%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6344|ppo_ep: 1|act_loss: 0.00955963134765625|cri_loss: 0.005218505859375|unsuper_loss: 0.0 +average reward score: 5.94921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.80%) |Training time=0.47s (21.58%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6345|ppo_ep: 1|act_loss: 0.01165771484375|cri_loss: 0.006011962890625|unsuper_loss: 0.0 +average reward score: 5.3046875 +------------------------------------------------------------------------------------- +|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.23%) |Training time=0.48s (21.42%) |Others=0.16 (7.35%)|CurSamplesPerSec=14.34 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6346|ppo_ep: 1|act_loss: 0.035186767578125|cri_loss: 0.01885986328125|unsuper_loss: 0.0 +average reward score: 5.18359375 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.67s (74.30%) |Training time=0.48s (21.24%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.23 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6347|ppo_ep: 1|act_loss: -0.0005655288696289062|cri_loss: -0.00018525123596191406|unsuper_loss: 0.0 +average reward score: 4.78125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.61%) |Training time=0.47s (21.74%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6348|ppo_ep: 1|act_loss: 0.0498046875|cri_loss: 0.0255126953125|unsuper_loss: 0.0 +average reward score: 5.49609375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.48s (21.98%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.53 +[2023-04-14 12:40:34,627] [INFO] [logging.py:96:log_dist] [Rank 0] step=6350, skipped=82, lr=[1.3507461951905125e-06, 1.3507461951905125e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:40:34,645] [INFO] [timer.py:199:stop] epoch=0/micro_step=6350/global_step=6350, RunningAvgSamplesPerSec=105.63979259857506, CurrSamplesPerSec=102.75269498806098, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:40:34,738] [INFO] [logging.py:96:log_dist] [Rank 0] step=6350, skipped=106, lr=[7.159746139706194e-07, 7.159746139706194e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6349|ppo_ep: 1|act_loss: -0.00492095947265625|cri_loss: -0.0022411346435546875|unsuper_loss: 0.0 +average reward score: 5.87890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.47s (21.91%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6350|ppo_ep: 1|act_loss: -0.01776123046875|cri_loss: -0.0073699951171875|unsuper_loss: 0.0 +average reward score: 4.90234375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.53%) |Training time=0.47s (21.83%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6351|ppo_ep: 1|act_loss: 0.020477294921875|cri_loss: 0.01078033447265625|unsuper_loss: 0.0 +average reward score: 4.96484375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.82%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6352|ppo_ep: 1|act_loss: 0.053955078125|cri_loss: 0.0275115966796875|unsuper_loss: 0.0 +average reward score: 4.65625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.76%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6353|ppo_ep: 1|act_loss: 0.04095458984375|cri_loss: 0.021240234375|unsuper_loss: 0.0 +average reward score: 5.26953125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.49%) |Training time=0.47s (21.86%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6354|ppo_ep: 1|act_loss: -0.0173492431640625|cri_loss: -0.00838470458984375|unsuper_loss: 0.0 +average reward score: 5.28125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.52%) |Training time=0.47s (21.82%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6355|ppo_ep: 1|act_loss: -0.022491455078125|cri_loss: -0.01107025146484375|unsuper_loss: 0.0 +average reward score: 5.1953125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.82%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6356|ppo_ep: 1|act_loss: 0.01422882080078125|cri_loss: 0.00811767578125|unsuper_loss: 0.0 +average reward score: 5.2421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.31%) |Training time=0.48s (22.08%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6357|ppo_ep: 1|act_loss: 0.045135498046875|cri_loss: 0.0232696533203125|unsuper_loss: 0.0 +average reward score: 5.5703125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.49%) |Training time=0.47s (21.89%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6358|ppo_ep: 1|act_loss: -0.01306915283203125|cri_loss: -0.00640106201171875|unsuper_loss: 0.0 +average reward score: 5.328125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.42%) |Training time=0.48s (21.99%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.53 +[2023-04-14 12:40:56,326] [INFO] [logging.py:96:log_dist] [Rank 0] step=6360, skipped=82, lr=[1.3378815837745404e-06, 1.3378815837745404e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:40:56,344] [INFO] [timer.py:199:stop] epoch=0/micro_step=6360/global_step=6360, RunningAvgSamplesPerSec=105.63503569608532, CurrSamplesPerSec=101.74754325784137, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:40:56,437] [INFO] [logging.py:96:log_dist] [Rank 0] step=6360, skipped=106, lr=[7.092451414383644e-07, 7.092451414383644e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6359|ppo_ep: 1|act_loss: 0.031951904296875|cri_loss: 0.0164794921875|unsuper_loss: 0.0 +average reward score: 5.8359375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.80%) |Training time=0.48s (21.65%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6360|ppo_ep: 1|act_loss: 0.0017070770263671875|cri_loss: 0.00101470947265625|unsuper_loss: 0.0 +average reward score: 5.47265625 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.71s (73.05%) |Training time=0.53s (22.65%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6361|ppo_ep: 1|act_loss: 0.020721435546875|cri_loss: 0.01094818115234375|unsuper_loss: 0.0 +average reward score: 5.921875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.39%) |Training time=0.47s (21.96%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6362|ppo_ep: 1|act_loss: 0.03717041015625|cri_loss: 0.0189056396484375|unsuper_loss: 0.0 +average reward score: 6.07421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.27%) |Training time=0.48s (22.10%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6363|ppo_ep: 1|act_loss: 0.008636474609375|cri_loss: 0.00461578369140625|unsuper_loss: 0.0 +average reward score: 5.41015625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.33%) |Training time=0.48s (22.10%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6364|ppo_ep: 1|act_loss: 0.0145111083984375|cri_loss: 0.00763702392578125|unsuper_loss: 0.0 +average reward score: 4.7109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.29%) |Training time=0.48s (22.09%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6365|ppo_ep: 1|act_loss: -0.01392364501953125|cri_loss: -0.006816864013671875|unsuper_loss: 0.0 +average reward score: 5.21484375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.42%) |Training time=0.48s (21.97%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6366|ppo_ep: 1|act_loss: -0.023895263671875|cri_loss: -0.0116729736328125|unsuper_loss: 0.0 +average reward score: 5.3515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.37%) |Training time=0.48s (22.01%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6367|ppo_ep: 1|act_loss: 0.002269744873046875|cri_loss: 0.001384735107421875|unsuper_loss: 0.0 +average reward score: 5.75 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.48s (22.01%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6368|ppo_ep: 1|act_loss: 0.04046630859375|cri_loss: 0.022247314453125|unsuper_loss: 0.0 +average reward score: 5.83203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.22%) |Training time=0.48s (22.17%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.53 +[2023-04-14 12:41:18,188] [INFO] [logging.py:96:log_dist] [Rank 0] step=6370, skipped=82, lr=[1.3250686598657134e-06, 1.3250686598657134e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:41:18,206] [INFO] [timer.py:199:stop] epoch=0/micro_step=6370/global_step=6370, RunningAvgSamplesPerSec=105.62690440321303, CurrSamplesPerSec=102.08720838251938, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:41:18,299] [INFO] [logging.py:96:log_dist] [Rank 0] step=6370, skipped=106, lr=[7.025422122130748e-07, 7.025422122130748e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6369|ppo_ep: 1|act_loss: -0.00763702392578125|cri_loss: -0.003749847412109375|unsuper_loss: 0.0 +average reward score: 5.56640625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.48s (21.99%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6370|ppo_ep: 1|act_loss: 0.00490570068359375|cri_loss: 0.003025054931640625|unsuper_loss: 0.0 +average reward score: 5.6796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.28%) |Training time=0.48s (22.10%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6371|ppo_ep: 1|act_loss: 0.0430908203125|cri_loss: 0.023895263671875|unsuper_loss: 0.0 +average reward score: 5.3984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.79%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6372|ppo_ep: 1|act_loss: 0.001590728759765625|cri_loss: 0.001018524169921875|unsuper_loss: 0.0 +average reward score: 5.15234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.34%) |Training time=0.48s (22.05%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6373|ppo_ep: 1|act_loss: -0.00396728515625|cri_loss: -0.0017547607421875|unsuper_loss: 0.0 +average reward score: 5.66015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.84%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6374|ppo_ep: 1|act_loss: 0.0047454833984375|cri_loss: 0.002521514892578125|unsuper_loss: 0.0 +average reward score: 5.64453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.99%) |Training time=0.46s (21.35%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6375|ppo_ep: 1|act_loss: -0.0148773193359375|cri_loss: -0.007122039794921875|unsuper_loss: 0.0 +average reward score: 4.546875 +------------------------------------------------------------------------------------- +|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.57%) |Training time=0.47s (20.13%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.63 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6376|ppo_ep: 1|act_loss: -0.0074920654296875|cri_loss: -0.0035533905029296875|unsuper_loss: 0.0 +average reward score: 5.8671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.47s (21.95%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6377|ppo_ep: 1|act_loss: -0.0041961669921875|cri_loss: -0.0018596649169921875|unsuper_loss: 0.0 +average reward score: 5.05078125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.29%) |Training time=0.48s (22.09%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6378|ppo_ep: 1|act_loss: -0.0028533935546875|cri_loss: -0.001171112060546875|unsuper_loss: 0.0 +average reward score: 5.5703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.50%) |Training time=0.47s (21.85%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.53 +[2023-04-14 12:41:40,024] [INFO] [logging.py:96:log_dist] [Rank 0] step=6380, skipped=82, lr=[1.3123076133824706e-06, 1.3123076133824706e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:41:40,042] [INFO] [timer.py:199:stop] epoch=0/micro_step=6380/global_step=6380, RunningAvgSamplesPerSec=105.62234845103798, CurrSamplesPerSec=103.69900664528576, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:41:40,135] [INFO] [logging.py:96:log_dist] [Rank 0] step=6380, skipped=106, lr=[6.958659256483305e-07, 6.958659256483305e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6379|ppo_ep: 1|act_loss: -0.0093841552734375|cri_loss: -0.004528045654296875|unsuper_loss: 0.0 +average reward score: 5.49609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.82%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6380|ppo_ep: 1|act_loss: -0.0149078369140625|cri_loss: -0.00719451904296875|unsuper_loss: 0.0 +average reward score: 4.71484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.80%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6381|ppo_ep: 1|act_loss: -0.0292510986328125|cri_loss: -0.01401519775390625|unsuper_loss: 0.0 +average reward score: 5.12109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.52%) |Training time=0.47s (21.80%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6382|ppo_ep: 1|act_loss: -0.0028247833251953125|cri_loss: -0.0012359619140625|unsuper_loss: 0.0 +average reward score: 5.76953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.87%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6383|ppo_ep: 1|act_loss: -0.003734588623046875|cri_loss: -0.0015010833740234375|unsuper_loss: 0.0 +average reward score: 5.01953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.78%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6384|ppo_ep: 1|act_loss: -0.0234222412109375|cri_loss: -0.01132965087890625|unsuper_loss: 0.0 +average reward score: 5.2578125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.13%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6385|ppo_ep: 1|act_loss: -0.020721435546875|cri_loss: -0.010101318359375|unsuper_loss: 0.0 +average reward score: 4.78125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.53%) |Training time=0.47s (21.84%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6386|ppo_ep: 1|act_loss: 0.003948211669921875|cri_loss: 0.0021457672119140625|unsuper_loss: 0.0 +average reward score: 6.00390625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.36%) |Training time=0.47s (21.99%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6387|ppo_ep: 1|act_loss: 0.0122222900390625|cri_loss: 0.00632476806640625|unsuper_loss: 0.0 +average reward score: 5.828125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.39%) |Training time=0.48s (21.97%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6388|ppo_ep: 1|act_loss: 0.021087646484375|cri_loss: 0.01094818115234375|unsuper_loss: 0.0 +average reward score: 5.23828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.52%) |Training time=0.47s (21.84%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53 +[2023-04-14 12:42:01,739] [INFO] [logging.py:96:log_dist] [Rank 0] step=6390, skipped=82, lr=[1.299598633474306e-06, 1.299598633474306e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:42:01,758] [INFO] [timer.py:199:stop] epoch=0/micro_step=6390/global_step=6390, RunningAvgSamplesPerSec=105.61863925776754, CurrSamplesPerSec=104.40157717345564, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:42:01,851] [INFO] [logging.py:96:log_dist] [Rank 0] step=6390, skipped=106, lr=[6.892163807028043e-07, 6.892163807028043e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6389|ppo_ep: 1|act_loss: -0.0061798095703125|cri_loss: -0.00295257568359375|unsuper_loss: 0.0 +average reward score: 5.78125 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.94%) |Training time=0.47s (20.66%) |Others=0.10 (4.40%)|CurSamplesPerSec=14.07 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6390|ppo_ep: 1|act_loss: 0.0626220703125|cri_loss: 0.03173828125|unsuper_loss: 0.0 +average reward score: 5.11328125 +------------------------------------------------------------------------------------- +|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.78s (75.54%) |Training time=0.48s (20.19%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.57 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6391|ppo_ep: 1|act_loss: 0.016357421875|cri_loss: 0.0083770751953125|unsuper_loss: 0.0 +average reward score: 5.328125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.39%) |Training time=0.47s (21.95%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6392|ppo_ep: 1|act_loss: 0.0127716064453125|cri_loss: 0.0069580078125|unsuper_loss: 0.0 +average reward score: 5.30078125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.11%) |Training time=0.48s (22.27%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53 +[2023-04-14 12:42:10,680] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 6393|ppo_ep: 1|act_loss: 0.0075225830078125|cri_loss: 0.004016876220703125|unsuper_loss: 0.0 +average reward score: 5.94921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.60%) |Training time=0.48s (22.23%) |Others=0.09 (4.17%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.53 +[2023-04-14 12:42:12,838] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 6394|ppo_ep: 1|act_loss: -0.0056915283203125|cri_loss: -0.0023746490478515625|unsuper_loss: 0.0 +average reward score: 5.25390625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.49s (22.54%) |Others=0.09 (4.14%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6395|ppo_ep: 1|act_loss: -0.0128631591796875|cri_loss: -0.005878448486328125|unsuper_loss: 0.0 +average reward score: 5.80859375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.04%) |Training time=0.48s (22.35%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6396|ppo_ep: 1|act_loss: 0.03961181640625|cri_loss: 0.0205078125|unsuper_loss: 0.0 +average reward score: 5.4921875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.26%) |Training time=0.48s (22.10%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6397|ppo_ep: 1|act_loss: -0.0031223297119140625|cri_loss: -0.0006008148193359375|unsuper_loss: 0.0 +average reward score: 5.31640625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.86%) |Training time=0.49s (22.51%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6398|ppo_ep: 1|act_loss: -0.008331298828125|cri_loss: -0.0038623809814453125|unsuper_loss: 0.0 +average reward score: 5.1015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.29%) |Training time=0.48s (22.08%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.53 +[2023-04-14 12:42:23,529] [INFO] [logging.py:96:log_dist] [Rank 0] step=6400, skipped=82, lr=[1.286941908518962e-06, 1.286941908518962e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:42:23,547] [INFO] [timer.py:199:stop] epoch=0/micro_step=6400/global_step=6400, RunningAvgSamplesPerSec=105.61073946158585, CurrSamplesPerSec=101.53287430195913, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:42:23,640] [INFO] [logging.py:96:log_dist] [Rank 0] step=6400, skipped=108, lr=[6.839160649594401e-07, 6.839160649594401e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6399|ppo_ep: 1|act_loss: -0.0230865478515625|cri_loss: -0.01132965087890625|unsuper_loss: 0.0 +average reward score: 4.8046875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.26%) |Training time=0.48s (22.13%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6400|ppo_ep: 1|act_loss: -0.00251007080078125|cri_loss: -0.0011920928955078125|unsuper_loss: 0.0 +average reward score: 5.83984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.25%) |Training time=0.48s (22.10%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6401|ppo_ep: 1|act_loss: 0.01190185546875|cri_loss: 0.006153106689453125|unsuper_loss: 0.0 +average reward score: 4.70703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.88%) |Training time=0.49s (22.51%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6402|ppo_ep: 1|act_loss: -0.0122222900390625|cri_loss: -0.00583648681640625|unsuper_loss: 0.0 +average reward score: 5.12109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.25%) |Training time=0.48s (22.10%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6403|ppo_ep: 1|act_loss: -0.01561737060546875|cri_loss: -0.00749969482421875|unsuper_loss: 0.0 +average reward score: 5.87890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.47s (21.96%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6404|ppo_ep: 1|act_loss: 0.018951416015625|cri_loss: 0.009796142578125|unsuper_loss: 0.0 +average reward score: 5.26171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.94%) |Training time=0.49s (22.42%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6405|ppo_ep: 1|act_loss: -0.0061798095703125|cri_loss: -0.0029392242431640625|unsuper_loss: 0.0 +average reward score: 5.84765625 +------------------------------------------------------------------------------------- +|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.40%) |Training time=0.53s (23.90%) |Others=0.11 (4.70%)|CurSamplesPerSec=14.33 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6406|ppo_ep: 1|act_loss: 0.00415802001953125|cri_loss: 0.0023365020751953125|unsuper_loss: 0.0 +average reward score: 4.7890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.13%) |Training time=0.48s (22.25%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6407|ppo_ep: 1|act_loss: -0.00318145751953125|cri_loss: -0.0015420913696289062|unsuper_loss: 0.0 +average reward score: 6.234375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.35%) |Training time=0.48s (22.04%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6408|ppo_ep: 1|act_loss: 0.0098114013671875|cri_loss: 0.005237579345703125|unsuper_loss: 0.0 +average reward score: 5.45703125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.14%) |Training time=0.48s (22.22%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53 +[2023-04-14 12:42:45,229] [INFO] [logging.py:96:log_dist] [Rank 0] step=6410, skipped=82, lr=[1.2743376261196346e-06, 1.2743376261196346e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:42:45,248] [INFO] [timer.py:199:stop] epoch=0/micro_step=6410/global_step=6410, RunningAvgSamplesPerSec=105.60036418493905, CurrSamplesPerSec=102.12643591575606, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:42:45,340] [INFO] [logging.py:96:log_dist] [Rank 0] step=6410, skipped=108, lr=[6.77314903038199e-07, 6.77314903038199e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6409|ppo_ep: 1|act_loss: -0.0075836181640625|cri_loss: -0.0036602020263671875|unsuper_loss: 0.0 +average reward score: 5.0625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.48s (22.02%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6410|ppo_ep: 1|act_loss: 0.05438232421875|cri_loss: 0.031829833984375|unsuper_loss: 0.0 +average reward score: 5.4453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.09%) |Training time=0.48s (22.30%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6411|ppo_ep: 1|act_loss: -0.015777587890625|cri_loss: -0.007659912109375|unsuper_loss: 0.0 +average reward score: 5.47265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.98%) |Training time=0.49s (22.39%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6412|ppo_ep: 1|act_loss: 0.059295654296875|cri_loss: 0.0310211181640625|unsuper_loss: 0.0 +average reward score: 5.64453125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.47s (21.93%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6413|ppo_ep: 1|act_loss: -0.0099945068359375|cri_loss: -0.004230499267578125|unsuper_loss: 0.0 +average reward score: 5.359375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.80%) |Training time=0.49s (22.61%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6414|ppo_ep: 1|act_loss: -0.0134124755859375|cri_loss: -0.006267547607421875|unsuper_loss: 0.0 +average reward score: 5.87109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.16%) |Training time=0.48s (22.20%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6415|ppo_ep: 1|act_loss: 0.029022216796875|cri_loss: 0.01480865478515625|unsuper_loss: 0.0 +average reward score: 5.43359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.17%) |Training time=0.48s (22.19%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6416|ppo_ep: 1|act_loss: -0.0063018798828125|cri_loss: -0.003021240234375|unsuper_loss: 0.0 +average reward score: 5.53515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.24%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6417|ppo_ep: 1|act_loss: 0.0084686279296875|cri_loss: 0.004512786865234375|unsuper_loss: 0.0 +average reward score: 5.0703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.23%) |Training time=0.48s (22.15%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6418|ppo_ep: 1|act_loss: 0.00885009765625|cri_loss: 0.005138397216796875|unsuper_loss: 0.0 +average reward score: 4.6484375 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.64s (73.63%) |Training time=0.48s (21.50%) |Others=0.11 (4.88%)|CurSamplesPerSec=14.38 |AvgSamplesPerSec=14.53 +[2023-04-14 12:43:07,049] [INFO] [logging.py:96:log_dist] [Rank 0] step=6420, skipped=82, lr=[1.2617859731021937e-06, 1.2617859731021937e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:43:07,067] [INFO] [timer.py:199:stop] epoch=0/micro_step=6420/global_step=6420, RunningAvgSamplesPerSec=105.59150460729504, CurrSamplesPerSec=99.78976137635176, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:43:07,160] [INFO] [logging.py:96:log_dist] [Rank 0] step=6420, skipped=108, lr=[6.707407577070841e-07, 6.707407577070841e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6419|ppo_ep: 1|act_loss: 0.057830810546875|cri_loss: 0.030792236328125|unsuper_loss: 0.0 +average reward score: 5.1328125 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.31%) |Training time=0.48s (21.31%) |Others=0.10 (4.38%)|CurSamplesPerSec=14.11 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6420|ppo_ep: 1|act_loss: 0.03582763671875|cri_loss: 0.019805908203125|unsuper_loss: 0.0 +average reward score: 4.6484375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.81%) |Training time=0.49s (22.59%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6421|ppo_ep: 1|act_loss: -0.001499176025390625|cri_loss: -0.0002970695495605469|unsuper_loss: 0.0 +average reward score: 4.9140625 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.47%) |Training time=0.47s (20.25%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6422|ppo_ep: 1|act_loss: 0.024444580078125|cri_loss: 0.013214111328125|unsuper_loss: 0.0 +average reward score: 5.203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.47s (21.93%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6423|ppo_ep: 1|act_loss: 0.00295257568359375|cri_loss: 0.0017480850219726562|unsuper_loss: 0.0 +average reward score: 5.3671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.61%) |Training time=0.47s (21.79%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6424|ppo_ep: 1|act_loss: -0.0242156982421875|cri_loss: -0.0118255615234375|unsuper_loss: 0.0 +average reward score: 5.88671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.10%) |Training time=0.48s (22.27%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6425|ppo_ep: 1|act_loss: 0.0140228271484375|cri_loss: 0.007411956787109375|unsuper_loss: 0.0 +average reward score: 4.94140625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.03%) |Training time=0.48s (22.22%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6426|ppo_ep: 1|act_loss: -0.0147705078125|cri_loss: -0.007007598876953125|unsuper_loss: 0.0 +average reward score: 4.24609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.47s (21.92%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6427|ppo_ep: 1|act_loss: -0.00894927978515625|cri_loss: -0.0044097900390625|unsuper_loss: 0.0 +average reward score: 5.5390625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.22%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6428|ppo_ep: 1|act_loss: -0.016326904296875|cri_loss: -0.0078277587890625|unsuper_loss: 0.0 +average reward score: 5.37890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.04%) |Training time=0.49s (22.35%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53 +[2023-04-14 12:43:28,885] [INFO] [logging.py:96:log_dist] [Rank 0] step=6430, skipped=82, lr=[1.2492871355124154e-06, 1.2492871355124154e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:43:28,903] [INFO] [timer.py:199:stop] epoch=0/micro_step=6430/global_step=6430, RunningAvgSamplesPerSec=105.58394898947627, CurrSamplesPerSec=101.49256638680498, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:43:28,996] [INFO] [logging.py:96:log_dist] [Rank 0] step=6430, skipped=108, lr=[6.641937264107868e-07, 6.641937264107868e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6429|ppo_ep: 1|act_loss: -0.0140533447265625|cri_loss: -0.00669097900390625|unsuper_loss: 0.0 +average reward score: 4.6875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.31%) |Training time=0.48s (22.09%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6430|ppo_ep: 1|act_loss: -0.015594482421875|cri_loss: -0.007366180419921875|unsuper_loss: 0.0 +average reward score: 5.703125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.26%) |Training time=0.48s (22.10%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6431|ppo_ep: 1|act_loss: 0.004291534423828125|cri_loss: 0.0024585723876953125|unsuper_loss: 0.0 +average reward score: 5.0 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.49%) |Training time=0.47s (21.88%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6432|ppo_ep: 1|act_loss: 0.0127105712890625|cri_loss: 0.007175445556640625|unsuper_loss: 0.0 +average reward score: 5.15625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.30%) |Training time=0.48s (22.09%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6433|ppo_ep: 1|act_loss: -0.01412200927734375|cri_loss: -0.00665283203125|unsuper_loss: 0.0 +average reward score: 5.9140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.46%) |Training time=0.47s (21.93%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6434|ppo_ep: 1|act_loss: 0.0120086669921875|cri_loss: 0.00629425048828125|unsuper_loss: 0.0 +average reward score: 4.984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.82%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6435|ppo_ep: 1|act_loss: -0.0128173828125|cri_loss: -0.005954742431640625|unsuper_loss: 0.0 +average reward score: 4.84375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.83%) |Training time=0.47s (21.55%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6436|ppo_ep: 1|act_loss: -0.0032863616943359375|cri_loss: -0.0015459060668945312|unsuper_loss: 0.0 +average reward score: 5.62890625 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.60s (70.26%) |Training time=0.58s (25.32%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.06 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6437|ppo_ep: 1|act_loss: 0.00627899169921875|cri_loss: 0.0032482147216796875|unsuper_loss: 0.0 +average reward score: 4.609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.46s (21.39%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6438|ppo_ep: 1|act_loss: 0.007965087890625|cri_loss: 0.005157470703125|unsuper_loss: 0.0 +average reward score: 5.69921875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.35%) |Training time=0.48s (22.04%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53 +[2023-04-14 12:43:50,625] [INFO] [logging.py:96:log_dist] [Rank 0] step=6440, skipped=82, lr=[1.2368412986132292e-06, 1.2368412986132292e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:43:50,643] [INFO] [timer.py:199:stop] epoch=0/micro_step=6440/global_step=6440, RunningAvgSamplesPerSec=105.5777836336627, CurrSamplesPerSec=102.76473200476545, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:43:50,736] [INFO] [logging.py:96:log_dist] [Rank 0] step=6440, skipped=108, lr=[6.576739061921056e-07, 6.576739061921056e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6439|ppo_ep: 1|act_loss: 0.02630615234375|cri_loss: 0.01354217529296875|unsuper_loss: 0.0 +average reward score: 5.30859375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.44%) |Training time=0.47s (21.96%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6440|ppo_ep: 1|act_loss: -0.0191192626953125|cri_loss: -0.0086517333984375|unsuper_loss: 0.0 +average reward score: 5.04296875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.59%) |Training time=0.47s (21.75%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6441|ppo_ep: 1|act_loss: 0.00342559814453125|cri_loss: 0.0019407272338867188|unsuper_loss: 0.0 +average reward score: 5.44140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.55%) |Training time=0.47s (21.84%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6442|ppo_ep: 1|act_loss: -0.000881195068359375|cri_loss: -0.0002651214599609375|unsuper_loss: 0.0 +average reward score: 5.34375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.30%) |Training time=0.48s (22.06%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6443|ppo_ep: 1|act_loss: -0.0157928466796875|cri_loss: -0.007617950439453125|unsuper_loss: 0.0 +average reward score: 4.7421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.81%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6444|ppo_ep: 1|act_loss: -0.0029392242431640625|cri_loss: -0.0014276504516601562|unsuper_loss: 0.0 +average reward score: 5.765625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.47%) |Training time=0.47s (21.90%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6445|ppo_ep: 1|act_loss: -0.0023975372314453125|cri_loss: -0.0009541511535644531|unsuper_loss: 0.0 +average reward score: 4.9609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.37%) |Training time=0.48s (22.01%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6446|ppo_ep: 1|act_loss: 0.03143310546875|cri_loss: 0.01885986328125|unsuper_loss: 0.0 +average reward score: 4.34375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.47s (21.60%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6447|ppo_ep: 1|act_loss: -0.0092315673828125|cri_loss: -0.0044708251953125|unsuper_loss: 0.0 +average reward score: 5.15625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.68%) |Training time=0.47s (21.68%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6448|ppo_ep: 1|act_loss: -0.002841949462890625|cri_loss: 0.0002040863037109375|unsuper_loss: 0.0 +average reward score: 4.7734375 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.78%) |Training time=0.47s (20.81%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.08 |AvgSamplesPerSec=14.53 +[2023-04-14 12:44:12,338] [INFO] [logging.py:96:log_dist] [Rank 0] step=6450, skipped=82, lr=[1.224448646881958e-06, 1.224448646881958e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:44:12,356] [INFO] [timer.py:199:stop] epoch=0/micro_step=6450/global_step=6450, RunningAvgSamplesPerSec=105.5742490838407, CurrSamplesPerSec=102.64236918076665, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:44:12,448] [INFO] [logging.py:96:log_dist] [Rank 0] step=6450, skipped=108, lr=[6.511813936905043e-07, 6.511813936905043e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6449|ppo_ep: 1|act_loss: 0.00443267822265625|cri_loss: 0.002407073974609375|unsuper_loss: 0.0 +average reward score: 5.19921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.36%) |Training time=0.47s (22.01%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6450|ppo_ep: 1|act_loss: 0.01702880859375|cri_loss: 0.00946807861328125|unsuper_loss: 0.0 +average reward score: 5.30078125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.65%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6451|ppo_ep: 1|act_loss: -0.0035552978515625|cri_loss: -0.00168609619140625|unsuper_loss: 0.0 +average reward score: 5.3515625 +------------------------------------------------------------------------------------- +|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.55%) |Training time=0.47s (20.17%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.64 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6452|ppo_ep: 1|act_loss: 0.0164794921875|cri_loss: 0.0085906982421875|unsuper_loss: 0.0 +average reward score: 5.63671875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.96%) |Training time=0.49s (22.45%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6453|ppo_ep: 1|act_loss: 0.007053375244140625|cri_loss: 0.0038814544677734375|unsuper_loss: 0.0 +average reward score: 4.7890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.23%) |Training time=0.48s (22.16%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6454|ppo_ep: 1|act_loss: -0.013641357421875|cri_loss: -0.0064697265625|unsuper_loss: 0.0 +average reward score: 4.8125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.23%) |Training time=0.48s (22.16%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6455|ppo_ep: 1|act_loss: 0.036041259765625|cri_loss: 0.0184478759765625|unsuper_loss: 0.0 +average reward score: 5.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.13%) |Training time=0.48s (22.28%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6456|ppo_ep: 1|act_loss: 0.0010776519775390625|cri_loss: 0.0009908676147460938|unsuper_loss: 0.0 +average reward score: 6.23828125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.48s (22.00%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6457|ppo_ep: 1|act_loss: 0.039031982421875|cri_loss: 0.02099609375|unsuper_loss: 0.0 +average reward score: 5.26953125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.04%) |Training time=0.49s (22.37%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6458|ppo_ep: 1|act_loss: -0.01117706298828125|cri_loss: -0.005397796630859375|unsuper_loss: 0.0 +average reward score: 4.8671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.74%) |Training time=0.47s (21.63%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53 +[2023-04-14 12:44:34,188] [INFO] [logging.py:96:log_dist] [Rank 0] step=6460, skipped=82, lr=[1.2121093640075971e-06, 1.2121093640075971e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:44:34,207] [INFO] [timer.py:199:stop] epoch=0/micro_step=6460/global_step=6460, RunningAvgSamplesPerSec=105.56811658804929, CurrSamplesPerSec=105.1897695692178, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:44:34,300] [INFO] [logging.py:96:log_dist] [Rank 0] step=6460, skipped=108, lr=[6.447162851406805e-07, 6.447162851406805e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6459|ppo_ep: 1|act_loss: -0.0026264190673828125|cri_loss: -0.0008635520935058594|unsuper_loss: 0.0 +average reward score: 5.09765625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.87%) |Training time=0.47s (21.53%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6460|ppo_ep: 1|act_loss: -0.0277099609375|cri_loss: -0.013702392578125|unsuper_loss: 0.0 +average reward score: 5.5 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.27%) |Training time=0.46s (21.11%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6461|ppo_ep: 1|act_loss: 0.051910400390625|cri_loss: 0.0283203125|unsuper_loss: 0.0 +average reward score: 5.41015625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.21%) |Training time=0.46s (21.14%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6462|ppo_ep: 1|act_loss: -0.0036907196044921875|cri_loss: -0.001331329345703125|unsuper_loss: 0.0 +average reward score: 5.80078125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.40%) |Training time=0.45s (20.97%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6463|ppo_ep: 1|act_loss: 0.0004858970642089844|cri_loss: 0.00047969818115234375|unsuper_loss: 0.0 +average reward score: 5.33984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.41%) |Training time=0.45s (20.95%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6464|ppo_ep: 1|act_loss: -0.0036411285400390625|cri_loss: -0.001377105712890625|unsuper_loss: 0.0 +average reward score: 5.4765625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.13%) |Training time=0.46s (21.26%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6465|ppo_ep: 1|act_loss: 0.0038700103759765625|cri_loss: 0.002208709716796875|unsuper_loss: 0.0 +average reward score: 6.25 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.20%) |Training time=0.46s (21.16%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6466|ppo_ep: 1|act_loss: 0.00542449951171875|cri_loss: 0.00310516357421875|unsuper_loss: 0.0 +average reward score: 4.60546875 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.30%) |Training time=0.45s (19.34%) |Others=0.10 (4.36%)|CurSamplesPerSec=13.68 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6467|ppo_ep: 1|act_loss: 0.01116943359375|cri_loss: 0.006710052490234375|unsuper_loss: 0.0 +average reward score: 5.359375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.46%) |Training time=0.45s (20.90%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6468|ppo_ep: 1|act_loss: -0.049163818359375|cri_loss: -0.0239410400390625|unsuper_loss: 0.0 +average reward score: 4.875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.53%) |Training time=0.45s (20.82%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53 +[2023-04-14 12:44:56,036] [INFO] [logging.py:96:log_dist] [Rank 0] step=6470, skipped=82, lr=[1.1998236328880862e-06, 1.1998236328880862e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:44:56,055] [INFO] [timer.py:199:stop] epoch=0/micro_step=6470/global_step=6470, RunningAvgSamplesPerSec=105.57323794039799, CurrSamplesPerSec=107.27137351562263, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:44:56,147] [INFO] [logging.py:96:log_dist] [Rank 0] step=6470, skipped=108, lr=[6.382786763711393e-07, 6.382786763711393e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6469|ppo_ep: 1|act_loss: -0.0016469955444335938|cri_loss: -0.0007343292236328125|unsuper_loss: 0.0 +average reward score: 5.234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.17%) |Training time=0.46s (21.23%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6470|ppo_ep: 1|act_loss: -0.0221710205078125|cri_loss: -0.0108489990234375|unsuper_loss: 0.0 +average reward score: 5.48828125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.34%) |Training time=0.45s (21.04%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6471|ppo_ep: 1|act_loss: 0.0247344970703125|cri_loss: 0.01265716552734375|unsuper_loss: 0.0 +average reward score: 4.68359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.26%) |Training time=0.46s (21.11%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6472|ppo_ep: 1|act_loss: -0.020843505859375|cri_loss: -0.0093841552734375|unsuper_loss: 0.0 +average reward score: 5.80859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.17%) |Training time=0.46s (21.20%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6473|ppo_ep: 1|act_loss: -0.0097808837890625|cri_loss: -0.00446319580078125|unsuper_loss: 0.0 +average reward score: 5.7734375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.55%) |Training time=0.45s (20.85%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6474|ppo_ep: 1|act_loss: 0.00592803955078125|cri_loss: 0.0030498504638671875|unsuper_loss: 0.0 +average reward score: 5.75 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.01%) |Training time=0.46s (21.39%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6475|ppo_ep: 1|act_loss: 0.03131103515625|cri_loss: 0.0160369873046875|unsuper_loss: 0.0 +average reward score: 4.97265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.25%) |Training time=0.46s (21.12%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6476|ppo_ep: 1|act_loss: 0.0002980232238769531|cri_loss: 0.00029349327087402344|unsuper_loss: 0.0 +average reward score: 4.92578125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.49%) |Training time=0.45s (20.90%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6477|ppo_ep: 1|act_loss: 0.0050506591796875|cri_loss: 0.003326416015625|unsuper_loss: 0.0 +average reward score: 5.1640625 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.50%) |Training time=0.46s (20.98%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6478|ppo_ep: 1|act_loss: 0.0130462646484375|cri_loss: 0.006927490234375|unsuper_loss: 0.0 +average reward score: 5.08984375 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.37%) |Training time=0.46s (20.20%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.14 |AvgSamplesPerSec=14.53 +[2023-04-14 12:45:17,830] [INFO] [logging.py:96:log_dist] [Rank 0] step=6480, skipped=82, lr=[1.1875916356275982e-06, 1.1875916356275982e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:45:17,848] [INFO] [timer.py:199:stop] epoch=0/micro_step=6480/global_step=6480, RunningAvgSamplesPerSec=105.5773912936444, CurrSamplesPerSec=107.25242944209734, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:45:17,941] [INFO] [logging.py:96:log_dist] [Rank 0] step=6480, skipped=108, lr=[6.318686628027723e-07, 6.318686628027723e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6479|ppo_ep: 1|act_loss: -0.0038814544677734375|cri_loss: -0.0016021728515625|unsuper_loss: 0.0 +average reward score: 5.3828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.10%) |Training time=0.46s (21.30%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6480|ppo_ep: 1|act_loss: -0.0041351318359375|cri_loss: -0.0018663406372070312|unsuper_loss: 0.0 +average reward score: 4.90234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.03%) |Training time=0.46s (21.08%) |Others=0.11 (4.89%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6481|ppo_ep: 1|act_loss: -0.0183868408203125|cri_loss: -0.00872039794921875|unsuper_loss: 0.0 +average reward score: 5.5546875 +------------------------------------------------------------------------------------- +|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.79s (76.18%) |Training time=0.46s (19.51%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.63 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6482|ppo_ep: 1|act_loss: 0.034393310546875|cri_loss: 0.01800537109375|unsuper_loss: 0.0 +average reward score: 6.03515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.82%) |Training time=0.47s (21.58%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6483|ppo_ep: 1|act_loss: -0.027374267578125|cri_loss: -0.01319122314453125|unsuper_loss: 0.0 +average reward score: 5.34375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.82%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6484|ppo_ep: 1|act_loss: -0.0150909423828125|cri_loss: -0.007389068603515625|unsuper_loss: 0.0 +average reward score: 4.9296875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.99%) |Training time=0.49s (22.42%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6485|ppo_ep: 1|act_loss: 0.019775390625|cri_loss: 0.01065826416015625|unsuper_loss: 0.0 +average reward score: 5.6640625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.59%) |Training time=0.47s (21.68%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6486|ppo_ep: 1|act_loss: -0.003971099853515625|cri_loss: -0.00188446044921875|unsuper_loss: 0.0 +average reward score: 5.3125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.67s (76.46%) |Training time=0.41s (18.83%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6487|ppo_ep: 1|act_loss: -0.009490966796875|cri_loss: -0.004245758056640625|unsuper_loss: 0.0 +average reward score: 5.3046875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.31%) |Training time=0.46s (21.02%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6488|ppo_ep: 1|act_loss: 0.0171966552734375|cri_loss: 0.00887298583984375|unsuper_loss: 0.0 +average reward score: 5.30078125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.10%) |Training time=0.44s (20.20%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.53 +[2023-04-14 12:45:39,763] [INFO] [logging.py:96:log_dist] [Rank 0] step=6490, skipped=82, lr=[1.1754135535338393e-06, 1.1754135535338393e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:45:39,781] [INFO] [timer.py:199:stop] epoch=0/micro_step=6490/global_step=6490, RunningAvgSamplesPerSec=105.5822909862731, CurrSamplesPerSec=115.98079224638602, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:45:39,874] [INFO] [logging.py:96:log_dist] [Rank 0] step=6490, skipped=108, lr=[6.254863394474447e-07, 6.254863394474447e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6489|ppo_ep: 1|act_loss: 0.0064697265625|cri_loss: 0.003589630126953125|unsuper_loss: 0.0 +average reward score: 5.390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.06%) |Training time=0.44s (20.23%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6490|ppo_ep: 1|act_loss: -0.0025119781494140625|cri_loss: -0.00113677978515625|unsuper_loss: 0.0 +average reward score: 5.2421875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.85%) |Training time=0.44s (20.43%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6491|ppo_ep: 1|act_loss: 0.027984619140625|cri_loss: 0.01507568359375|unsuper_loss: 0.0 +average reward score: 5.82421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.35%) |Training time=0.41s (18.90%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6492|ppo_ep: 1|act_loss: -0.004123687744140625|cri_loss: -0.0017004013061523438|unsuper_loss: 0.0 +average reward score: 4.8984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.08%) |Training time=0.46s (21.23%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6493|ppo_ep: 1|act_loss: 0.004833221435546875|cri_loss: 0.002483367919921875|unsuper_loss: 0.0 +average reward score: 4.6484375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.65%) |Training time=0.48s (21.73%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6494|ppo_ep: 1|act_loss: 0.03521728515625|cri_loss: 0.01904296875|unsuper_loss: 0.0 +average reward score: 5.23046875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.98%) |Training time=0.47s (21.39%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.53 +[2023-04-14 12:45:52,903] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 6495|ppo_ep: 1|act_loss: 0.0124969482421875|cri_loss: 0.00667572021484375|unsuper_loss: 0.0 +average reward score: 5.359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.02%) |Training time=0.45s (20.76%) |Others=0.09 (4.22%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.53 +[2023-04-14 12:45:55,239] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 6496|ppo_ep: 1|act_loss: 0.0023250579833984375|cri_loss: 0.0012359619140625|unsuper_loss: 0.0 +average reward score: 5.99609375 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.06%) |Training time=0.45s (19.08%) |Others=0.09 (3.86%)|CurSamplesPerSec=13.70 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6497|ppo_ep: 1|act_loss: -0.01434326171875|cri_loss: -0.006961822509765625|unsuper_loss: 0.0 +average reward score: 5.4296875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.00%) |Training time=0.47s (21.38%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6498|ppo_ep: 1|act_loss: -0.019866943359375|cri_loss: -0.0095367431640625|unsuper_loss: 0.0 +average reward score: 5.30078125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.36%) |Training time=0.45s (20.92%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.53 +[2023-04-14 12:46:01,651] [INFO] [logging.py:96:log_dist] [Rank 0] step=6500, skipped=82, lr=[1.1632895671153645e-06, 1.1632895671153645e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:46:01,668] [INFO] [timer.py:199:stop] epoch=0/micro_step=6500/global_step=6500, RunningAvgSamplesPerSec=105.59168797775624, CurrSamplesPerSec=124.59731974764485, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:46:01,761] [INFO] [logging.py:96:log_dist] [Rank 0] step=6500, skipped=110, lr=[6.204004813025569e-07, 6.204004813025569e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6499|ppo_ep: 1|act_loss: 0.0130615234375|cri_loss: 0.006610870361328125|unsuper_loss: 0.0 +average reward score: 5.859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.78%) |Training time=0.42s (19.35%) |Others=0.11 (4.87%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6500|ppo_ep: 1|act_loss: 0.00244140625|cri_loss: 0.0013818740844726562|unsuper_loss: 0.0 +average reward score: 4.9296875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.67s (76.74%) |Training time=0.40s (18.52%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6501|ppo_ep: 1|act_loss: 0.01165771484375|cri_loss: 0.00713348388671875|unsuper_loss: 0.0 +average reward score: 5.703125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.04%) |Training time=0.47s (21.31%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6502|ppo_ep: 1|act_loss: 0.0173797607421875|cri_loss: 0.0088348388671875|unsuper_loss: 0.0 +average reward score: 4.984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.44%) |Training time=0.45s (20.87%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6503|ppo_ep: 1|act_loss: 0.029022216796875|cri_loss: 0.015045166015625|unsuper_loss: 0.0 +average reward score: 5.40234375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.43%) |Training time=0.45s (20.89%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6504|ppo_ep: 1|act_loss: -0.00212860107421875|cri_loss: -0.00066375732421875|unsuper_loss: 0.0 +average reward score: 4.8828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.18%) |Training time=0.46s (21.14%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6505|ppo_ep: 1|act_loss: -0.002971649169921875|cri_loss: -0.0011959075927734375|unsuper_loss: 0.0 +average reward score: 5.40234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.97%) |Training time=0.44s (20.31%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6506|ppo_ep: 1|act_loss: 0.01275634765625|cri_loss: 0.006744384765625|unsuper_loss: 0.0 +average reward score: 4.95703125 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.15%) |Training time=0.47s (21.24%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6507|ppo_ep: 1|act_loss: -0.032470703125|cri_loss: -0.01538848876953125|unsuper_loss: 0.0 +average reward score: 4.96484375 +------------------------------------------------------------------------------------- +|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.30%) |Training time=0.49s (21.31%) |Others=0.10 (4.39%)|CurSamplesPerSec=13.95 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6508|ppo_ep: 1|act_loss: -0.0428466796875|cri_loss: -0.02069091796875|unsuper_loss: 0.0 +average reward score: 4.9609375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.68%) |Training time=0.47s (21.68%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.53 +[2023-04-14 12:46:23,525] [INFO] [logging.py:96:log_dist] [Rank 0] step=6510, skipped=82, lr=[1.151219856078901e-06, 1.151219856078901e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:46:23,543] [INFO] [timer.py:199:stop] epoch=0/micro_step=6510/global_step=6510, RunningAvgSamplesPerSec=105.59656922152335, CurrSamplesPerSec=103.94636394544389, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:46:23,636] [INFO] [logging.py:96:log_dist] [Rank 0] step=6510, skipped=110, lr=[6.140682384495902e-07, 6.140682384495902e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6509|ppo_ep: 1|act_loss: 0.0023174285888671875|cri_loss: 0.0017957687377929688|unsuper_loss: 0.0 +average reward score: 5.625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.73%) |Training time=0.47s (21.66%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6510|ppo_ep: 1|act_loss: 0.01549530029296875|cri_loss: 0.00812530517578125|unsuper_loss: 0.0 +average reward score: 5.8671875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.45%) |Training time=0.48s (21.92%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6511|ppo_ep: 1|act_loss: -0.018798828125|cri_loss: -0.00925445556640625|unsuper_loss: 0.0 +average reward score: 5.58984375 +------------------------------------------------------------------------------------- +|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.77s (74.68%) |Training time=0.50s (21.07%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.49 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6512|ppo_ep: 1|act_loss: 0.015350341796875|cri_loss: 0.00786590576171875|unsuper_loss: 0.0 +average reward score: 5.78125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.44%) |Training time=0.45s (20.91%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6513|ppo_ep: 1|act_loss: -0.0010328292846679688|cri_loss: -0.00041866302490234375|unsuper_loss: 0.0 +average reward score: 5.33984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.55%) |Training time=0.45s (20.64%) |Others=0.10 (4.82%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6514|ppo_ep: 1|act_loss: -0.0087890625|cri_loss: -0.003993988037109375|unsuper_loss: 0.0 +average reward score: 6.12890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.89%) |Training time=0.44s (20.47%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6515|ppo_ep: 1|act_loss: 0.0076751708984375|cri_loss: 0.004138946533203125|unsuper_loss: 0.0 +average reward score: 5.64453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.73%) |Training time=0.45s (20.63%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6516|ppo_ep: 1|act_loss: 0.0009355545043945312|cri_loss: 0.0005583763122558594|unsuper_loss: 0.0 +average reward score: 5.44921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.38%) |Training time=0.45s (20.57%) |Others=0.11 (5.05%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6517|ppo_ep: 1|act_loss: 0.021087646484375|cri_loss: 0.011016845703125|unsuper_loss: 0.0 +average reward score: 5.484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.12%) |Training time=0.46s (21.23%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6518|ppo_ep: 1|act_loss: 0.0050048828125|cri_loss: 0.0026264190673828125|unsuper_loss: 0.0 +average reward score: 5.44140625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.88%) |Training time=0.47s (21.50%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53 +[2023-04-14 12:46:45,374] [INFO] [logging.py:96:log_dist] [Rank 0] step=6520, skipped=82, lr=[1.1392045993266831e-06, 1.1392045993266831e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:46:45,392] [INFO] [timer.py:199:stop] epoch=0/micro_step=6520/global_step=6520, RunningAvgSamplesPerSec=105.6001638945027, CurrSamplesPerSec=108.51050440209877, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:46:45,485] [INFO] [logging.py:96:log_dist] [Rank 0] step=6520, skipped=110, lr=[6.077639496548926e-07, 6.077639496548926e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6519|ppo_ep: 1|act_loss: -0.0101165771484375|cri_loss: -0.004627227783203125|unsuper_loss: 0.0 +average reward score: 5.37109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.19%) |Training time=0.46s (21.19%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6520|ppo_ep: 1|act_loss: -0.0202178955078125|cri_loss: -0.0098114013671875|unsuper_loss: 0.0 +average reward score: 5.40234375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.17%) |Training time=0.46s (21.21%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6521|ppo_ep: 1|act_loss: 0.021697998046875|cri_loss: 0.01116180419921875|unsuper_loss: 0.0 +average reward score: 5.3671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.12%) |Training time=0.46s (21.25%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6522|ppo_ep: 1|act_loss: -0.0177001953125|cri_loss: -0.00839996337890625|unsuper_loss: 0.0 +average reward score: 5.65625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.40%) |Training time=0.45s (20.96%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6523|ppo_ep: 1|act_loss: -0.0059967041015625|cri_loss: -0.002777099609375|unsuper_loss: 0.0 +average reward score: 5.4375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.36%) |Training time=0.45s (20.99%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6524|ppo_ep: 1|act_loss: -0.02264404296875|cri_loss: -0.01093292236328125|unsuper_loss: 0.0 +average reward score: 5.390625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.21%) |Training time=0.46s (21.16%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6525|ppo_ep: 1|act_loss: -0.0240478515625|cri_loss: -0.01178741455078125|unsuper_loss: 0.0 +average reward score: 5.48046875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.18%) |Training time=0.46s (21.21%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6526|ppo_ep: 1|act_loss: 0.02386474609375|cri_loss: 0.0123138427734375|unsuper_loss: 0.0 +average reward score: 5.04296875 +------------------------------------------------------------------------------------- +|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.78s (75.47%) |Training time=0.48s (20.26%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.57 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6527|ppo_ep: 1|act_loss: 0.022735595703125|cri_loss: 0.01175689697265625|unsuper_loss: 0.0 +average reward score: 5.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.28%) |Training time=0.46s (21.10%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6528|ppo_ep: 1|act_loss: 0.0005331039428710938|cri_loss: 0.0003275871276855469|unsuper_loss: 0.0 +average reward score: 4.90234375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.23%) |Training time=0.46s (21.20%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.53 +[2023-04-14 12:47:07,221] [INFO] [logging.py:96:log_dist] [Rank 0] step=6530, skipped=82, lr=[1.1272439749537998e-06, 1.1272439749537998e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:47:07,239] [INFO] [timer.py:199:stop] epoch=0/micro_step=6530/global_step=6530, RunningAvgSamplesPerSec=105.60338435686515, CurrSamplesPerSec=109.80625927543986, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:47:07,332] [INFO] [logging.py:96:log_dist] [Rank 0] step=6530, skipped=110, lr=[6.01487708363232e-07, 6.01487708363232e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6529|ppo_ep: 1|act_loss: 0.05078125|cri_loss: 0.026214599609375|unsuper_loss: 0.0 +average reward score: 5.1875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.39%) |Training time=0.45s (21.01%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6530|ppo_ep: 1|act_loss: 0.002910614013671875|cri_loss: 0.00180816650390625|unsuper_loss: 0.0 +average reward score: 4.74609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.48%) |Training time=0.45s (20.88%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6531|ppo_ep: 1|act_loss: -0.0006227493286132812|cri_loss: 0.0004520416259765625|unsuper_loss: 0.0 +average reward score: 5.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.62%) |Training time=0.45s (20.75%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6532|ppo_ep: 1|act_loss: -0.0004138946533203125|cri_loss: 0.00014734268188476562|unsuper_loss: 0.0 +average reward score: 5.3671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.20%) |Training time=0.46s (21.16%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6533|ppo_ep: 1|act_loss: -0.022857666015625|cri_loss: -0.01116943359375|unsuper_loss: 0.0 +average reward score: 5.15625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.48%) |Training time=0.45s (20.77%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6534|ppo_ep: 1|act_loss: -0.0067138671875|cri_loss: -0.00313568115234375|unsuper_loss: 0.0 +average reward score: 5.72265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.49%) |Training time=0.45s (20.86%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6535|ppo_ep: 1|act_loss: -0.0179595947265625|cri_loss: -0.0081939697265625|unsuper_loss: 0.0 +average reward score: 4.765625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.52%) |Training time=0.45s (20.88%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6536|ppo_ep: 1|act_loss: 0.00745391845703125|cri_loss: 0.003978729248046875|unsuper_loss: 0.0 +average reward score: 5.09375 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.30%) |Training time=0.46s (20.91%) |Others=0.11 (4.79%)|CurSamplesPerSec=14.46 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6537|ppo_ep: 1|act_loss: 0.0206451416015625|cri_loss: 0.01094818115234375|unsuper_loss: 0.0 +average reward score: 4.89453125 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.95%) |Training time=0.47s (20.62%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.11 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6538|ppo_ep: 1|act_loss: -0.032745361328125|cri_loss: -0.0157318115234375|unsuper_loss: 0.0 +average reward score: 5.44921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.62%) |Training time=0.45s (20.73%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53 +[2023-04-14 12:47:29,021] [INFO] [logging.py:96:log_dist] [Rank 0] step=6540, skipped=82, lr=[1.1153381602455581e-06, 1.1153381602455581e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:47:29,039] [INFO] [timer.py:199:stop] epoch=0/micro_step=6540/global_step=6540, RunningAvgSamplesPerSec=105.60941207531546, CurrSamplesPerSec=111.99790386308354, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:47:29,132] [INFO] [logging.py:96:log_dist] [Rank 0] step=6540, skipped=110, lr=[5.952396076036457e-07, 5.952396076036457e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6539|ppo_ep: 1|act_loss: 0.002498626708984375|cri_loss: 0.001873016357421875|unsuper_loss: 0.0 +average reward score: 5.484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.56%) |Training time=0.45s (20.82%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6540|ppo_ep: 1|act_loss: -0.006824493408203125|cri_loss: -0.0032329559326171875|unsuper_loss: 0.0 +average reward score: 5.83203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.32%) |Training time=0.45s (21.04%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6541|ppo_ep: 1|act_loss: -0.019256591796875|cri_loss: -0.00939178466796875|unsuper_loss: 0.0 +average reward score: 5.77734375 +------------------------------------------------------------------------------------- +|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.78s (75.69%) |Training time=0.47s (20.02%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.62 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6542|ppo_ep: 1|act_loss: 0.023956298828125|cri_loss: 0.01236724853515625|unsuper_loss: 0.0 +average reward score: 6.453125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.79%) |Training time=0.47s (21.61%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6543|ppo_ep: 1|act_loss: 0.014129638671875|cri_loss: 0.007312774658203125|unsuper_loss: 0.0 +average reward score: 4.609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.83%) |Training time=0.49s (22.55%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6544|ppo_ep: 1|act_loss: -0.0164642333984375|cri_loss: -0.0080413818359375|unsuper_loss: 0.0 +average reward score: 4.69140625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.61%) |Training time=0.49s (22.77%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6545|ppo_ep: 1|act_loss: -0.00740814208984375|cri_loss: -0.003650665283203125|unsuper_loss: 0.0 +average reward score: 4.8515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.92%) |Training time=0.49s (22.47%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6546|ppo_ep: 1|act_loss: -0.0016851425170898438|cri_loss: -0.0007281303405761719|unsuper_loss: 0.0 +average reward score: 5.1953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.01%) |Training time=0.48s (22.38%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6547|ppo_ep: 1|act_loss: -0.0064849853515625|cri_loss: -0.0031108856201171875|unsuper_loss: 0.0 +average reward score: 5.515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.81%) |Training time=0.49s (22.57%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6548|ppo_ep: 1|act_loss: -0.0235748291015625|cri_loss: -0.01151275634765625|unsuper_loss: 0.0 +average reward score: 5.35546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.96%) |Training time=0.49s (22.42%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53 +[2023-04-14 12:47:50,874] [INFO] [logging.py:96:log_dist] [Rank 0] step=6550, skipped=82, lr=[1.103487331674853e-06, 1.103487331674853e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:47:50,892] [INFO] [timer.py:199:stop] epoch=0/micro_step=6550/global_step=6550, RunningAvgSamplesPerSec=105.60097993930471, CurrSamplesPerSec=98.59352218869277, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:47:50,985] [INFO] [logging.py:96:log_dist] [Rank 0] step=6550, skipped=110, lr=[5.890197399880581e-07, 5.890197399880581e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6549|ppo_ep: 1|act_loss: -0.0108489990234375|cri_loss: -0.005306243896484375|unsuper_loss: 0.0 +average reward score: 6.22265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.94%) |Training time=0.49s (22.49%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6550|ppo_ep: 1|act_loss: 0.0272064208984375|cri_loss: 0.01419830322265625|unsuper_loss: 0.0 +average reward score: 5.375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.67%) |Training time=0.49s (22.72%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6551|ppo_ep: 1|act_loss: 0.005901336669921875|cri_loss: 0.003063201904296875|unsuper_loss: 0.0 +average reward score: 5.421875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.66%) |Training time=0.50s (22.74%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6552|ppo_ep: 1|act_loss: -0.0030727386474609375|cri_loss: -0.000606536865234375|unsuper_loss: 0.0 +average reward score: 4.1875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.63%) |Training time=0.49s (22.68%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6553|ppo_ep: 1|act_loss: 0.006561279296875|cri_loss: 0.003398895263671875|unsuper_loss: 0.0 +average reward score: 5.53515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.87%) |Training time=0.49s (22.52%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6554|ppo_ep: 1|act_loss: 0.011383056640625|cri_loss: 0.005832672119140625|unsuper_loss: 0.0 +average reward score: 5.84765625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.68%) |Training time=0.49s (22.72%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6555|ppo_ep: 1|act_loss: -0.00696563720703125|cri_loss: -0.00336456298828125|unsuper_loss: 0.0 +average reward score: 5.6328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.11%) |Training time=0.48s (22.27%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6556|ppo_ep: 1|act_loss: 0.0443115234375|cri_loss: 0.0237274169921875|unsuper_loss: 0.0 +average reward score: 5.0546875 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.61s (70.90%) |Training time=0.56s (24.58%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.13 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6557|ppo_ep: 1|act_loss: 0.0042877197265625|cri_loss: 0.002197265625|unsuper_loss: 0.0 +average reward score: 5.73828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.58%) |Training time=0.47s (21.79%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6558|ppo_ep: 1|act_loss: 0.016754150390625|cri_loss: 0.00850677490234375|unsuper_loss: 0.0 +average reward score: 5.75 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.17%) |Training time=0.48s (22.21%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53 +[2023-04-14 12:48:12,678] [INFO] [logging.py:96:log_dist] [Rank 0] step=6560, skipped=82, lr=[1.091691664899555e-06, 1.091691664899555e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:48:12,696] [INFO] [timer.py:199:stop] epoch=0/micro_step=6560/global_step=6560, RunningAvgSamplesPerSec=105.58608899616205, CurrSamplesPerSec=100.87136841519845, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:48:12,789] [INFO] [logging.py:96:log_dist] [Rank 0] step=6560, skipped=110, lr=[5.828281977099129e-07, 5.828281977099129e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6559|ppo_ep: 1|act_loss: -0.0164337158203125|cri_loss: -0.00799560546875|unsuper_loss: 0.0 +average reward score: 5.5234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.23%) |Training time=0.48s (22.15%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6560|ppo_ep: 1|act_loss: -0.00439453125|cri_loss: -0.0017576217651367188|unsuper_loss: 0.0 +average reward score: 5.0078125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.11%) |Training time=0.48s (22.27%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6561|ppo_ep: 1|act_loss: 0.029449462890625|cri_loss: 0.0149383544921875|unsuper_loss: 0.0 +average reward score: 5.4609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.14%) |Training time=0.48s (22.24%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6562|ppo_ep: 1|act_loss: 0.00925445556640625|cri_loss: 0.0048828125|unsuper_loss: 0.0 +average reward score: 5.6875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.23%) |Training time=0.48s (22.13%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6563|ppo_ep: 1|act_loss: -0.004062652587890625|cri_loss: -0.0015611648559570312|unsuper_loss: 0.0 +average reward score: 5.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.22%) |Training time=0.48s (22.15%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6564|ppo_ep: 1|act_loss: -0.002902984619140625|cri_loss: -0.001293182373046875|unsuper_loss: 0.0 +average reward score: 5.14453125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.59%) |Training time=0.48s (22.24%) |Others=0.11 (5.17%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6565|ppo_ep: 1|act_loss: 0.026123046875|cri_loss: 0.01336669921875|unsuper_loss: 0.0 +average reward score: 5.4296875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.27%) |Training time=0.48s (22.13%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6566|ppo_ep: 1|act_loss: 0.0179443359375|cri_loss: 0.00943756103515625|unsuper_loss: 0.0 +average reward score: 5.12890625 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.69s (73.87%) |Training time=0.50s (21.73%) |Others=0.10 (4.40%)|CurSamplesPerSec=14.02 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6567|ppo_ep: 1|act_loss: 0.02716064453125|cri_loss: 0.0139312744140625|unsuper_loss: 0.0 +average reward score: 5.359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.21%) |Training time=0.48s (22.16%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6568|ppo_ep: 1|act_loss: -0.009429931640625|cri_loss: -0.004634857177734375|unsuper_loss: 0.0 +average reward score: 5.96875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.01%) |Training time=0.48s (22.35%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53 +[2023-04-14 12:48:34,461] [INFO] [logging.py:96:log_dist] [Rank 0] step=6570, skipped=82, lr=[1.0799513347598973e-06, 1.0799513347598973e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:48:34,479] [INFO] [timer.py:199:stop] epoch=0/micro_step=6570/global_step=6570, RunningAvgSamplesPerSec=105.57698320907716, CurrSamplesPerSec=100.9552079838854, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:48:34,572] [INFO] [logging.py:96:log_dist] [Rank 0] step=6570, skipped=110, lr=[5.766650725428027e-07, 5.766650725428027e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6569|ppo_ep: 1|act_loss: -0.0192108154296875|cri_loss: -0.00888824462890625|unsuper_loss: 0.0 +average reward score: 5.83984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.22%) |Training time=0.48s (22.16%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6570|ppo_ep: 1|act_loss: -0.0006604194641113281|cri_loss: -6.580352783203125e-05|unsuper_loss: 0.0 +average reward score: 4.609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.98%) |Training time=0.48s (22.39%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6571|ppo_ep: 1|act_loss: -0.014739990234375|cri_loss: -0.00199127197265625|unsuper_loss: 0.0 +average reward score: 5.109375 +------------------------------------------------------------------------------------- +|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.74s (74.34%) |Training time=0.50s (21.37%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.63 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6572|ppo_ep: 1|act_loss: 0.0235137939453125|cri_loss: 0.01197052001953125|unsuper_loss: 0.0 +average reward score: 5.5546875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.71%) |Training time=0.50s (22.68%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6573|ppo_ep: 1|act_loss: -0.00745391845703125|cri_loss: -0.003570556640625|unsuper_loss: 0.0 +average reward score: 5.0 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.25%) |Training time=0.48s (22.12%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6574|ppo_ep: 1|act_loss: -0.0093536376953125|cri_loss: -0.004589080810546875|unsuper_loss: 0.0 +average reward score: 5.453125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.95%) |Training time=0.49s (22.44%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6575|ppo_ep: 1|act_loss: -0.0058135986328125|cri_loss: -0.0026302337646484375|unsuper_loss: 0.0 +average reward score: 4.78125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.23%) |Training time=0.48s (22.13%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6576|ppo_ep: 1|act_loss: 0.005096435546875|cri_loss: 0.0028324127197265625|unsuper_loss: 0.0 +average reward score: 4.578125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.17%) |Training time=0.48s (22.19%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6577|ppo_ep: 1|act_loss: 0.022705078125|cri_loss: 0.01177978515625|unsuper_loss: 0.0 +average reward score: 5.515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.02%) |Training time=0.48s (22.35%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6578|ppo_ep: 1|act_loss: -0.009124755859375|cri_loss: -0.004364013671875|unsuper_loss: 0.0 +average reward score: 5.16796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.98%) |Training time=0.48s (22.19%) |Others=0.11 (4.84%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.53 +[2023-04-14 12:48:56,317] [INFO] [logging.py:96:log_dist] [Rank 0] step=6580, skipped=82, lr=[1.0682665152758964e-06, 1.0682665152758964e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:48:56,336] [INFO] [timer.py:199:stop] epoch=0/micro_step=6580/global_step=6580, RunningAvgSamplesPerSec=105.56653477849474, CurrSamplesPerSec=100.78146226450514, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:48:56,428] [INFO] [logging.py:96:log_dist] [Rank 0] step=6580, skipped=110, lr=[5.705304558391109e-07, 5.705304558391109e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6579|ppo_ep: 1|act_loss: -0.004825592041015625|cri_loss: -0.00231170654296875|unsuper_loss: 0.0 +average reward score: 5.6875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.23%) |Training time=0.48s (22.15%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6580|ppo_ep: 1|act_loss: -0.003093719482421875|cri_loss: -0.0014362335205078125|unsuper_loss: 0.0 +average reward score: 5.28125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.48s (22.18%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6581|ppo_ep: 1|act_loss: 0.0014276504516601562|cri_loss: 0.001483917236328125|unsuper_loss: 0.0 +average reward score: 5.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.14%) |Training time=0.48s (22.22%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6582|ppo_ep: 1|act_loss: 0.00543975830078125|cri_loss: 0.0029315948486328125|unsuper_loss: 0.0 +average reward score: 5.90625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.18%) |Training time=0.48s (22.19%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6583|ppo_ep: 1|act_loss: 0.003963470458984375|cri_loss: 0.00213623046875|unsuper_loss: 0.0 +average reward score: 5.2109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.26%) |Training time=0.48s (22.09%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6584|ppo_ep: 1|act_loss: -0.0157318115234375|cri_loss: -0.007671356201171875|unsuper_loss: 0.0 +average reward score: 5.26953125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.98%) |Training time=0.49s (22.41%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6585|ppo_ep: 1|act_loss: 0.019989013671875|cri_loss: 0.01015472412109375|unsuper_loss: 0.0 +average reward score: 4.8203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.49%) |Training time=0.47s (21.86%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6586|ppo_ep: 1|act_loss: 0.0076751708984375|cri_loss: 0.004123687744140625|unsuper_loss: 0.0 +average reward score: 5.22265625 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.74s (74.99%) |Training time=0.48s (20.55%) |Others=0.10 (4.45%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6587|ppo_ep: 1|act_loss: -0.020965576171875|cri_loss: -0.00983428955078125|unsuper_loss: 0.0 +average reward score: 5.578125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.19%) |Training time=0.48s (22.14%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6588|ppo_ep: 1|act_loss: -0.0211181640625|cri_loss: -0.01003265380859375|unsuper_loss: 0.0 +average reward score: 5.23828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.27%) |Training time=0.48s (22.10%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53 +[2023-04-14 12:49:18,127] [INFO] [logging.py:96:log_dist] [Rank 0] step=6590, skipped=82, lr=[1.0566373796447659e-06, 1.0566373796447659e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:49:18,145] [INFO] [timer.py:199:stop] epoch=0/micro_step=6590/global_step=6590, RunningAvgSamplesPerSec=105.55925555019671, CurrSamplesPerSec=101.66607937390451, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:49:18,238] [INFO] [logging.py:96:log_dist] [Rank 0] step=6590, skipped=110, lr=[5.644244385286548e-07, 5.644244385286548e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6589|ppo_ep: 1|act_loss: -0.019561767578125|cri_loss: -0.009368896484375|unsuper_loss: 0.0 +average reward score: 5.1875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.31%) |Training time=0.48s (22.06%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6590|ppo_ep: 1|act_loss: -0.0198974609375|cri_loss: -0.00963592529296875|unsuper_loss: 0.0 +average reward score: 5.9140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.24%) |Training time=0.48s (22.06%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6591|ppo_ep: 1|act_loss: -0.0243988037109375|cri_loss: -0.011871337890625|unsuper_loss: 0.0 +average reward score: 5.78515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.03%) |Training time=0.48s (22.34%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6592|ppo_ep: 1|act_loss: -0.00626373291015625|cri_loss: -0.002712249755859375|unsuper_loss: 0.0 +average reward score: 5.6015625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.19%) |Training time=0.48s (22.16%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6593|ppo_ep: 1|act_loss: 0.025970458984375|cri_loss: 0.01407623291015625|unsuper_loss: 0.0 +average reward score: 5.1953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.14%) |Training time=0.48s (22.23%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6594|ppo_ep: 1|act_loss: 0.0013799667358398438|cri_loss: 0.0013532638549804688|unsuper_loss: 0.0 +average reward score: 5.94921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.11%) |Training time=0.48s (22.27%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6595|ppo_ep: 1|act_loss: 0.02069091796875|cri_loss: 0.01064300537109375|unsuper_loss: 0.0 +average reward score: 5.4921875 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.96%) |Training time=0.47s (21.42%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6596|ppo_ep: 1|act_loss: -0.0013408660888671875|cri_loss: -0.00049591064453125|unsuper_loss: 0.0 +average reward score: 5.109375 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.27%) |Training time=0.48s (21.31%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.09 |AvgSamplesPerSec=14.53 +[2023-04-14 12:49:35,683] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 6597|ppo_ep: 1|act_loss: -0.003520965576171875|cri_loss: -0.0013427734375|unsuper_loss: 0.0 +average reward score: 5.5625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.63%) |Training time=0.48s (22.20%) |Others=0.09 (4.17%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.53 +[2023-04-14 12:49:37,840] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 6598|ppo_ep: 1|act_loss: 0.0361328125|cri_loss: 0.0187530517578125|unsuper_loss: 0.0 +average reward score: 5.328125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.45%) |Training time=0.48s (22.24%) |Others=0.09 (4.31%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.53 +[2023-04-14 12:49:39,893] [INFO] [logging.py:96:log_dist] [Rank 0] step=6600, skipped=82, lr=[1.0450641002383495e-06, 1.0450641002383495e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:49:39,911] [INFO] [timer.py:199:stop] epoch=0/micro_step=6600/global_step=6600, RunningAvgSamplesPerSec=105.55175453772948, CurrSamplesPerSec=100.11944716301052, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:49:40,006] [INFO] [logging.py:96:log_dist] [Rank 0] step=6600, skipped=112, lr=[5.59560277077667e-07, 5.59560277077667e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6599|ppo_ep: 1|act_loss: 0.0292510986328125|cri_loss: 0.0148773193359375|unsuper_loss: 0.0 +average reward score: 6.1796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.09%) |Training time=0.48s (22.25%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6600|ppo_ep: 1|act_loss: 0.002239227294921875|cri_loss: 0.00122833251953125|unsuper_loss: 0.0 +average reward score: 5.59765625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.21%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6601|ppo_ep: 1|act_loss: -0.0012359619140625|cri_loss: -0.0005125999450683594|unsuper_loss: 0.0 +average reward score: 5.6328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.27%) |Training time=0.48s (22.10%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6602|ppo_ep: 1|act_loss: -0.00958251953125|cri_loss: -0.00464630126953125|unsuper_loss: 0.0 +average reward score: 5.26953125 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.65s (71.46%) |Training time=0.56s (24.18%) |Others=0.10 (4.36%)|CurSamplesPerSec=13.88 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6603|ppo_ep: 1|act_loss: 0.0058135986328125|cri_loss: 0.0030841827392578125|unsuper_loss: 0.0 +average reward score: 5.0859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.47s (21.90%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6604|ppo_ep: 1|act_loss: -0.0102691650390625|cri_loss: -0.00485992431640625|unsuper_loss: 0.0 +average reward score: 5.0546875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.43%) |Training time=0.50s (22.98%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6605|ppo_ep: 1|act_loss: -0.0034236907958984375|cri_loss: -0.0015735626220703125|unsuper_loss: 0.0 +average reward score: 5.28125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.93%) |Training time=0.49s (22.46%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6606|ppo_ep: 1|act_loss: -0.02056884765625|cri_loss: -0.01004791259765625|unsuper_loss: 0.0 +average reward score: 5.08984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.88%) |Training time=0.49s (22.50%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6607|ppo_ep: 1|act_loss: -0.0178070068359375|cri_loss: -0.0086517333984375|unsuper_loss: 0.0 +average reward score: 5.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.88%) |Training time=0.47s (21.49%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6608|ppo_ep: 1|act_loss: 0.016571044921875|cri_loss: 0.00848388671875|unsuper_loss: 0.0 +average reward score: 5.13671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.04%) |Training time=0.46s (21.34%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53 +[2023-04-14 12:50:01,722] [INFO] [logging.py:96:log_dist] [Rank 0] step=6610, skipped=82, lr=[1.0335468486005652e-06, 1.0335468486005652e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:50:01,740] [INFO] [timer.py:199:stop] epoch=0/micro_step=6610/global_step=6610, RunningAvgSamplesPerSec=105.54412564005361, CurrSamplesPerSec=105.0409292468261, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:50:01,836] [INFO] [logging.py:96:log_dist] [Rank 0] step=6610, skipped=112, lr=[5.53505966464195e-07, 5.53505966464195e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6609|ppo_ep: 1|act_loss: -0.007537841796875|cri_loss: -0.0036907196044921875|unsuper_loss: 0.0 +average reward score: 5.42578125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.75%) |Training time=0.47s (21.53%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6610|ppo_ep: 1|act_loss: -0.001495361328125|cri_loss: -0.0006341934204101562|unsuper_loss: 0.0 +average reward score: 6.0 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.78%) |Training time=0.47s (21.60%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6611|ppo_ep: 1|act_loss: -0.0090789794921875|cri_loss: -0.004428863525390625|unsuper_loss: 0.0 +average reward score: 4.9296875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.08%) |Training time=0.47s (21.35%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6612|ppo_ep: 1|act_loss: -0.01470184326171875|cri_loss: -0.0068206787109375|unsuper_loss: 0.0 +average reward score: 5.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.75%) |Training time=0.47s (21.64%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6613|ppo_ep: 1|act_loss: 0.0101318359375|cri_loss: 0.00521087646484375|unsuper_loss: 0.0 +average reward score: 5.36328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.63%) |Training time=0.47s (21.69%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6614|ppo_ep: 1|act_loss: -0.000881195068359375|cri_loss: 7.82012939453125e-05|unsuper_loss: 0.0 +average reward score: 4.9921875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.77%) |Training time=0.47s (21.62%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6615|ppo_ep: 1|act_loss: -0.00189971923828125|cri_loss: -0.0008196830749511719|unsuper_loss: 0.0 +average reward score: 5.88671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.01%) |Training time=0.46s (21.36%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6616|ppo_ep: 1|act_loss: 0.0089111328125|cri_loss: 0.004726409912109375|unsuper_loss: 0.0 +average reward score: 5.53125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.91%) |Training time=0.47s (21.47%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6617|ppo_ep: 1|act_loss: -0.017181396484375|cri_loss: -0.0083770751953125|unsuper_loss: 0.0 +average reward score: 5.984375 +------------------------------------------------------------------------------------- +|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.61s (72.20%) |Training time=0.48s (21.70%) |Others=0.14 (6.10%)|CurSamplesPerSec=14.37 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6618|ppo_ep: 1|act_loss: -0.006389617919921875|cri_loss: -0.0030574798583984375|unsuper_loss: 0.0 +average reward score: 5.171875 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.68s (74.84%) |Training time=0.47s (20.68%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.23 |AvgSamplesPerSec=14.53 +[2023-04-14 12:50:23,554] [INFO] [logging.py:96:log_dist] [Rank 0] step=6620, skipped=82, lr=[1.0220857954448672e-06, 1.0220857954448672e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:50:23,572] [INFO] [timer.py:199:stop] epoch=0/micro_step=6620/global_step=6620, RunningAvgSamplesPerSec=105.54226757249026, CurrSamplesPerSec=101.52726766602092, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:50:23,665] [INFO] [logging.py:96:log_dist] [Rank 0] step=6620, skipped=112, lr=[5.474805075879616e-07, 5.474805075879616e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6619|ppo_ep: 1|act_loss: -0.0173492431640625|cri_loss: -0.00852203369140625|unsuper_loss: 0.0 +average reward score: 5.140625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.51%) |Training time=0.48s (21.91%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6620|ppo_ep: 1|act_loss: -0.02215576171875|cri_loss: -0.01073455810546875|unsuper_loss: 0.0 +average reward score: 5.5 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.35%) |Training time=0.48s (22.02%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6621|ppo_ep: 1|act_loss: 0.03692626953125|cri_loss: 0.0188446044921875|unsuper_loss: 0.0 +average reward score: 5.125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.48s (22.01%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6622|ppo_ep: 1|act_loss: -0.00305938720703125|cri_loss: -0.001110076904296875|unsuper_loss: 0.0 +average reward score: 4.94921875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.12%) |Training time=0.48s (22.24%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6623|ppo_ep: 1|act_loss: 0.00664520263671875|cri_loss: 0.0036258697509765625|unsuper_loss: 0.0 +average reward score: 5.5703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.08%) |Training time=0.48s (22.30%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6624|ppo_ep: 1|act_loss: 0.002655029296875|cri_loss: 0.0015382766723632812|unsuper_loss: 0.0 +average reward score: 5.0 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.28%) |Training time=0.48s (22.14%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6625|ppo_ep: 1|act_loss: -0.017791748046875|cri_loss: -0.008697509765625|unsuper_loss: 0.0 +average reward score: 5.609375 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.48%) |Training time=0.48s (21.08%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.13 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6626|ppo_ep: 1|act_loss: -0.005893707275390625|cri_loss: -0.0027294158935546875|unsuper_loss: 0.0 +average reward score: 5.640625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.30%) |Training time=0.48s (22.06%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6627|ppo_ep: 1|act_loss: -0.0082244873046875|cri_loss: -0.003978729248046875|unsuper_loss: 0.0 +average reward score: 5.5390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.05%) |Training time=0.48s (22.11%) |Others=0.10 (4.84%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6628|ppo_ep: 1|act_loss: -0.011962890625|cri_loss: -0.0058441162109375|unsuper_loss: 0.0 +average reward score: 5.21875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.04%) |Training time=0.48s (22.33%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53 +[2023-04-14 12:50:45,332] [INFO] [logging.py:96:log_dist] [Rank 0] step=6630, skipped=82, lr=[1.0106811106517118e-06, 1.0106811106517118e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:50:45,350] [INFO] [timer.py:199:stop] epoch=0/micro_step=6630/global_step=6630, RunningAvgSamplesPerSec=105.53529721787794, CurrSamplesPerSec=101.13496418923755, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:50:45,442] [INFO] [logging.py:96:log_dist] [Rank 0] step=6630, skipped=112, lr=[5.41483989760803e-07, 5.41483989760803e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6629|ppo_ep: 1|act_loss: 0.043121337890625|cri_loss: 0.0227813720703125|unsuper_loss: 0.0 +average reward score: 5.6953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.23%) |Training time=0.48s (22.16%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6630|ppo_ep: 1|act_loss: -0.00467681884765625|cri_loss: -0.001865386962890625|unsuper_loss: 0.0 +average reward score: 5.37890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.90%) |Training time=0.49s (22.49%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6631|ppo_ep: 1|act_loss: -0.00705718994140625|cri_loss: -0.003284454345703125|unsuper_loss: 0.0 +average reward score: 5.27734375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.47s (21.97%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6632|ppo_ep: 1|act_loss: -0.001850128173828125|cri_loss: -0.0005612373352050781|unsuper_loss: 0.0 +average reward score: 5.32421875 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.35%) |Training time=0.48s (20.35%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6633|ppo_ep: 1|act_loss: 0.007160186767578125|cri_loss: 0.0037708282470703125|unsuper_loss: 0.0 +average reward score: 5.4453125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.21%) |Training time=0.48s (22.15%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6634|ppo_ep: 1|act_loss: 0.026824951171875|cri_loss: 0.0143890380859375|unsuper_loss: 0.0 +average reward score: 5.37109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.21%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6635|ppo_ep: 1|act_loss: -0.00769805908203125|cri_loss: -0.0037708282470703125|unsuper_loss: 0.0 +average reward score: 5.69921875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.20%) |Training time=0.48s (22.17%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6636|ppo_ep: 1|act_loss: -0.016510009765625|cri_loss: -0.00434112548828125|unsuper_loss: 0.0 +average reward score: 5.8203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.21%) |Training time=0.48s (22.17%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6637|ppo_ep: 1|act_loss: 0.0094146728515625|cri_loss: 0.004886627197265625|unsuper_loss: 0.0 +average reward score: 4.875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.13%) |Training time=0.48s (22.25%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6638|ppo_ep: 1|act_loss: -0.014373779296875|cri_loss: -0.006885528564453125|unsuper_loss: 0.0 +average reward score: 4.91015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.11%) |Training time=0.48s (22.26%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.53 +[2023-04-14 12:51:07,159] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +[2023-04-14 12:51:07,159] [INFO] [logging.py:96:log_dist] [Rank 0] step=6640, skipped=83, lr=[1.0004652290207957e-06, 1.0004652290207957e-06], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:51:07,160] [INFO] [timer.py:199:stop] epoch=0/micro_step=6640/global_step=6640, RunningAvgSamplesPerSec=105.52910092880747, CurrSamplesPerSec=109.88258198147307, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:51:07,252] [INFO] [logging.py:96:log_dist] [Rank 0] step=6640, skipped=112, lr=[5.355165018655778e-07, 5.355165018655778e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6639|ppo_ep: 1|act_loss: -0.00891876220703125|cri_loss: -0.004344940185546875|unsuper_loss: 0.0 +average reward score: 5.36328125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.34%) |Training time=0.45s (21.05%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.53 +[2023-04-14 12:51:09,301] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 6640|ppo_ep: 1|act_loss: -0.0029621124267578125|cri_loss: -0.0013227462768554688|unsuper_loss: 0.0 +average reward score: 5.09375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.80%) |Training time=0.46s (21.55%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.53 +epoch: 0|step: 6641|ppo_ep: 1|act_loss: -0.01140594482421875|cri_loss: -0.00524139404296875|unsuper_loss: 0.0 +average reward score: 5.4453125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.12%) |Training time=0.48s (22.26%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6642|ppo_ep: 1|act_loss: 0.020294189453125|cri_loss: 0.010894775390625|unsuper_loss: 0.0 +average reward score: 5.3046875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.99%) |Training time=0.49s (22.40%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6643|ppo_ep: 1|act_loss: 0.0182037353515625|cri_loss: 0.00933837890625|unsuper_loss: 0.0 +average reward score: 4.203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.02%) |Training time=0.48s (22.35%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6644|ppo_ep: 1|act_loss: -0.00525665283203125|cri_loss: -0.002307891845703125|unsuper_loss: 0.0 +average reward score: 5.11328125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.30%) |Training time=0.48s (22.07%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6645|ppo_ep: 1|act_loss: 0.0092010498046875|cri_loss: 0.005702972412109375|unsuper_loss: 0.0 +average reward score: 6.05859375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.23%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6646|ppo_ep: 1|act_loss: -0.037841796875|cri_loss: -0.01470947265625|unsuper_loss: 0.0 +average reward score: 5.76171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.22%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6647|ppo_ep: 1|act_loss: 0.005802154541015625|cri_loss: 0.0029850006103515625|unsuper_loss: 0.0 +average reward score: 5.6953125 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.88%) |Training time=0.49s (20.83%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6648|ppo_ep: 1|act_loss: 0.0277862548828125|cri_loss: 0.01418304443359375|unsuper_loss: 0.0 +average reward score: 4.96875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.81%) |Training time=0.47s (21.53%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.54 +[2023-04-14 12:51:28,951] [INFO] [logging.py:96:log_dist] [Rank 0] step=6650, skipped=84, lr=[9.90295265354164e-07, 9.90295265354164e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:51:28,969] [INFO] [timer.py:199:stop] epoch=0/micro_step=6650/global_step=6650, RunningAvgSamplesPerSec=105.52190506169607, CurrSamplesPerSec=98.76568618000208, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:51:29,062] [INFO] [logging.py:96:log_dist] [Rank 0] step=6650, skipped=112, lr=[5.295781323548535e-07, 5.295781323548535e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6649|ppo_ep: 1|act_loss: -0.032806396484375|cri_loss: -0.01139068603515625|unsuper_loss: 0.0 +average reward score: 5.41015625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.01%) |Training time=0.49s (22.40%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6650|ppo_ep: 1|act_loss: -0.01148223876953125|cri_loss: -0.005680084228515625|unsuper_loss: 0.0 +average reward score: 5.47265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.16%) |Training time=0.48s (22.22%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6651|ppo_ep: 1|act_loss: -0.0098876953125|cri_loss: -0.004856109619140625|unsuper_loss: 0.0 +average reward score: 5.765625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.47s (21.90%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6652|ppo_ep: 1|act_loss: 0.0227203369140625|cri_loss: 0.0144195556640625|unsuper_loss: 0.0 +average reward score: 4.796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.03%) |Training time=0.48s (22.36%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6653|ppo_ep: 1|act_loss: 0.0352783203125|cri_loss: 0.0180816650390625|unsuper_loss: 0.0 +average reward score: 5.27734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.35%) |Training time=0.48s (22.01%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6654|ppo_ep: 1|act_loss: -0.01111602783203125|cri_loss: -0.004726409912109375|unsuper_loss: 0.0 +average reward score: 5.01171875 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.62s (72.03%) |Training time=0.49s (21.78%) |Others=0.14 (6.19%)|CurSamplesPerSec=14.22 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6655|ppo_ep: 1|act_loss: 0.003253936767578125|cri_loss: 0.002178192138671875|unsuper_loss: 0.0 +average reward score: 5.5546875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.06%) |Training time=0.48s (22.33%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6656|ppo_ep: 1|act_loss: 0.01036834716796875|cri_loss: 0.00536346435546875|unsuper_loss: 0.0 +average reward score: 4.9453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.48s (22.04%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6657|ppo_ep: 1|act_loss: -0.001857757568359375|cri_loss: -0.0005521774291992188|unsuper_loss: 0.0 +average reward score: 5.34765625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.06%) |Training time=0.48s (22.32%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6658|ppo_ep: 1|act_loss: -0.00725555419921875|cri_loss: -0.003559112548828125|unsuper_loss: 0.0 +average reward score: 5.63671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.16%) |Training time=0.48s (22.20%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.54 +[2023-04-14 12:51:50,678] [INFO] [logging.py:96:log_dist] [Rank 0] step=6660, skipped=84, lr=[9.790493086186587e-07, 9.790493086186587e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:51:50,697] [INFO] [timer.py:199:stop] epoch=0/micro_step=6660/global_step=6660, RunningAvgSamplesPerSec=105.51379557442812, CurrSamplesPerSec=99.45649093894681, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:51:50,790] [INFO] [logging.py:96:log_dist] [Rank 0] step=6660, skipped=112, lr=[5.236689692495916e-07, 5.236689692495916e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6659|ppo_ep: 1|act_loss: -0.0159912109375|cri_loss: -0.00777435302734375|unsuper_loss: 0.0 +average reward score: 5.0390625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.01%) |Training time=0.48s (22.38%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6660|ppo_ep: 1|act_loss: -0.00875091552734375|cri_loss: -0.00411224365234375|unsuper_loss: 0.0 +average reward score: 5.234375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.33%) |Training time=0.47s (22.01%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6661|ppo_ep: 1|act_loss: -0.009613037109375|cri_loss: -0.0046539306640625|unsuper_loss: 0.0 +average reward score: 5.36328125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.29%) |Training time=0.48s (22.08%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6662|ppo_ep: 1|act_loss: -0.020050048828125|cri_loss: -0.00936126708984375|unsuper_loss: 0.0 +average reward score: 5.8046875 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.27%) |Training time=0.54s (24.21%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.42 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6663|ppo_ep: 1|act_loss: -0.0089111328125|cri_loss: -0.00431060791015625|unsuper_loss: 0.0 +average reward score: 4.75 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.76%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6664|ppo_ep: 1|act_loss: -0.006011962890625|cri_loss: -0.00070953369140625|unsuper_loss: 0.0 +average reward score: 5.22265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.94%) |Training time=0.46s (21.38%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6665|ppo_ep: 1|act_loss: -0.01068115234375|cri_loss: -0.005191802978515625|unsuper_loss: 0.0 +average reward score: 5.3203125 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.04%) |Training time=0.45s (21.27%) |Others=0.10 (4.69%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6666|ppo_ep: 1|act_loss: -0.01541900634765625|cri_loss: -0.0076141357421875|unsuper_loss: 0.0 +average reward score: 5.34375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.34%) |Training time=0.47s (22.00%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.54 +[2023-04-14 12:52:07,930] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384 +epoch: 0|step: 6667|ppo_ep: 1|act_loss: 0.03076171875|cri_loss: 0.0161285400390625|unsuper_loss: 0.0 +average reward score: 5.4765625 +------------------------------------------------------------------------------------- +|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.71%) |Training time=0.43s (20.58%) |Others=0.10 (4.71%)|CurSamplesPerSec=15.15 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6668|ppo_ep: 1|act_loss: 0.005889892578125|cri_loss: 0.003192901611328125|unsuper_loss: 0.0 +average reward score: 4.875 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.02%) |Training time=0.45s (21.30%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.54 +[2023-04-14 12:52:12,186] [INFO] [logging.py:96:log_dist] [Rank 0] step=6670, skipped=85, lr=[9.689766831817931e-07, 9.689766831817931e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:52:12,205] [INFO] [timer.py:199:stop] epoch=0/micro_step=6670/global_step=6670, RunningAvgSamplesPerSec=105.51352462606164, CurrSamplesPerSec=108.40804827478699, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:52:12,297] [INFO] [logging.py:96:log_dist] [Rank 0] step=6670, skipped=112, lr=[5.177891001378454e-07, 5.177891001378454e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6669|ppo_ep: 1|act_loss: -0.0132293701171875|cri_loss: -0.006320953369140625|unsuper_loss: 0.0 +average reward score: 5.38671875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.93%) |Training time=0.46s (21.40%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6670|ppo_ep: 1|act_loss: -0.0029659271240234375|cri_loss: -0.0012969970703125|unsuper_loss: 0.0 +average reward score: 5.30078125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.05%) |Training time=0.45s (21.24%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6671|ppo_ep: 1|act_loss: -0.00601959228515625|cri_loss: -0.00296783447265625|unsuper_loss: 0.0 +average reward score: 5.78125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.85%) |Training time=0.46s (21.49%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6672|ppo_ep: 1|act_loss: -0.0157928466796875|cri_loss: -0.007663726806640625|unsuper_loss: 0.0 +average reward score: 4.9453125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.19%) |Training time=0.45s (21.10%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6673|ppo_ep: 1|act_loss: 0.002552032470703125|cri_loss: 0.001552581787109375|unsuper_loss: 0.0 +average reward score: 5.1484375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.21%) |Training time=0.45s (21.11%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6674|ppo_ep: 1|act_loss: -0.0108184814453125|cri_loss: -0.005218505859375|unsuper_loss: 0.0 +average reward score: 5.38671875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.12%) |Training time=0.45s (21.19%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6675|ppo_ep: 1|act_loss: -0.0014591217041015625|cri_loss: -0.0005102157592773438|unsuper_loss: 0.0 +average reward score: 5.95703125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.32%) |Training time=0.45s (20.86%) |Others=0.10 (4.81%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6676|ppo_ep: 1|act_loss: 0.00400543212890625|cri_loss: 0.00254058837890625|unsuper_loss: 0.0 +average reward score: 5.6875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.12%) |Training time=0.45s (21.22%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6677|ppo_ep: 1|act_loss: 0.00536346435546875|cri_loss: 0.0029087066650390625|unsuper_loss: 0.0 +average reward score: 5.4375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.21%) |Training time=0.45s (21.09%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6678|ppo_ep: 1|act_loss: 0.0085906982421875|cri_loss: 0.004543304443359375|unsuper_loss: 0.0 +average reward score: 6.25 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.34%) |Training time=0.48s (20.36%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.54 +[2023-04-14 12:52:33,771] [INFO] [logging.py:96:log_dist] [Rank 0] step=6680, skipped=85, lr=[9.578391801772933e-07, 9.578391801772933e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:52:33,790] [INFO] [timer.py:199:stop] epoch=0/micro_step=6680/global_step=6680, RunningAvgSamplesPerSec=105.51922652617614, CurrSamplesPerSec=109.9554075841187, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:52:33,882] [INFO] [logging.py:96:log_dist] [Rank 0] step=6680, skipped=112, lr=[5.119386121734576e-07, 5.119386121734576e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6679|ppo_ep: 1|act_loss: -0.0096435546875|cri_loss: -0.0047149658203125|unsuper_loss: 0.0 +average reward score: 6.37109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.19%) |Training time=0.45s (21.15%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6680|ppo_ep: 1|act_loss: -0.0172576904296875|cri_loss: -0.00783538818359375|unsuper_loss: 0.0 +average reward score: 5.3125 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.22%) |Training time=0.45s (21.08%) |Others=0.10 (4.70%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6681|ppo_ep: 1|act_loss: -0.0127716064453125|cri_loss: -0.006191253662109375|unsuper_loss: 0.0 +average reward score: 5.703125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.88%) |Training time=0.46s (21.44%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6682|ppo_ep: 1|act_loss: -0.00936126708984375|cri_loss: -0.0045166015625|unsuper_loss: 0.0 +average reward score: 4.59375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.16%) |Training time=0.45s (21.14%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6683|ppo_ep: 1|act_loss: -0.0095062255859375|cri_loss: -0.003978729248046875|unsuper_loss: 0.0 +average reward score: 5.171875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.46s (21.29%) |Others=0.11 (4.89%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6684|ppo_ep: 1|act_loss: 0.025115966796875|cri_loss: 0.012786865234375|unsuper_loss: 0.0 +average reward score: 5.875 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.62s (71.17%) |Training time=0.56s (24.41%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.07 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6685|ppo_ep: 1|act_loss: -0.01519775390625|cri_loss: -0.00749969482421875|unsuper_loss: 0.0 +average reward score: 5.41015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.73%) |Training time=0.45s (21.13%) |Others=0.11 (5.14%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6686|ppo_ep: 1|act_loss: -0.0013332366943359375|cri_loss: -0.0005702972412109375|unsuper_loss: 0.0 +average reward score: 5.21875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.80%) |Training time=0.46s (21.52%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6687|ppo_ep: 1|act_loss: -0.019500732421875|cri_loss: -0.00909423828125|unsuper_loss: 0.0 +average reward score: 5.1484375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.89%) |Training time=0.46s (21.41%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6688|ppo_ep: 1|act_loss: -0.01041412353515625|cri_loss: -0.005092620849609375|unsuper_loss: 0.0 +average reward score: 5.38671875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.06%) |Training time=0.45s (21.25%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.54 +[2023-04-14 12:52:55,313] [INFO] [logging.py:96:log_dist] [Rank 0] step=6690, skipped=85, lr=[9.467589978251612e-07, 9.467589978251612e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:52:55,331] [INFO] [timer.py:199:stop] epoch=0/micro_step=6690/global_step=6690, RunningAvgSamplesPerSec=105.5206408271309, CurrSamplesPerSec=109.22053367771504, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:52:55,424] [INFO] [logging.py:96:log_dist] [Rank 0] step=6690, skipped=112, lr=[5.061175920747744e-07, 5.061175920747744e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6689|ppo_ep: 1|act_loss: -0.0106201171875|cri_loss: -0.00505828857421875|unsuper_loss: 0.0 +average reward score: 5.125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.97%) |Training time=0.46s (21.36%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6690|ppo_ep: 1|act_loss: -0.00559234619140625|cri_loss: -0.00270843505859375|unsuper_loss: 0.0 +average reward score: 6.0390625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.88%) |Training time=0.46s (21.42%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6691|ppo_ep: 1|act_loss: -0.0083770751953125|cri_loss: -0.004062652587890625|unsuper_loss: 0.0 +average reward score: 5.73828125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.86%) |Training time=0.46s (21.45%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6692|ppo_ep: 1|act_loss: -0.00913238525390625|cri_loss: -0.0028133392333984375|unsuper_loss: 0.0 +average reward score: 5.828125 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.00%) |Training time=0.45s (21.32%) |Others=0.10 (4.68%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6693|ppo_ep: 1|act_loss: -0.0193328857421875|cri_loss: -0.0092010498046875|unsuper_loss: 0.0 +average reward score: 5.26171875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.62%) |Training time=0.46s (21.62%) |Others=0.10 (4.77%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6694|ppo_ep: 1|act_loss: -0.01210784912109375|cri_loss: -0.00600433349609375|unsuper_loss: 0.0 +average reward score: 6.19140625 +------------------------------------------------------------------------------------- +|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.59%) |Training time=0.50s (21.05%) |Others=0.10 (4.35%)|CurSamplesPerSec=13.60 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6695|ppo_ep: 1|act_loss: 0.0144500732421875|cri_loss: 0.007442474365234375|unsuper_loss: 0.0 +average reward score: 4.94140625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.01%) |Training time=0.46s (21.31%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6696|ppo_ep: 1|act_loss: 0.005863189697265625|cri_loss: 0.003330230712890625|unsuper_loss: 0.0 +average reward score: 5.046875 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.15%) |Training time=0.45s (21.15%) |Others=0.10 (4.70%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6697|ppo_ep: 1|act_loss: -0.009918212890625|cri_loss: -0.00487518310546875|unsuper_loss: 0.0 +average reward score: 5.25390625 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.06%) |Training time=0.45s (21.25%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6698|ppo_ep: 1|act_loss: 0.02923583984375|cri_loss: 0.01513671875|unsuper_loss: 0.0 +average reward score: 5.6796875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.80%) |Training time=0.46s (21.55%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.54 +[2023-04-14 12:53:16,902] [INFO] [logging.py:96:log_dist] [Rank 0] step=6700, skipped=85, lr=[9.357363003604284e-07, 9.357363003604284e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:53:16,920] [INFO] [timer.py:199:stop] epoch=0/micro_step=6700/global_step=6700, RunningAvgSamplesPerSec=105.52344625762034, CurrSamplesPerSec=109.05564597185241, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:53:17,004] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +[2023-04-14 12:53:17,005] [INFO] [logging.py:96:log_dist] [Rank 0] step=6700, skipped=113, lr=[5.009039403318924e-07, 5.009039403318924e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6699|ppo_ep: 1|act_loss: -0.01666259765625|cri_loss: -0.00447845458984375|unsuper_loss: 0.0 +average reward score: 4.7890625 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.36%) |Training time=0.46s (21.45%) |Others=0.09 (4.19%)|CurSamplesPerSec=15.06 |AvgSamplesPerSec=14.54 +[2023-04-14 12:53:19,135] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 6700|ppo_ep: 1|act_loss: 0.007534027099609375|cri_loss: 0.003940582275390625|unsuper_loss: 0.0 +average reward score: 5.96875 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.21%) |Training time=0.46s (21.58%) |Others=0.09 (4.20%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6701|ppo_ep: 1|act_loss: -0.0025157928466796875|cri_loss: -0.0008792877197265625|unsuper_loss: 0.0 +average reward score: 5.5234375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.04%) |Training time=0.46s (21.30%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6702|ppo_ep: 1|act_loss: 0.00467681884765625|cri_loss: 0.002513885498046875|unsuper_loss: 0.0 +average reward score: 5.2734375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.84%) |Training time=0.46s (21.48%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6703|ppo_ep: 1|act_loss: 0.0185394287109375|cri_loss: 0.0096435546875|unsuper_loss: 0.0 +average reward score: 5.203125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.01%) |Training time=0.46s (21.30%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6704|ppo_ep: 1|act_loss: 0.00250244140625|cri_loss: 0.0015621185302734375|unsuper_loss: 0.0 +average reward score: 5.515625 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.34%) |Training time=0.45s (20.96%) |Others=0.10 (4.69%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6705|ppo_ep: 1|act_loss: -0.0096435546875|cri_loss: -0.0043487548828125|unsuper_loss: 0.0 +average reward score: 4.9296875 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.16%) |Training time=0.45s (21.15%) |Others=0.10 (4.68%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6706|ppo_ep: 1|act_loss: 0.00850677490234375|cri_loss: 0.0045928955078125|unsuper_loss: 0.0 +average reward score: 5.43359375 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.45%) |Training time=0.44s (20.85%) |Others=0.10 (4.70%)|CurSamplesPerSec=15.03 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6707|ppo_ep: 1|act_loss: -0.016876220703125|cri_loss: -0.008056640625|unsuper_loss: 0.0 +average reward score: 5.56640625 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.94%) |Training time=0.46s (21.39%) |Others=0.10 (4.67%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6708|ppo_ep: 1|act_loss: -0.020416259765625|cri_loss: -0.0099334716796875|unsuper_loss: 0.0 +average reward score: 5.32421875 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.13%) |Training time=0.52s (23.65%) |Others=0.12 (5.22%)|CurSamplesPerSec=14.41 |AvgSamplesPerSec=14.54 +[2023-04-14 12:53:38,530] [INFO] [logging.py:96:log_dist] [Rank 0] step=6710, skipped=85, lr=[9.247712511660617e-07, 9.247712511660617e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:53:38,547] [INFO] [timer.py:199:stop] epoch=0/micro_step=6710/global_step=6710, RunningAvgSamplesPerSec=105.52877706520603, CurrSamplesPerSec=108.05545040370528, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:53:38,640] [INFO] [logging.py:96:log_dist] [Rank 0] step=6710, skipped=114, lr=[4.957142900498335e-07, 4.957142900498335e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6709|ppo_ep: 1|act_loss: -0.00634765625|cri_loss: -0.0029582977294921875|unsuper_loss: 0.0 +average reward score: 5.5625 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.11%) |Training time=0.46s (19.61%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6710|ppo_ep: 1|act_loss: -0.012725830078125|cri_loss: -0.0059661865234375|unsuper_loss: 0.0 +average reward score: 5.39453125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.81%) |Training time=0.44s (20.52%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6711|ppo_ep: 1|act_loss: 0.0023860931396484375|cri_loss: 0.0013742446899414062|unsuper_loss: 0.0 +average reward score: 5.5234375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.72%) |Training time=0.44s (20.61%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6712|ppo_ep: 1|act_loss: 0.0177459716796875|cri_loss: 0.00939178466796875|unsuper_loss: 0.0 +average reward score: 5.734375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.38%) |Training time=0.45s (20.88%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6713|ppo_ep: 1|act_loss: 0.0116729736328125|cri_loss: 0.006237030029296875|unsuper_loss: 0.0 +average reward score: 5.03515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.73%) |Training time=0.44s (20.54%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6714|ppo_ep: 1|act_loss: 0.0052642822265625|cri_loss: 0.0027484893798828125|unsuper_loss: 0.0 +average reward score: 5.2421875 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.03%) |Training time=0.46s (20.56%) |Others=0.10 (4.41%)|CurSamplesPerSec=14.15 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6715|ppo_ep: 1|act_loss: -0.00588226318359375|cri_loss: -0.002285003662109375|unsuper_loss: 0.0 +average reward score: 5.81640625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.57%) |Training time=0.44s (20.76%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6716|ppo_ep: 1|act_loss: 0.007537841796875|cri_loss: 0.00385284423828125|unsuper_loss: 0.0 +average reward score: 6.08984375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.76%) |Training time=0.44s (20.56%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6717|ppo_ep: 1|act_loss: 0.00479888916015625|cri_loss: 0.00278472900390625|unsuper_loss: 0.0 +average reward score: 5.68359375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.42%) |Training time=0.45s (20.93%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6718|ppo_ep: 1|act_loss: 0.0034942626953125|cri_loss: 0.0019989013671875|unsuper_loss: 0.0 +average reward score: 5.24609375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.10%) |Training time=0.45s (21.22%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.54 +[2023-04-14 12:54:00,080] [INFO] [logging.py:96:log_dist] [Rank 0] step=6720, skipped=85, lr=[9.138640127705436e-07, 9.138640127705436e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:54:00,098] [INFO] [timer.py:199:stop] epoch=0/micro_step=6720/global_step=6720, RunningAvgSamplesPerSec=105.53805823334427, CurrSamplesPerSec=109.25004944906014, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:54:00,190] [INFO] [logging.py:96:log_dist] [Rank 0] step=6720, skipped=114, lr=[4.899762375939118e-07, 4.899762375939118e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6719|ppo_ep: 1|act_loss: -0.02252197265625|cri_loss: -0.01092529296875|unsuper_loss: 0.0 +average reward score: 5.78515625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.03%) |Training time=0.46s (21.31%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6720|ppo_ep: 1|act_loss: 0.0220947265625|cri_loss: 0.01131439208984375|unsuper_loss: 0.0 +average reward score: 6.6484375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.86%) |Training time=0.46s (21.47%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6721|ppo_ep: 1|act_loss: -0.0132293701171875|cri_loss: -0.00577545166015625|unsuper_loss: 0.0 +average reward score: 5.41796875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.69%) |Training time=0.46s (21.65%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6722|ppo_ep: 1|act_loss: -0.018951416015625|cri_loss: -0.00917816162109375|unsuper_loss: 0.0 +average reward score: 4.51171875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.01%) |Training time=0.46s (21.37%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6723|ppo_ep: 1|act_loss: -0.013641357421875|cri_loss: -0.006664276123046875|unsuper_loss: 0.0 +average reward score: 5.578125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.92%) |Training time=0.46s (21.42%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6724|ppo_ep: 1|act_loss: -0.0198211669921875|cri_loss: -0.009765625|unsuper_loss: 0.0 +average reward score: 5.37890625 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.04%) |Training time=0.48s (20.65%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6725|ppo_ep: 1|act_loss: 0.004795074462890625|cri_loss: 0.0027618408203125|unsuper_loss: 0.0 +average reward score: 5.4609375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.95%) |Training time=0.46s (21.40%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6726|ppo_ep: 1|act_loss: 0.00786590576171875|cri_loss: 0.004146575927734375|unsuper_loss: 0.0 +average reward score: 4.609375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.02%) |Training time=0.46s (21.30%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6727|ppo_ep: 1|act_loss: -0.0087127685546875|cri_loss: -0.00384521484375|unsuper_loss: 0.0 +average reward score: 5.765625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.69%) |Training time=0.46s (21.66%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6728|ppo_ep: 1|act_loss: 0.0158233642578125|cri_loss: 0.00830841064453125|unsuper_loss: 0.0 +average reward score: 5.22265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.98%) |Training time=0.46s (21.35%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.54 +[2023-04-14 12:54:21,710] [INFO] [logging.py:96:log_dist] [Rank 0] step=6730, skipped=85, lr=[9.03014746845457e-07, 9.03014746845457e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:54:21,729] [INFO] [timer.py:199:stop] epoch=0/micro_step=6730/global_step=6730, RunningAvgSamplesPerSec=105.53983507346179, CurrSamplesPerSec=108.58169774427452, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:54:21,821] [INFO] [logging.py:96:log_dist] [Rank 0] step=6730, skipped=114, lr=[4.842679785390672e-07, 4.842679785390672e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6729|ppo_ep: 1|act_loss: 0.0019283294677734375|cri_loss: 0.0010194778442382812|unsuper_loss: 0.0 +average reward score: 5.609375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.93%) |Training time=0.46s (21.40%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6730|ppo_ep: 1|act_loss: -0.023345947265625|cri_loss: -0.01141357421875|unsuper_loss: 0.0 +average reward score: 5.7265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.99%) |Training time=0.46s (21.32%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6731|ppo_ep: 1|act_loss: 0.00859832763671875|cri_loss: 0.00452423095703125|unsuper_loss: 0.0 +average reward score: 5.2265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.80%) |Training time=0.46s (21.52%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6732|ppo_ep: 1|act_loss: -0.002971649169921875|cri_loss: -0.0011320114135742188|unsuper_loss: 0.0 +average reward score: 5.890625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.14%) |Training time=0.45s (21.19%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6733|ppo_ep: 1|act_loss: -0.011474609375|cri_loss: -0.00536346435546875|unsuper_loss: 0.0 +average reward score: 5.203125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.81%) |Training time=0.46s (21.52%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6734|ppo_ep: 1|act_loss: -0.00032138824462890625|cri_loss: -0.00010442733764648438|unsuper_loss: 0.0 +average reward score: 5.109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.62%) |Training time=0.47s (21.72%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6735|ppo_ep: 1|act_loss: -0.010223388671875|cri_loss: -0.004852294921875|unsuper_loss: 0.0 +average reward score: 4.62890625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.12%) |Training time=0.45s (21.21%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6736|ppo_ep: 1|act_loss: 0.0158538818359375|cri_loss: 0.0081787109375|unsuper_loss: 0.0 +average reward score: 4.828125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.90%) |Training time=0.46s (21.44%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6737|ppo_ep: 1|act_loss: -0.01104736328125|cri_loss: -0.005374908447265625|unsuper_loss: 0.0 +average reward score: 5.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.89%) |Training time=0.46s (21.44%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6738|ppo_ep: 1|act_loss: 0.015716552734375|cri_loss: 0.008941650390625|unsuper_loss: 0.0 +average reward score: 5.22265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.89%) |Training time=0.46s (21.44%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.54 +[2023-04-14 12:54:43,139] [INFO] [logging.py:96:log_dist] [Rank 0] step=6740, skipped=85, lr=[8.922236142030963e-07, 8.922236142030963e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:54:43,165] [INFO] [timer.py:199:stop] epoch=0/micro_step=6740/global_step=6740, RunningAvgSamplesPerSec=105.54288850018418, CurrSamplesPerSec=105.9612526723656, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:54:43,311] [INFO] [logging.py:96:log_dist] [Rank 0] step=6740, skipped=114, lr=[4.785895974954696e-07, 4.785895974954696e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6739|ppo_ep: 1|act_loss: 0.042236328125|cri_loss: 0.021759033203125|unsuper_loss: 0.0 +average reward score: 5.140625 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.74%) |Training time=0.47s (21.34%) |Others=0.15 (6.92%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6740|ppo_ep: 1|act_loss: 0.003856658935546875|cri_loss: 0.0022125244140625|unsuper_loss: 0.0 +average reward score: 4.85546875 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.02%) |Training time=0.47s (20.50%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.08 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6741|ppo_ep: 1|act_loss: -0.006992340087890625|cri_loss: -0.0033550262451171875|unsuper_loss: 0.0 +average reward score: 5.44140625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.82%) |Training time=0.47s (21.49%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6742|ppo_ep: 1|act_loss: 0.002826690673828125|cri_loss: 0.0014934539794921875|unsuper_loss: 0.0 +average reward score: 5.0078125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.33%) |Training time=0.43s (19.96%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6743|ppo_ep: 1|act_loss: 0.0209503173828125|cri_loss: 0.01145172119140625|unsuper_loss: 0.0 +average reward score: 5.35546875 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.67s (75.40%) |Training time=0.44s (19.95%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.46 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6744|ppo_ep: 1|act_loss: -0.01523590087890625|cri_loss: -0.00753021240234375|unsuper_loss: 0.0 +average reward score: 6.4765625 +------------------------------------------------------------------------------------- +|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.75s (76.03%) |Training time=0.45s (19.51%) |Others=0.10 (4.46%)|CurSamplesPerSec=13.92 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6745|ppo_ep: 1|act_loss: 0.063232421875|cri_loss: 0.03399658203125|unsuper_loss: 0.0 +average reward score: 5.53125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.24%) |Training time=0.44s (20.04%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6746|ppo_ep: 1|act_loss: 0.0103607177734375|cri_loss: 0.0072021484375|unsuper_loss: 0.0 +average reward score: 5.68359375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.31%) |Training time=0.44s (19.99%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6747|ppo_ep: 1|act_loss: -0.0131072998046875|cri_loss: -0.006072998046875|unsuper_loss: 0.0 +average reward score: 5.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.35%) |Training time=0.48s (21.81%) |Others=0.11 (4.83%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6748|ppo_ep: 1|act_loss: 0.0301055908203125|cri_loss: 0.01544952392578125|unsuper_loss: 0.0 +average reward score: 5.671875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.14%) |Training time=0.48s (22.24%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.54 +[2023-04-14 12:55:05,236] [INFO] [logging.py:96:log_dist] [Rank 0] step=6750, skipped=85, lr=[8.81490774794079e-07, 8.81490774794079e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:55:05,255] [INFO] [timer.py:199:stop] epoch=0/micro_step=6750/global_step=6750, RunningAvgSamplesPerSec=105.54850591402027, CurrSamplesPerSec=105.97112470885476, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:55:05,348] [INFO] [logging.py:96:log_dist] [Rank 0] step=6750, skipped=114, lr=[4.729411786304247e-07, 4.729411786304247e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6749|ppo_ep: 1|act_loss: -0.0013751983642578125|cri_loss: -0.00048041343688964844|unsuper_loss: 0.0 +average reward score: 5.23828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.91%) |Training time=0.47s (21.48%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6750|ppo_ep: 1|act_loss: 0.0038166046142578125|cri_loss: 0.002071380615234375|unsuper_loss: 0.0 +average reward score: 5.60546875 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.95%) |Training time=0.47s (21.46%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6751|ppo_ep: 1|act_loss: -0.01177978515625|cri_loss: -0.005767822265625|unsuper_loss: 0.0 +average reward score: 5.171875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.35%) |Training time=0.46s (21.04%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6752|ppo_ep: 1|act_loss: 0.0181884765625|cri_loss: 0.009521484375|unsuper_loss: 0.0 +average reward score: 5.59765625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.21%) |Training time=0.46s (21.15%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6753|ppo_ep: 1|act_loss: -0.011199951171875|cri_loss: -0.005107879638671875|unsuper_loss: 0.0 +average reward score: 5.2578125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.06%) |Training time=0.47s (21.32%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6754|ppo_ep: 1|act_loss: -0.00250244140625|cri_loss: -0.0010585784912109375|unsuper_loss: 0.0 +average reward score: 5.14453125 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.30%) |Training time=0.45s (19.38%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.68 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6755|ppo_ep: 1|act_loss: -0.004077911376953125|cri_loss: -0.0019178390502929688|unsuper_loss: 0.0 +average reward score: 5.171875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.38%) |Training time=0.43s (19.83%) |Others=0.10 (4.79%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6756|ppo_ep: 1|act_loss: -0.00409698486328125|cri_loss: -0.0018758773803710938|unsuper_loss: 0.0 +average reward score: 4.796875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.28%) |Training time=0.46s (21.02%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6757|ppo_ep: 1|act_loss: 0.0287628173828125|cri_loss: 0.0147857666015625|unsuper_loss: 0.0 +average reward score: 4.98046875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.43%) |Training time=0.46s (21.01%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6758|ppo_ep: 1|act_loss: 0.03680419921875|cri_loss: 0.018829345703125|unsuper_loss: 0.0 +average reward score: 5.3515625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.02%) |Training time=0.47s (21.39%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.54 +[2023-04-14 12:55:27,208] [INFO] [logging.py:96:log_dist] [Rank 0] step=6760, skipped=85, lr=[8.708163877049794e-07, 8.708163877049794e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:55:27,226] [INFO] [timer.py:199:stop] epoch=0/micro_step=6760/global_step=6760, RunningAvgSamplesPerSec=105.55200119918464, CurrSamplesPerSec=105.85219716823623, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:55:27,322] [INFO] [logging.py:96:log_dist] [Rank 0] step=6760, skipped=114, lr=[4.673228056671253e-07, 4.673228056671253e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6759|ppo_ep: 1|act_loss: 0.0150146484375|cri_loss: 0.00763702392578125|unsuper_loss: 0.0 +average reward score: 5.51171875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.88%) |Training time=0.47s (21.37%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6760|ppo_ep: 1|act_loss: -0.008392333984375|cri_loss: -0.00408172607421875|unsuper_loss: 0.0 +average reward score: 5.5625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.84%) |Training time=0.47s (21.49%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6761|ppo_ep: 1|act_loss: 0.03912353515625|cri_loss: 0.020965576171875|unsuper_loss: 0.0 +average reward score: 5.9609375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.82%) |Training time=0.47s (21.58%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6762|ppo_ep: 1|act_loss: -0.00550079345703125|cri_loss: -0.0025272369384765625|unsuper_loss: 0.0 +average reward score: 5.44921875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.95%) |Training time=0.47s (21.45%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6763|ppo_ep: 1|act_loss: -0.0089874267578125|cri_loss: -0.00429534912109375|unsuper_loss: 0.0 +average reward score: 4.921875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.84%) |Training time=0.47s (21.58%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6764|ppo_ep: 1|act_loss: 0.06707763671875|cri_loss: 0.035980224609375|unsuper_loss: 0.0 +average reward score: 5.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.72s (74.51%) |Training time=0.48s (20.82%) |Others=0.11 (4.67%)|CurSamplesPerSec=13.86 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6765|ppo_ep: 1|act_loss: 0.0087127685546875|cri_loss: 0.004486083984375|unsuper_loss: 0.0 +average reward score: 5.5625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.60%) |Training time=0.48s (21.82%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6766|ppo_ep: 1|act_loss: 0.017425537109375|cri_loss: 0.00891876220703125|unsuper_loss: 0.0 +average reward score: 5.7421875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.62%) |Training time=0.48s (21.79%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6767|ppo_ep: 1|act_loss: -0.010345458984375|cri_loss: -0.004726409912109375|unsuper_loss: 0.0 +average reward score: 5.23046875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.66%) |Training time=0.47s (21.75%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6768|ppo_ep: 1|act_loss: -0.04034423828125|cri_loss: -0.01947021484375|unsuper_loss: 0.0 +average reward score: 4.66015625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.60%) |Training time=0.48s (21.80%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.54 +[2023-04-14 12:55:49,293] [INFO] [logging.py:96:log_dist] [Rank 0] step=6770, skipped=85, lr=[8.602006111559606e-07, 8.602006111559606e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:55:49,311] [INFO] [timer.py:199:stop] epoch=0/micro_step=6770/global_step=6770, RunningAvgSamplesPerSec=105.54869605550402, CurrSamplesPerSec=104.80069649682399, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:55:49,407] [INFO] [logging.py:96:log_dist] [Rank 0] step=6770, skipped=114, lr=[4.6173456188341234e-07, 4.6173456188341234e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6769|ppo_ep: 1|act_loss: -0.00492095947265625|cri_loss: -0.002368927001953125|unsuper_loss: 0.0 +average reward score: 5.3515625 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.57%) |Training time=0.47s (20.02%) |Others=0.10 (4.42%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6770|ppo_ep: 1|act_loss: -0.00428009033203125|cri_loss: -0.0020275115966796875|unsuper_loss: 0.0 +average reward score: 5.9296875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.16%) |Training time=0.46s (21.19%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6771|ppo_ep: 1|act_loss: -0.02099609375|cri_loss: -0.010284423828125|unsuper_loss: 0.0 +average reward score: 5.4765625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.69%) |Training time=0.47s (21.69%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6772|ppo_ep: 1|act_loss: 0.03363037109375|cri_loss: 0.0172271728515625|unsuper_loss: 0.0 +average reward score: 5.56640625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.39%) |Training time=0.46s (20.97%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6773|ppo_ep: 1|act_loss: -0.00351715087890625|cri_loss: -0.0007381439208984375|unsuper_loss: 0.0 +average reward score: 5.84375 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.19%) |Training time=0.46s (20.40%) |Others=0.10 (4.41%)|CurSamplesPerSec=14.07 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6774|ppo_ep: 1|act_loss: -0.0094451904296875|cri_loss: -0.00452423095703125|unsuper_loss: 0.0 +average reward score: 5.49609375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.07%) |Training time=0.46s (21.24%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6775|ppo_ep: 1|act_loss: 0.01076507568359375|cri_loss: 0.00554656982421875|unsuper_loss: 0.0 +average reward score: 5.859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.18%) |Training time=0.46s (21.18%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6776|ppo_ep: 1|act_loss: -0.003173828125|cri_loss: -0.001331329345703125|unsuper_loss: 0.0 +average reward score: 5.125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.03%) |Training time=0.46s (21.34%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6777|ppo_ep: 1|act_loss: -0.00521087646484375|cri_loss: -0.0022106170654296875|unsuper_loss: 0.0 +average reward score: 5.23046875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.36%) |Training time=0.48s (22.05%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6778|ppo_ep: 1|act_loss: 0.01116943359375|cri_loss: 0.005718231201171875|unsuper_loss: 0.0 +average reward score: 5.45703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.12%) |Training time=0.46s (21.25%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.54 +[2023-04-14 12:56:11,135] [INFO] [logging.py:96:log_dist] [Rank 0] step=6780, skipped=85, lr=[8.496436024984428e-07, 8.496436024984428e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:56:11,153] [INFO] [timer.py:199:stop] epoch=0/micro_step=6780/global_step=6780, RunningAvgSamplesPerSec=105.54960072512817, CurrSamplesPerSec=107.5528061003715, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:56:11,246] [INFO] [logging.py:96:log_dist] [Rank 0] step=6780, skipped=114, lr=[4.5617653011053977e-07, 4.5617653011053977e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6779|ppo_ep: 1|act_loss: 0.0100555419921875|cri_loss: 0.00524139404296875|unsuper_loss: 0.0 +average reward score: 5.98828125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.12%) |Training time=0.46s (21.28%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6780|ppo_ep: 1|act_loss: -0.01525115966796875|cri_loss: -0.007476806640625|unsuper_loss: 0.0 +average reward score: 5.8046875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.89%) |Training time=0.47s (21.48%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6781|ppo_ep: 1|act_loss: 0.020843505859375|cri_loss: 0.01080322265625|unsuper_loss: 0.0 +average reward score: 4.92578125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.01%) |Training time=0.46s (21.36%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6782|ppo_ep: 1|act_loss: -0.04522705078125|cri_loss: -0.022186279296875|unsuper_loss: 0.0 +average reward score: 5.19921875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.03%) |Training time=0.46s (21.36%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6783|ppo_ep: 1|act_loss: -0.006313323974609375|cri_loss: -0.0029087066650390625|unsuper_loss: 0.0 +average reward score: 5.66015625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.80%) |Training time=0.47s (21.59%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6784|ppo_ep: 1|act_loss: -0.0184173583984375|cri_loss: -0.00901031494140625|unsuper_loss: 0.0 +average reward score: 4.83203125 +------------------------------------------------------------------------------------- +|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.60s (71.53%) |Training time=0.54s (23.98%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.30 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6785|ppo_ep: 1|act_loss: -0.0084991455078125|cri_loss: -0.004001617431640625|unsuper_loss: 0.0 +average reward score: 5.36328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.09%) |Training time=0.46s (21.29%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6786|ppo_ep: 1|act_loss: -0.0009622573852539062|cri_loss: -0.00033354759216308594|unsuper_loss: 0.0 +average reward score: 5.6640625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.11%) |Training time=0.46s (21.28%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6787|ppo_ep: 1|act_loss: -0.005367279052734375|cri_loss: -0.002521514892578125|unsuper_loss: 0.0 +average reward score: 5.72265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.90%) |Training time=0.47s (21.47%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6788|ppo_ep: 1|act_loss: -0.00386810302734375|cri_loss: -0.001888275146484375|unsuper_loss: 0.0 +average reward score: 5.3828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.92%) |Training time=0.46s (21.44%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.54 +[2023-04-14 12:56:32,879] [INFO] [logging.py:96:log_dist] [Rank 0] step=6790, skipped=85, lr=[8.391455182127607e-07, 8.391455182127607e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:56:32,898] [INFO] [timer.py:199:stop] epoch=0/micro_step=6790/global_step=6790, RunningAvgSamplesPerSec=105.54857659001864, CurrSamplesPerSec=106.04438580204238, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:56:32,990] [INFO] [logging.py:96:log_dist] [Rank 0] step=6790, skipped=114, lr=[4.506487927319475e-07, 4.506487927319475e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6789|ppo_ep: 1|act_loss: 0.0081634521484375|cri_loss: 0.00420379638671875|unsuper_loss: 0.0 +average reward score: 5.7109375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.46s (21.43%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6790|ppo_ep: 1|act_loss: -0.0150146484375|cri_loss: -0.006427764892578125|unsuper_loss: 0.0 +average reward score: 5.890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.47s (21.60%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6791|ppo_ep: 1|act_loss: -0.012847900390625|cri_loss: -0.006328582763671875|unsuper_loss: 0.0 +average reward score: 6.421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.05%) |Training time=0.46s (21.34%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6792|ppo_ep: 1|act_loss: 0.02069091796875|cri_loss: 0.01058197021484375|unsuper_loss: 0.0 +average reward score: 4.94140625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.95%) |Training time=0.46s (21.38%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6793|ppo_ep: 1|act_loss: 0.00525665283203125|cri_loss: 0.003208160400390625|unsuper_loss: 0.0 +average reward score: 5.5390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.81%) |Training time=0.47s (21.58%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6794|ppo_ep: 1|act_loss: 0.0136566162109375|cri_loss: 0.0073394775390625|unsuper_loss: 0.0 +average reward score: 5.03125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.00%) |Training time=0.46s (21.39%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6795|ppo_ep: 1|act_loss: -0.0012187957763671875|cri_loss: -0.00019693374633789062|unsuper_loss: 0.0 +average reward score: 6.234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.65%) |Training time=0.47s (21.62%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6796|ppo_ep: 1|act_loss: -0.006439208984375|cri_loss: -0.003108978271484375|unsuper_loss: 0.0 +average reward score: 5.4296875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.02%) |Training time=0.46s (21.38%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6797|ppo_ep: 1|act_loss: 0.0185546875|cri_loss: 0.0095672607421875|unsuper_loss: 0.0 +average reward score: 5.5703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.83%) |Training time=0.44s (20.52%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6798|ppo_ep: 1|act_loss: -0.00604248046875|cri_loss: -0.0026149749755859375|unsuper_loss: 0.0 +average reward score: 4.9296875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.13%) |Training time=0.46s (21.24%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.54 +[2023-04-14 12:56:54,707] [INFO] [logging.py:96:log_dist] [Rank 0] step=6800, skipped=85, lr=[8.2870651390585e-07, 8.2870651390585e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:56:54,726] [INFO] [timer.py:199:stop] epoch=0/micro_step=6800/global_step=6800, RunningAvgSamplesPerSec=105.55108170847568, CurrSamplesPerSec=114.21317600887035, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:56:54,818] [INFO] [logging.py:96:log_dist] [Rank 0] step=6800, skipped=114, lr=[4.4515143168203685e-07, 4.4515143168203685e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6799|ppo_ep: 1|act_loss: -0.0230712890625|cri_loss: -0.0112762451171875|unsuper_loss: 0.0 +average reward score: 5.48046875 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.55%) |Training time=0.44s (19.14%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.83 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6800|ppo_ep: 1|act_loss: 0.00914764404296875|cri_loss: 0.0051727294921875|unsuper_loss: 0.0 +average reward score: 5.55078125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.80%) |Training time=0.44s (20.53%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.54 +[2023-04-14 12:56:59,114] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 6801|ppo_ep: 1|act_loss: -0.00785064697265625|cri_loss: -0.0037937164306640625|unsuper_loss: 0.0 +average reward score: 5.546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.44%) |Training time=0.44s (20.36%) |Others=0.09 (4.20%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.54 +[2023-04-14 12:57:01,326] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 6802|ppo_ep: 1|act_loss: -0.00553131103515625|cri_loss: -0.002628326416015625|unsuper_loss: 0.0 +average reward score: 5.078125 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.64s (73.79%) |Training time=0.46s (20.84%) |Others=0.12 (5.38%)|CurSamplesPerSec=14.43 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6803|ppo_ep: 1|act_loss: -0.00485992431640625|cri_loss: -0.0023365020751953125|unsuper_loss: 0.0 +average reward score: 5.484375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.59%) |Training time=0.45s (20.76%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6804|ppo_ep: 1|act_loss: 0.05767822265625|cri_loss: 0.029541015625|unsuper_loss: 0.0 +average reward score: 5.078125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.40%) |Training time=0.45s (20.95%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6805|ppo_ep: 1|act_loss: -0.0240478515625|cri_loss: -0.01161956787109375|unsuper_loss: 0.0 +average reward score: 5.125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.54%) |Training time=0.45s (20.84%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6806|ppo_ep: 1|act_loss: -0.010101318359375|cri_loss: -0.004650115966796875|unsuper_loss: 0.0 +average reward score: 6.125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.38%) |Training time=0.45s (20.97%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6807|ppo_ep: 1|act_loss: 0.0011091232299804688|cri_loss: 0.0006418228149414062|unsuper_loss: 0.0 +average reward score: 5.83984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.38%) |Training time=0.45s (20.94%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6808|ppo_ep: 1|act_loss: 0.019500732421875|cri_loss: 0.01010894775390625|unsuper_loss: 0.0 +average reward score: 4.984375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.85%) |Training time=0.47s (21.53%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.54 +[2023-04-14 12:57:16,401] [INFO] [logging.py:96:log_dist] [Rank 0] step=6810, skipped=85, lr=[8.183267443089356e-07, 8.183267443089356e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:57:16,420] [INFO] [timer.py:199:stop] epoch=0/micro_step=6810/global_step=6810, RunningAvgSamplesPerSec=105.55701946349674, CurrSamplesPerSec=104.74279208024329, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:57:16,513] [INFO] [logging.py:96:log_dist] [Rank 0] step=6810, skipped=116, lr=[4.4077546857126923e-07, 4.4077546857126923e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6809|ppo_ep: 1|act_loss: -0.0061492919921875|cri_loss: -0.002872467041015625|unsuper_loss: 0.0 +average reward score: 5.6875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.83%) |Training time=0.47s (21.57%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6810|ppo_ep: 1|act_loss: 0.0145263671875|cri_loss: 0.00749969482421875|unsuper_loss: 0.0 +average reward score: 6.078125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.02%) |Training time=0.46s (21.36%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6811|ppo_ep: 1|act_loss: 0.005298614501953125|cri_loss: 0.0028667449951171875|unsuper_loss: 0.0 +average reward score: 5.1015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.47%) |Training time=0.45s (20.87%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6812|ppo_ep: 1|act_loss: 0.030303955078125|cri_loss: 0.0157012939453125|unsuper_loss: 0.0 +average reward score: 5.83203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.30%) |Training time=0.45s (21.07%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6813|ppo_ep: 1|act_loss: 0.031890869140625|cri_loss: 0.0163726806640625|unsuper_loss: 0.0 +average reward score: 5.7109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.37%) |Training time=0.45s (20.98%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6814|ppo_ep: 1|act_loss: 0.00494384765625|cri_loss: 0.002788543701171875|unsuper_loss: 0.0 +average reward score: 5.359375 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.61s (72.78%) |Training time=0.48s (21.54%) |Others=0.13 (5.69%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6815|ppo_ep: 1|act_loss: -0.010528564453125|cri_loss: -0.004947662353515625|unsuper_loss: 0.0 +average reward score: 5.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.23%) |Training time=0.46s (21.11%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6816|ppo_ep: 1|act_loss: -0.0227203369140625|cri_loss: -0.01078033447265625|unsuper_loss: 0.0 +average reward score: 6.0078125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.52%) |Training time=0.45s (20.84%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6817|ppo_ep: 1|act_loss: -0.007720947265625|cri_loss: -0.0037822723388671875|unsuper_loss: 0.0 +average reward score: 4.859375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.42%) |Training time=0.45s (20.92%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6818|ppo_ep: 1|act_loss: 0.015472412109375|cri_loss: 0.00791168212890625|unsuper_loss: 0.0 +average reward score: 4.86328125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.39%) |Training time=0.45s (20.92%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.54 +[2023-04-14 12:57:38,029] [INFO] [logging.py:96:log_dist] [Rank 0] step=6820, skipped=85, lr=[8.080063632752432e-07, 8.080063632752432e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:57:38,048] [INFO] [timer.py:199:stop] epoch=0/micro_step=6820/global_step=6820, RunningAvgSamplesPerSec=105.56231609549081, CurrSamplesPerSec=111.32460272236474, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:57:38,141] [INFO] [logging.py:96:log_dist] [Rank 0] step=6820, skipped=116, lr=[4.3533298994971135e-07, 4.3533298994971135e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6819|ppo_ep: 1|act_loss: 0.01313018798828125|cri_loss: 0.00687408447265625|unsuper_loss: 0.0 +average reward score: 5.67578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.45%) |Training time=0.45s (20.90%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6820|ppo_ep: 1|act_loss: 0.002880096435546875|cri_loss: 0.0019025802612304688|unsuper_loss: 0.0 +average reward score: 4.97265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.41%) |Training time=0.45s (20.93%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6821|ppo_ep: 1|act_loss: 0.01995849609375|cri_loss: 0.01031494140625|unsuper_loss: 0.0 +average reward score: 5.06640625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.46%) |Training time=0.45s (20.88%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6822|ppo_ep: 1|act_loss: 0.004322052001953125|cri_loss: 0.0023365020751953125|unsuper_loss: 0.0 +average reward score: 5.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.14%) |Training time=0.46s (21.25%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6823|ppo_ep: 1|act_loss: 0.0030231475830078125|cri_loss: 0.001598358154296875|unsuper_loss: 0.0 +average reward score: 5.04296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.50%) |Training time=0.45s (20.84%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6824|ppo_ep: 1|act_loss: 0.0154876708984375|cri_loss: 0.00792694091796875|unsuper_loss: 0.0 +average reward score: 5.23046875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.49%) |Training time=0.45s (20.85%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6825|ppo_ep: 1|act_loss: 0.0019969940185546875|cri_loss: 0.0011749267578125|unsuper_loss: 0.0 +average reward score: 5.6796875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.33%) |Training time=0.45s (21.05%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6826|ppo_ep: 1|act_loss: 0.00228118896484375|cri_loss: 0.0012102127075195312|unsuper_loss: 0.0 +average reward score: 5.29296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.59%) |Training time=0.45s (20.75%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6827|ppo_ep: 1|act_loss: 0.0092926025390625|cri_loss: 0.004863739013671875|unsuper_loss: 0.0 +average reward score: 4.99609375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.68%) |Training time=0.45s (20.67%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6828|ppo_ep: 1|act_loss: 0.01129150390625|cri_loss: 0.005802154541015625|unsuper_loss: 0.0 +average reward score: 4.984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.64%) |Training time=0.45s (20.71%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.54 +[2023-04-14 12:57:59,584] [INFO] [logging.py:96:log_dist] [Rank 0] step=6830, skipped=85, lr=[7.977455237777167e-07, 7.977455237777167e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:57:59,603] [INFO] [timer.py:199:stop] epoch=0/micro_step=6830/global_step=6830, RunningAvgSamplesPerSec=105.57015739655915, CurrSamplesPerSec=110.71612653627207, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:57:59,695] [INFO] [logging.py:96:log_dist] [Rank 0] step=6830, skipped=116, lr=[4.2992111467398014e-07, 4.2992111467398014e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6829|ppo_ep: 1|act_loss: 0.03900146484375|cri_loss: 0.021759033203125|unsuper_loss: 0.0 +average reward score: 5.11328125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.47%) |Training time=0.45s (20.90%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6830|ppo_ep: 1|act_loss: -0.0240936279296875|cri_loss: -0.011871337890625|unsuper_loss: 0.0 +average reward score: 5.34765625 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.36%) |Training time=0.45s (19.30%) |Others=0.10 (4.34%)|CurSamplesPerSec=13.78 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6831|ppo_ep: 1|act_loss: -0.05914306640625|cri_loss: -0.0196075439453125|unsuper_loss: 0.0 +average reward score: 4.9765625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.60%) |Training time=0.45s (20.71%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6832|ppo_ep: 1|act_loss: -0.03326416015625|cri_loss: -0.0150299072265625|unsuper_loss: 0.0 +average reward score: 5.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.63%) |Training time=0.45s (19.94%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.15 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6833|ppo_ep: 1|act_loss: 0.0269775390625|cri_loss: 0.0163726806640625|unsuper_loss: 0.0 +average reward score: 5.4609375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.94%) |Training time=0.44s (20.43%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6834|ppo_ep: 1|act_loss: 0.03277587890625|cri_loss: 0.0167388916015625|unsuper_loss: 0.0 +average reward score: 5.078125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.54%) |Training time=0.45s (20.83%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6835|ppo_ep: 1|act_loss: -0.0255889892578125|cri_loss: -0.01232147216796875|unsuper_loss: 0.0 +average reward score: 4.8671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.35%) |Training time=0.45s (21.01%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6836|ppo_ep: 1|act_loss: -0.007732391357421875|cri_loss: -0.003696441650390625|unsuper_loss: 0.0 +average reward score: 5.96875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.83%) |Training time=0.44s (20.51%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6837|ppo_ep: 1|act_loss: -0.0099334716796875|cri_loss: -0.004741668701171875|unsuper_loss: 0.0 +average reward score: 5.265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.43%) |Training time=0.45s (20.93%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6838|ppo_ep: 1|act_loss: 0.00641632080078125|cri_loss: 0.0033054351806640625|unsuper_loss: 0.0 +average reward score: 5.671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.45%) |Training time=0.45s (20.91%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.54 +[2023-04-14 12:58:21,453] [INFO] [logging.py:96:log_dist] [Rank 0] step=6840, skipped=85, lr=[7.875443779067518e-07, 7.875443779067518e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:58:21,472] [INFO] [timer.py:199:stop] epoch=0/micro_step=6840/global_step=6840, RunningAvgSamplesPerSec=105.57848061468555, CurrSamplesPerSec=109.96874088494299, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:58:21,564] [INFO] [logging.py:96:log_dist] [Rank 0] step=6840, skipped=116, lr=[4.2453992296112384e-07, 4.2453992296112384e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6839|ppo_ep: 1|act_loss: 0.01873779296875|cri_loss: 0.00957489013671875|unsuper_loss: 0.0 +average reward score: 4.9765625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.38%) |Training time=0.45s (20.99%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6840|ppo_ep: 1|act_loss: 0.0223388671875|cri_loss: 0.01148223876953125|unsuper_loss: 0.0 +average reward score: 5.9140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.43%) |Training time=0.45s (20.92%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6841|ppo_ep: 1|act_loss: 0.00296783447265625|cri_loss: 0.0015993118286132812|unsuper_loss: 0.0 +average reward score: 5.5 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.45%) |Training time=0.45s (20.84%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6842|ppo_ep: 1|act_loss: -0.007354736328125|cri_loss: -0.003299713134765625|unsuper_loss: 0.0 +average reward score: 5.71484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.32%) |Training time=0.45s (21.04%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6843|ppo_ep: 1|act_loss: -0.006191253662109375|cri_loss: -0.0029296875|unsuper_loss: 0.0 +average reward score: 5.26171875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.52%) |Training time=0.45s (20.82%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6844|ppo_ep: 1|act_loss: -0.024139404296875|cri_loss: -0.01123809814453125|unsuper_loss: 0.0 +average reward score: 4.97265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.42%) |Training time=0.45s (20.94%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6845|ppo_ep: 1|act_loss: -0.0137786865234375|cri_loss: -0.006679534912109375|unsuper_loss: 0.0 +average reward score: 6.1796875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.55%) |Training time=0.45s (20.79%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6846|ppo_ep: 1|act_loss: 0.00154876708984375|cri_loss: 0.0009756088256835938|unsuper_loss: 0.0 +average reward score: 5.3359375 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.22%) |Training time=0.45s (19.35%) |Others=0.10 (4.42%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6847|ppo_ep: 1|act_loss: 0.0138397216796875|cri_loss: 0.00823974609375|unsuper_loss: 0.0 +average reward score: 5.8671875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.40%) |Training time=0.51s (23.10%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6848|ppo_ep: 1|act_loss: 0.0116729736328125|cri_loss: 0.006023406982421875|unsuper_loss: 0.0 +average reward score: 5.3125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.76%) |Training time=0.42s (19.48%) |Others=0.10 (4.76%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.54 +[2023-04-14 12:58:43,264] [INFO] [logging.py:96:log_dist] [Rank 0] step=6850, skipped=85, lr=[7.774030768679357e-07, 7.774030768679357e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:58:43,283] [INFO] [timer.py:199:stop] epoch=0/micro_step=6850/global_step=6850, RunningAvgSamplesPerSec=105.58711633613972, CurrSamplesPerSec=132.29534435123225, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:58:43,375] [INFO] [logging.py:96:log_dist] [Rank 0] step=6850, skipped=116, lr=[4.191894945733832e-07, 4.191894945733832e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6849|ppo_ep: 1|act_loss: 0.0125579833984375|cri_loss: 0.00640869140625|unsuper_loss: 0.0 +average reward score: 5.6015625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.68s (76.71%) |Training time=0.41s (18.73%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6850|ppo_ep: 1|act_loss: -0.00998687744140625|cri_loss: -0.004856109619140625|unsuper_loss: 0.0 +average reward score: 6.171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.99%) |Training time=0.46s (21.31%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6851|ppo_ep: 1|act_loss: -0.0087127685546875|cri_loss: -0.004150390625|unsuper_loss: 0.0 +average reward score: 5.75390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.15%) |Training time=0.46s (21.19%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6852|ppo_ep: 1|act_loss: -0.015838623046875|cri_loss: -0.0067901611328125|unsuper_loss: 0.0 +average reward score: 5.6171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.00%) |Training time=0.46s (21.32%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6853|ppo_ep: 1|act_loss: 0.003696441650390625|cri_loss: 0.002101898193359375|unsuper_loss: 0.0 +average reward score: 5.8671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.38%) |Training time=0.46s (21.04%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6854|ppo_ep: 1|act_loss: -0.0147705078125|cri_loss: -0.0072479248046875|unsuper_loss: 0.0 +average reward score: 4.82421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.72%) |Training time=0.45s (20.60%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6855|ppo_ep: 1|act_loss: 0.0623779296875|cri_loss: 0.033233642578125|unsuper_loss: 0.0 +average reward score: 5.83984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.53%) |Training time=0.45s (20.87%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6856|ppo_ep: 1|act_loss: -0.020172119140625|cri_loss: -0.009246826171875|unsuper_loss: 0.0 +average reward score: 5.46875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.73%) |Training time=0.47s (21.61%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6857|ppo_ep: 1|act_loss: 0.021636962890625|cri_loss: 0.0110321044921875|unsuper_loss: 0.0 +average reward score: 5.1875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.05%) |Training time=0.46s (21.26%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6858|ppo_ep: 1|act_loss: 0.00704193115234375|cri_loss: 0.003887176513671875|unsuper_loss: 0.0 +average reward score: 4.7890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.25%) |Training time=0.48s (22.09%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.54 +[2023-04-14 12:59:04,969] [INFO] [logging.py:96:log_dist] [Rank 0] step=6860, skipped=85, lr=[7.673217709798165e-07, 7.673217709798165e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:59:04,988] [INFO] [timer.py:199:stop] epoch=0/micro_step=6860/global_step=6860, RunningAvgSamplesPerSec=105.58874862976468, CurrSamplesPerSec=101.04542453423234, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:59:05,081] [INFO] [logging.py:96:log_dist] [Rank 0] step=6860, skipped=116, lr=[4.1386990881701476e-07, 4.1386990881701476e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6859|ppo_ep: 1|act_loss: -0.013885498046875|cri_loss: -0.00675201416015625|unsuper_loss: 0.0 +average reward score: 4.53515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.28%) |Training time=0.48s (22.08%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6860|ppo_ep: 1|act_loss: 0.002838134765625|cri_loss: 0.0017108917236328125|unsuper_loss: 0.0 +average reward score: 5.08984375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.21%) |Training time=0.48s (22.15%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6861|ppo_ep: 1|act_loss: 0.009490966796875|cri_loss: 0.00521087646484375|unsuper_loss: 0.0 +average reward score: 5.43359375 +------------------------------------------------------------------------------------- +|E2E latency=2.42s |Gather latency=0.00s (0.00%) |Generate time=1.84s (75.80%) |Training time=0.49s (20.11%) |Others=0.10 (4.09%)|CurSamplesPerSec=13.21 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6862|ppo_ep: 1|act_loss: 0.0081939697265625|cri_loss: 0.00434112548828125|unsuper_loss: 0.0 +average reward score: 5.1328125 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.72s (75.86%) |Training time=0.45s (19.70%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.10 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6863|ppo_ep: 1|act_loss: -0.00571441650390625|cri_loss: -0.00274658203125|unsuper_loss: 0.0 +average reward score: 5.87109375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.85%) |Training time=0.42s (19.45%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6864|ppo_ep: 1|act_loss: 0.01397705078125|cri_loss: 0.0092926025390625|unsuper_loss: 0.0 +average reward score: 5.703125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.66%) |Training time=0.42s (19.62%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6865|ppo_ep: 1|act_loss: -0.015228271484375|cri_loss: -0.007534027099609375|unsuper_loss: 0.0 +average reward score: 5.265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.61%) |Training time=0.42s (19.59%) |Others=0.10 (4.80%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6866|ppo_ep: 1|act_loss: 0.004055023193359375|cri_loss: 0.0021114349365234375|unsuper_loss: 0.0 +average reward score: 5.1640625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.97%) |Training time=0.44s (20.35%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6867|ppo_ep: 1|act_loss: -0.002750396728515625|cri_loss: -0.0011930465698242188|unsuper_loss: 0.0 +average reward score: 5.56640625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.29%) |Training time=0.43s (20.04%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.54 +[2023-04-14 12:59:24,692] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 6868|ppo_ep: 1|act_loss: -0.062408447265625|cri_loss: -0.026763916015625|unsuper_loss: 0.0 +average reward score: 5.3515625 +------------------------------------------------------------------------------------- +|E2E latency=2.11s |Gather latency=0.00s (0.00%) |Generate time=1.61s (76.19%) |Training time=0.40s (19.08%) |Others=0.10 (4.74%)|CurSamplesPerSec=15.13 |AvgSamplesPerSec=14.54 +[2023-04-14 12:59:26,832] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +[2023-04-14 12:59:26,833] [INFO] [logging.py:96:log_dist] [Rank 0] step=6870, skipped=87, lr=[7.59300023224231e-07, 7.59300023224231e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:59:26,833] [INFO] [timer.py:199:stop] epoch=0/micro_step=6870/global_step=6870, RunningAvgSamplesPerSec=105.6030931208463, CurrSamplesPerSec=119.03673509931426, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:59:26,925] [INFO] [logging.py:96:log_dist] [Rank 0] step=6870, skipped=116, lr=[4.085812445411133e-07, 4.085812445411133e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6869|ppo_ep: 1|act_loss: -0.018341064453125|cri_loss: -0.00882720947265625|unsuper_loss: 0.0 +average reward score: 5.5234375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.29%) |Training time=0.43s (20.13%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6870|ppo_ep: 1|act_loss: -0.0390625|cri_loss: -0.019287109375|unsuper_loss: 0.0 +average reward score: 4.8828125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.02%) |Training time=0.49s (22.40%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6871|ppo_ep: 1|act_loss: 0.02459716796875|cri_loss: 0.01280975341796875|unsuper_loss: 0.0 +average reward score: 5.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.23%) |Training time=0.49s (22.27%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6872|ppo_ep: 1|act_loss: 0.0074005126953125|cri_loss: 0.0037975311279296875|unsuper_loss: 0.0 +average reward score: 5.48046875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.61%) |Training time=0.47s (21.67%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6873|ppo_ep: 1|act_loss: 0.006969451904296875|cri_loss: 0.00408935546875|unsuper_loss: 0.0 +average reward score: 5.73046875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.35%) |Training time=0.41s (19.06%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6874|ppo_ep: 1|act_loss: -0.00328826904296875|cri_loss: -0.0014743804931640625|unsuper_loss: 0.0 +average reward score: 5.375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.66s (76.41%) |Training time=0.41s (18.89%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6875|ppo_ep: 1|act_loss: -0.016204833984375|cri_loss: -0.00778961181640625|unsuper_loss: 0.0 +average reward score: 5.96484375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.31%) |Training time=0.46s (21.18%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6876|ppo_ep: 1|act_loss: -0.011138916015625|cri_loss: -0.00545501708984375|unsuper_loss: 0.0 +average reward score: 5.18359375 +------------------------------------------------------------------------------------- +|E2E latency=2.38s |Gather latency=0.00s (0.00%) |Generate time=1.78s (74.83%) |Training time=0.50s (20.93%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.42 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6877|ppo_ep: 1|act_loss: 0.018096923828125|cri_loss: 0.0096282958984375|unsuper_loss: 0.0 +average reward score: 5.640625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.47%) |Training time=0.45s (20.86%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6878|ppo_ep: 1|act_loss: -0.0166473388671875|cri_loss: -0.00775909423828125|unsuper_loss: 0.0 +average reward score: 5.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.25%) |Training time=0.43s (20.08%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.54 +[2023-04-14 12:59:48,741] [INFO] [logging.py:96:log_dist] [Rank 0] step=6880, skipped=87, lr=[7.493270845701285e-07, 7.493270845701285e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 12:59:48,759] [INFO] [timer.py:199:stop] epoch=0/micro_step=6880/global_step=6880, RunningAvgSamplesPerSec=105.60848080411316, CurrSamplesPerSec=114.59905703068834, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 12:59:48,851] [INFO] [logging.py:96:log_dist] [Rank 0] step=6880, skipped=116, lr=[4.033235801364402e-07, 4.033235801364402e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6879|ppo_ep: 1|act_loss: -0.0301055908203125|cri_loss: -0.0129241943359375|unsuper_loss: 0.0 +average reward score: 5.19921875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.01%) |Training time=0.44s (20.45%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6880|ppo_ep: 1|act_loss: -0.00040602684020996094|cri_loss: -5.7220458984375e-05|unsuper_loss: 0.0 +average reward score: 6.0 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.07%) |Training time=0.46s (21.28%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6881|ppo_ep: 1|act_loss: -0.0007367134094238281|cri_loss: -0.000293731689453125|unsuper_loss: 0.0 +average reward score: 5.8671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.25%) |Training time=0.46s (21.22%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6882|ppo_ep: 1|act_loss: 0.005901336669921875|cri_loss: 0.00348663330078125|unsuper_loss: 0.0 +average reward score: 5.91015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.19%) |Training time=0.46s (21.16%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6883|ppo_ep: 1|act_loss: 0.0136260986328125|cri_loss: 0.007183074951171875|unsuper_loss: 0.0 +average reward score: 6.16796875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.16%) |Training time=0.46s (21.30%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6884|ppo_ep: 1|act_loss: 0.0162353515625|cri_loss: 0.0089569091796875|unsuper_loss: 0.0 +average reward score: 4.76953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.22%) |Training time=0.46s (21.16%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6885|ppo_ep: 1|act_loss: -0.008392333984375|cri_loss: -0.0038623809814453125|unsuper_loss: 0.0 +average reward score: 5.65234375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.43%) |Training time=0.45s (20.92%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6886|ppo_ep: 1|act_loss: -0.0019283294677734375|cri_loss: -0.0008502006530761719|unsuper_loss: 0.0 +average reward score: 6.03515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.36%) |Training time=0.46s (21.01%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6887|ppo_ep: 1|act_loss: 0.0006146430969238281|cri_loss: 0.00042629241943359375|unsuper_loss: 0.0 +average reward score: 5.59765625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.78%) |Training time=0.45s (20.69%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6888|ppo_ep: 1|act_loss: 0.009735107421875|cri_loss: 0.00511932373046875|unsuper_loss: 0.0 +average reward score: 5.375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.19%) |Training time=0.45s (20.88%) |Others=0.11 (4.93%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.54 +[2023-04-14 13:00:10,366] [INFO] [logging.py:96:log_dist] [Rank 0] step=6890, skipped=87, lr=[7.394145572206594e-07, 7.394145572206594e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:00:10,384] [INFO] [timer.py:199:stop] epoch=0/micro_step=6890/global_step=6890, RunningAvgSamplesPerSec=105.61396103794894, CurrSamplesPerSec=112.90964551727154, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:00:10,477] [INFO] [logging.py:96:log_dist] [Rank 0] step=6890, skipped=116, lr=[3.9809699353426603e-07, 3.9809699353426603e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6889|ppo_ep: 1|act_loss: -0.007568359375|cri_loss: -0.0036182403564453125|unsuper_loss: 0.0 +average reward score: 5.890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.66%) |Training time=0.45s (20.69%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6890|ppo_ep: 1|act_loss: 0.0322265625|cri_loss: 0.0164947509765625|unsuper_loss: 0.0 +average reward score: 5.84765625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.21%) |Training time=0.46s (21.14%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6891|ppo_ep: 1|act_loss: 0.00508880615234375|cri_loss: 0.002838134765625|unsuper_loss: 0.0 +average reward score: 5.00390625 +------------------------------------------------------------------------------------- +|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.63s (72.97%) |Training time=0.50s (22.53%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.31 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6892|ppo_ep: 1|act_loss: -0.0269317626953125|cri_loss: -0.0132293701171875|unsuper_loss: 0.0 +average reward score: 5.48046875 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.09%) |Training time=0.46s (19.59%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.70 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6893|ppo_ep: 1|act_loss: -0.014617919921875|cri_loss: -0.007213592529296875|unsuper_loss: 0.0 +average reward score: 5.48046875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.13%) |Training time=0.49s (22.27%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6894|ppo_ep: 1|act_loss: -0.0035247802734375|cri_loss: -0.0015897750854492188|unsuper_loss: 0.0 +average reward score: 5.25390625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.09%) |Training time=0.49s (22.29%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.54 +[2023-04-14 13:00:23,665] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384 +epoch: 0|step: 6895|ppo_ep: 1|act_loss: -0.02349853515625|cri_loss: -0.01157379150390625|unsuper_loss: 0.0 +average reward score: 5.625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.20%) |Training time=0.46s (21.22%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6896|ppo_ep: 1|act_loss: 0.01934814453125|cri_loss: 0.01012420654296875|unsuper_loss: 0.0 +average reward score: 5.9375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.01%) |Training time=0.49s (22.37%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6897|ppo_ep: 1|act_loss: -0.0141448974609375|cri_loss: -0.006839752197265625|unsuper_loss: 0.0 +average reward score: 4.65625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.80%) |Training time=0.47s (21.58%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6898|ppo_ep: 1|act_loss: 0.0094146728515625|cri_loss: 0.00492095947265625|unsuper_loss: 0.0 +average reward score: 5.42578125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.15%) |Training time=0.48s (22.25%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.54 +[2023-04-14 13:00:32,334] [INFO] [logging.py:96:log_dist] [Rank 0] step=6900, skipped=88, lr=[7.305450557257688e-07, 7.305450557257688e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:00:32,352] [INFO] [timer.py:199:stop] epoch=0/micro_step=6900/global_step=6900, RunningAvgSamplesPerSec=105.61108100532957, CurrSamplesPerSec=104.086465661044, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:00:32,445] [INFO] [logging.py:96:log_dist] [Rank 0] step=6900, skipped=116, lr=[3.9290156220521235e-07, 3.9290156220521235e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6899|ppo_ep: 1|act_loss: -0.0037364959716796875|cri_loss: -0.0016689300537109375|unsuper_loss: 0.0 +average reward score: 5.3984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.82%) |Training time=0.47s (21.66%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6900|ppo_ep: 1|act_loss: 0.0152740478515625|cri_loss: 0.0078887939453125|unsuper_loss: 0.0 +average reward score: 4.59765625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.19%) |Training time=0.48s (22.20%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6901|ppo_ep: 1|act_loss: -0.01605224609375|cri_loss: -0.007904052734375|unsuper_loss: 0.0 +average reward score: 5.3515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.42%) |Training time=0.48s (21.95%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6902|ppo_ep: 1|act_loss: -0.00234222412109375|cri_loss: -0.0009698867797851562|unsuper_loss: 0.0 +average reward score: 5.01953125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.46%) |Training time=0.48s (21.91%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.54 +[2023-04-14 13:00:41,119] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 6903|ppo_ep: 1|act_loss: 0.004150390625|cri_loss: 0.003192901611328125|unsuper_loss: 0.0 +average reward score: 5.17578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.48s (22.33%) |Others=0.09 (4.05%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.54 +[2023-04-14 13:00:43,275] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 6904|ppo_ep: 1|act_loss: -0.030059814453125|cri_loss: -0.014678955078125|unsuper_loss: 0.0 +average reward score: 5.9375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.95%) |Training time=0.47s (21.87%) |Others=0.09 (4.18%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6905|ppo_ep: 1|act_loss: -0.02294921875|cri_loss: -0.01105499267578125|unsuper_loss: 0.0 +average reward score: 5.89453125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.48s (21.99%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6906|ppo_ep: 1|act_loss: -0.00711822509765625|cri_loss: -0.0034732818603515625|unsuper_loss: 0.0 +average reward score: 5.53125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.52%) |Training time=0.47s (21.80%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6907|ppo_ep: 1|act_loss: -0.0452880859375|cri_loss: -0.021453857421875|unsuper_loss: 0.0 +average reward score: 5.73046875 +------------------------------------------------------------------------------------- +|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.78s (75.15%) |Training time=0.49s (20.61%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.52 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6908|ppo_ep: 1|act_loss: -0.00554656982421875|cri_loss: -0.002651214599609375|unsuper_loss: 0.0 +average reward score: 5.109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.04%) |Training time=0.44s (20.26%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.54 +[2023-04-14 13:00:54,154] [INFO] [logging.py:96:log_dist] [Rank 0] step=6910, skipped=88, lr=[7.207477138987122e-07, 7.207477138987122e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:00:54,172] [INFO] [timer.py:199:stop] epoch=0/micro_step=6910/global_step=6910, RunningAvgSamplesPerSec=105.6090924949022, CurrSamplesPerSec=118.3087503415692, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:00:54,265] [INFO] [logging.py:96:log_dist] [Rank 0] step=6910, skipped=118, lr=[3.88767700704093e-07, 3.88767700704093e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6909|ppo_ep: 1|act_loss: -0.0126800537109375|cri_loss: -0.00616455078125|unsuper_loss: 0.0 +average reward score: 5.578125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.05%) |Training time=0.43s (20.28%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6910|ppo_ep: 1|act_loss: 0.0088958740234375|cri_loss: 0.004871368408203125|unsuper_loss: 0.0 +average reward score: 5.265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.99%) |Training time=0.46s (21.32%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6911|ppo_ep: 1|act_loss: 0.00817108154296875|cri_loss: 0.0043182373046875|unsuper_loss: 0.0 +average reward score: 5.7734375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.45s (21.04%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6912|ppo_ep: 1|act_loss: -0.016021728515625|cri_loss: -0.007843017578125|unsuper_loss: 0.0 +average reward score: 4.8046875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.96%) |Training time=0.46s (21.37%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6913|ppo_ep: 1|act_loss: 0.0341796875|cri_loss: 0.0176849365234375|unsuper_loss: 0.0 +average reward score: 4.98828125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.40%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6914|ppo_ep: 1|act_loss: -0.0117950439453125|cri_loss: -0.00492095947265625|unsuper_loss: 0.0 +average reward score: 5.0390625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.14%) |Training time=0.45s (21.18%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6915|ppo_ep: 1|act_loss: 0.0032196044921875|cri_loss: 0.001708984375|unsuper_loss: 0.0 +average reward score: 5.30078125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.17%) |Training time=0.45s (21.24%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6916|ppo_ep: 1|act_loss: -0.0145416259765625|cri_loss: -0.007175445556640625|unsuper_loss: 0.0 +average reward score: 5.06640625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.19%) |Training time=0.46s (21.17%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6917|ppo_ep: 1|act_loss: -0.0222930908203125|cri_loss: -0.01084136962890625|unsuper_loss: 0.0 +average reward score: 5.21484375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.33%) |Training time=0.45s (21.07%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6918|ppo_ep: 1|act_loss: -0.0175933837890625|cri_loss: -0.00868988037109375|unsuper_loss: 0.0 +average reward score: 5.23046875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.97%) |Training time=0.46s (21.35%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.54 +[2023-04-14 13:01:15,589] [INFO] [logging.py:96:log_dist] [Rank 0] step=6920, skipped=88, lr=[7.110112069915053e-07, 7.110112069915053e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:01:15,607] [INFO] [timer.py:199:stop] epoch=0/micro_step=6920/global_step=6920, RunningAvgSamplesPerSec=105.61367204126265, CurrSamplesPerSec=104.81772013304312, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:01:15,700] [INFO] [logging.py:96:log_dist] [Rank 0] step=6920, skipped=118, lr=[3.836285426178418e-07, 3.836285426178418e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6919|ppo_ep: 1|act_loss: -0.02325439453125|cri_loss: -0.0114898681640625|unsuper_loss: 0.0 +average reward score: 4.4921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.73%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6920|ppo_ep: 1|act_loss: 0.0015735626220703125|cri_loss: 0.0011959075927734375|unsuper_loss: 0.0 +average reward score: 5.515625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.95%) |Training time=0.46s (21.13%) |Others=0.11 (4.92%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6921|ppo_ep: 1|act_loss: -0.00385284423828125|cri_loss: -0.0017328262329101562|unsuper_loss: 0.0 +average reward score: 5.83984375 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.58%) |Training time=0.47s (21.00%) |Others=0.10 (4.42%)|CurSamplesPerSec=14.15 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6922|ppo_ep: 1|act_loss: -0.00983428955078125|cri_loss: -0.0047607421875|unsuper_loss: 0.0 +average reward score: 6.046875 +------------------------------------------------------------------------------------- +|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.89%) |Training time=0.49s (20.85%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.64 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6923|ppo_ep: 1|act_loss: -0.0155029296875|cri_loss: -0.007350921630859375|unsuper_loss: 0.0 +average reward score: 4.94921875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.13%) |Training time=0.45s (21.18%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6924|ppo_ep: 1|act_loss: 0.0159149169921875|cri_loss: 0.00820159912109375|unsuper_loss: 0.0 +average reward score: 5.5625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.95%) |Training time=0.46s (21.39%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6925|ppo_ep: 1|act_loss: 0.02459716796875|cri_loss: 0.01308441162109375|unsuper_loss: 0.0 +average reward score: 5.109375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.05%) |Training time=0.46s (21.28%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6926|ppo_ep: 1|act_loss: -0.02740478515625|cri_loss: -0.013519287109375|unsuper_loss: 0.0 +average reward score: 5.2265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.27%) |Training time=0.45s (21.06%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6927|ppo_ep: 1|act_loss: 0.019256591796875|cri_loss: 0.00994110107421875|unsuper_loss: 0.0 +average reward score: 5.46484375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.24%) |Training time=0.45s (21.18%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6928|ppo_ep: 1|act_loss: -0.00569915771484375|cri_loss: -0.002689361572265625|unsuper_loss: 0.0 +average reward score: 4.53125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.20%) |Training time=0.45s (21.11%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.54 +[2023-04-14 13:01:37,364] [INFO] [logging.py:96:log_dist] [Rank 0] step=6930, skipped=88, lr=[7.013356793226694e-07, 7.013356793226694e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:01:37,383] [INFO] [timer.py:199:stop] epoch=0/micro_step=6930/global_step=6930, RunningAvgSamplesPerSec=105.61659593065568, CurrSamplesPerSec=112.18822396565588, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:01:37,475] [INFO] [logging.py:96:log_dist] [Rank 0] step=6930, skipped=118, lr=[3.7852075426205644e-07, 3.7852075426205644e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6929|ppo_ep: 1|act_loss: -0.0022640228271484375|cri_loss: -0.0007987022399902344|unsuper_loss: 0.0 +average reward score: 4.76953125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.39%) |Training time=0.45s (20.94%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6930|ppo_ep: 1|act_loss: 0.00907135009765625|cri_loss: 0.00473785400390625|unsuper_loss: 0.0 +average reward score: 5.1640625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.38%) |Training time=0.45s (20.94%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6931|ppo_ep: 1|act_loss: 0.0194549560546875|cri_loss: 0.01029205322265625|unsuper_loss: 0.0 +average reward score: 5.54296875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.45s (21.08%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6932|ppo_ep: 1|act_loss: -0.004150390625|cri_loss: -0.00170135498046875|unsuper_loss: 0.0 +average reward score: 5.06640625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.92%) |Training time=0.46s (21.41%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6933|ppo_ep: 1|act_loss: -0.0095367431640625|cri_loss: -0.00458526611328125|unsuper_loss: 0.0 +average reward score: 4.95703125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.45s (21.18%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6934|ppo_ep: 1|act_loss: 0.0804443359375|cri_loss: 0.04730224609375|unsuper_loss: 0.0 +average reward score: 5.4609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.48s (22.05%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6935|ppo_ep: 1|act_loss: -0.00833892822265625|cri_loss: -0.0038661956787109375|unsuper_loss: 0.0 +average reward score: 5.125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.53%) |Training time=0.47s (21.82%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6936|ppo_ep: 1|act_loss: 0.00691986083984375|cri_loss: 0.0047149658203125|unsuper_loss: 0.0 +average reward score: 5.1328125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.77%) |Training time=0.47s (21.58%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6937|ppo_ep: 1|act_loss: -0.0009016990661621094|cri_loss: -0.00012063980102539062|unsuper_loss: 0.0 +average reward score: 5.41796875 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.59s (69.74%) |Training time=0.59s (25.82%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.05 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6938|ppo_ep: 1|act_loss: -0.00502777099609375|cri_loss: -0.0024127960205078125|unsuper_loss: 0.0 +average reward score: 5.05859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.48s (21.95%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.54 +[2023-04-14 13:01:59,006] [INFO] [logging.py:96:log_dist] [Rank 0] step=6940, skipped=88, lr=[6.917212743068631e-07, 6.917212743068631e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:01:59,025] [INFO] [timer.py:199:stop] epoch=0/micro_step=6940/global_step=6940, RunningAvgSamplesPerSec=105.61316551068417, CurrSamplesPerSec=110.00605527115124, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:01:59,117] [INFO] [logging.py:96:log_dist] [Rank 0] step=6940, skipped=118, lr=[3.73444411346483e-07, 3.73444411346483e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6939|ppo_ep: 1|act_loss: -0.0012607574462890625|cri_loss: -0.00046753883361816406|unsuper_loss: 0.0 +average reward score: 5.26171875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.35%) |Training time=0.45s (21.02%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6940|ppo_ep: 1|act_loss: 0.0012426376342773438|cri_loss: 0.0006957054138183594|unsuper_loss: 0.0 +average reward score: 5.62109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.28%) |Training time=0.45s (21.07%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6941|ppo_ep: 1|act_loss: -0.0323486328125|cri_loss: -0.0152435302734375|unsuper_loss: 0.0 +average reward score: 5.43359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.65%) |Training time=0.44s (20.68%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6942|ppo_ep: 1|act_loss: 0.03973388671875|cri_loss: 0.020416259765625|unsuper_loss: 0.0 +average reward score: 5.51953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.09%) |Training time=0.46s (21.27%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6943|ppo_ep: 1|act_loss: 0.0152435302734375|cri_loss: 0.00778961181640625|unsuper_loss: 0.0 +average reward score: 4.96484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.76%) |Training time=0.46s (21.58%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6944|ppo_ep: 1|act_loss: -0.019775390625|cri_loss: -0.00980377197265625|unsuper_loss: 0.0 +average reward score: 6.36328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.50%) |Training time=0.48s (21.88%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6945|ppo_ep: 1|act_loss: -0.0194091796875|cri_loss: -0.00959014892578125|unsuper_loss: 0.0 +average reward score: 4.8984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.74%) |Training time=0.46s (21.60%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6946|ppo_ep: 1|act_loss: -0.0009250640869140625|cri_loss: -0.00030112266540527344|unsuper_loss: 0.0 +average reward score: 6.0 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.38%) |Training time=0.47s (21.98%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6947|ppo_ep: 1|act_loss: -0.016876220703125|cri_loss: -0.00833892822265625|unsuper_loss: 0.0 +average reward score: 5.26953125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.60%) |Training time=0.47s (21.73%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6948|ppo_ep: 1|act_loss: -0.0037174224853515625|cri_loss: -0.0017242431640625|unsuper_loss: 0.0 +average reward score: 5.1640625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.56%) |Training time=0.47s (21.78%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.54 +[2023-04-14 13:02:20,540] [INFO] [logging.py:96:log_dist] [Rank 0] step=6950, skipped=88, lr=[6.821681344527626e-07, 6.821681344527626e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:02:20,558] [INFO] [timer.py:199:stop] epoch=0/micro_step=6950/global_step=6950, RunningAvgSamplesPerSec=105.61406004000848, CurrSamplesPerSec=104.52931262752917, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:02:20,651] [INFO] [logging.py:96:log_dist] [Rank 0] step=6950, skipped=118, lr=[3.683995891147696e-07, 3.683995891147696e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6949|ppo_ep: 1|act_loss: -0.019744873046875|cri_loss: -0.00972747802734375|unsuper_loss: 0.0 +average reward score: 4.98828125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.80%) |Training time=0.47s (21.69%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6950|ppo_ep: 1|act_loss: 0.0229644775390625|cri_loss: 0.01183319091796875|unsuper_loss: 0.0 +average reward score: 5.9609375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.02%) |Training time=0.47s (21.40%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6951|ppo_ep: 1|act_loss: -0.0074310302734375|cri_loss: -0.0035552978515625|unsuper_loss: 0.0 +average reward score: 5.6640625 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.68s (74.76%) |Training time=0.47s (20.80%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.20 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6952|ppo_ep: 1|act_loss: -0.001399993896484375|cri_loss: 0.001354217529296875|unsuper_loss: 0.0 +average reward score: 5.0625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.54%) |Training time=0.47s (21.82%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6953|ppo_ep: 1|act_loss: -0.0012025833129882812|cri_loss: -0.0003528594970703125|unsuper_loss: 0.0 +average reward score: 4.84375 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.89%) |Training time=0.46s (19.80%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.76 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6954|ppo_ep: 1|act_loss: 0.03936767578125|cri_loss: 0.0204010009765625|unsuper_loss: 0.0 +average reward score: 5.41796875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.61%) |Training time=0.47s (21.72%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6955|ppo_ep: 1|act_loss: 0.0828857421875|cri_loss: 0.0494384765625|unsuper_loss: 0.0 +average reward score: 5.48828125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.11%) |Training time=0.46s (21.34%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6956|ppo_ep: 1|act_loss: 0.0193939208984375|cri_loss: 0.010009765625|unsuper_loss: 0.0 +average reward score: 5.4609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.36%) |Training time=0.48s (22.02%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6957|ppo_ep: 1|act_loss: -0.0172119140625|cri_loss: -0.00844573974609375|unsuper_loss: 0.0 +average reward score: 5.296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.80%) |Training time=0.46s (21.49%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6958|ppo_ep: 1|act_loss: 0.001201629638671875|cri_loss: 0.0013179779052734375|unsuper_loss: 0.0 +average reward score: 4.984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.88%) |Training time=0.46s (21.46%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.54 +[2023-04-14 13:02:42,376] [INFO] [logging.py:96:log_dist] [Rank 0] step=6960, skipped=88, lr=[6.726764013609494e-07, 6.726764013609494e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:02:42,395] [INFO] [timer.py:199:stop] epoch=0/micro_step=6960/global_step=6960, RunningAvgSamplesPerSec=105.61365815136743, CurrSamplesPerSec=104.42098973666357, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:02:42,487] [INFO] [logging.py:96:log_dist] [Rank 0] step=6960, skipped=118, lr=[3.63386362343352e-07, 3.63386362343352e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6959|ppo_ep: 1|act_loss: -0.0089263916015625|cri_loss: -0.00392913818359375|unsuper_loss: 0.0 +average reward score: 5.41796875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.76%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6960|ppo_ep: 1|act_loss: 0.005954742431640625|cri_loss: 0.003894805908203125|unsuper_loss: 0.0 +average reward score: 5.484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.64%) |Training time=0.47s (21.71%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6961|ppo_ep: 1|act_loss: -0.0018558502197265625|cri_loss: -0.0008111000061035156|unsuper_loss: 0.0 +average reward score: 4.68359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.94%) |Training time=0.46s (21.50%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6962|ppo_ep: 1|act_loss: -0.0201416015625|cri_loss: -0.007419586181640625|unsuper_loss: 0.0 +average reward score: 5.4453125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.58%) |Training time=0.47s (21.76%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6963|ppo_ep: 1|act_loss: -0.01812744140625|cri_loss: -0.00882720947265625|unsuper_loss: 0.0 +average reward score: 5.15625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.59%) |Training time=0.47s (21.86%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6964|ppo_ep: 1|act_loss: -0.00725555419921875|cri_loss: -0.0033168792724609375|unsuper_loss: 0.0 +average reward score: 5.37109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.54%) |Training time=0.47s (21.80%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6965|ppo_ep: 1|act_loss: 0.0039215087890625|cri_loss: 0.002716064453125|unsuper_loss: 0.0 +average reward score: 5.1328125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.81%) |Training time=0.46s (21.54%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6966|ppo_ep: 1|act_loss: 0.0004565715789794922|cri_loss: 0.00033473968505859375|unsuper_loss: 0.0 +average reward score: 5.4375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.40%) |Training time=0.47s (21.98%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6967|ppo_ep: 1|act_loss: -0.090576171875|cri_loss: -0.0345458984375|unsuper_loss: 0.0 +average reward score: 5.4921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.77%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6968|ppo_ep: 1|act_loss: 0.017822265625|cri_loss: 0.00933074951171875|unsuper_loss: 0.0 +average reward score: 5.109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.68%) |Training time=0.47s (21.68%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.54 +[2023-04-14 13:03:04,094] [INFO] [logging.py:96:log_dist] [Rank 0] step=6970, skipped=88, lr=[6.632462157218022e-07, 6.632462157218022e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:03:04,112] [INFO] [timer.py:199:stop] epoch=0/micro_step=6970/global_step=6970, RunningAvgSamplesPerSec=105.61183941805801, CurrSamplesPerSec=100.77752732722541, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:03:04,205] [INFO] [logging.py:96:log_dist] [Rank 0] step=6970, skipped=118, lr=[3.5840480534034355e-07, 3.5840480534034355e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6969|ppo_ep: 1|act_loss: 0.007671356201171875|cri_loss: 0.004016876220703125|unsuper_loss: 0.0 +average reward score: 5.8984375 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.23%) |Training time=0.48s (20.51%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.66 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6970|ppo_ep: 1|act_loss: 0.0205230712890625|cri_loss: 0.01056671142578125|unsuper_loss: 0.0 +average reward score: 5.0234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.63%) |Training time=0.47s (21.71%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6971|ppo_ep: 1|act_loss: -0.00946807861328125|cri_loss: -0.004543304443359375|unsuper_loss: 0.0 +average reward score: 5.45703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.67%) |Training time=0.47s (21.78%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6972|ppo_ep: 1|act_loss: -0.007415771484375|cri_loss: -0.003566741943359375|unsuper_loss: 0.0 +average reward score: 5.80859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.81%) |Training time=0.47s (21.57%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6973|ppo_ep: 1|act_loss: -0.034423828125|cri_loss: -0.0169219970703125|unsuper_loss: 0.0 +average reward score: 5.34765625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.73%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6974|ppo_ep: 1|act_loss: 0.00567626953125|cri_loss: 0.002979278564453125|unsuper_loss: 0.0 +average reward score: 5.0625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.64%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6975|ppo_ep: 1|act_loss: -0.020263671875|cri_loss: -0.009857177734375|unsuper_loss: 0.0 +average reward score: 5.0546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.71%) |Training time=0.47s (21.73%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6976|ppo_ep: 1|act_loss: 0.00650787353515625|cri_loss: 0.0033054351806640625|unsuper_loss: 0.0 +average reward score: 5.28515625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.49%) |Training time=0.47s (21.86%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6977|ppo_ep: 1|act_loss: -0.0411376953125|cri_loss: -0.019805908203125|unsuper_loss: 0.0 +average reward score: 4.625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.71%) |Training time=0.46s (21.63%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6978|ppo_ep: 1|act_loss: 0.01885986328125|cri_loss: 0.0095672607421875|unsuper_loss: 0.0 +average reward score: 5.46875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.40%) |Training time=0.47s (21.97%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.54 +[2023-04-14 13:03:25,646] [INFO] [logging.py:96:log_dist] [Rank 0] step=6980, skipped=88, lr=[6.53877717313422e-07, 6.53877717313422e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:03:25,664] [INFO] [timer.py:199:stop] epoch=0/micro_step=6980/global_step=6980, RunningAvgSamplesPerSec=105.6101181820172, CurrSamplesPerSec=102.82117836505277, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:03:25,757] [INFO] [logging.py:96:log_dist] [Rank 0] step=6980, skipped=118, lr=[3.5345499194443663e-07, 3.5345499194443663e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6979|ppo_ep: 1|act_loss: -0.0111846923828125|cri_loss: -0.00531768798828125|unsuper_loss: 0.0 +average reward score: 4.7109375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.86%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6980|ppo_ep: 1|act_loss: -0.02044677734375|cri_loss: -0.00836181640625|unsuper_loss: 0.0 +average reward score: 5.8671875 +------------------------------------------------------------------------------------- +|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.64s (71.45%) |Training time=0.55s (24.16%) |Others=0.10 (4.38%)|CurSamplesPerSec=13.94 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6981|ppo_ep: 1|act_loss: -0.0110626220703125|cri_loss: -0.005458831787109375|unsuper_loss: 0.0 +average reward score: 5.90625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.54%) |Training time=0.47s (21.83%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6982|ppo_ep: 1|act_loss: 0.0313720703125|cri_loss: 0.01617431640625|unsuper_loss: 0.0 +average reward score: 4.60546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.54%) |Training time=0.47s (21.81%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6983|ppo_ep: 1|act_loss: 0.0027866363525390625|cri_loss: 0.0015430450439453125|unsuper_loss: 0.0 +average reward score: 5.45703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.91%) |Training time=0.46s (21.43%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6984|ppo_ep: 1|act_loss: -0.00066375732421875|cri_loss: -0.0002378225326538086|unsuper_loss: 0.0 +average reward score: 5.03125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.91%) |Training time=0.46s (21.43%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6985|ppo_ep: 1|act_loss: 0.031341552734375|cri_loss: 0.015899658203125|unsuper_loss: 0.0 +average reward score: 6.01171875 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.57%) |Training time=0.47s (20.19%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.83 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6986|ppo_ep: 1|act_loss: 0.0004143714904785156|cri_loss: 0.00025391578674316406|unsuper_loss: 0.0 +average reward score: 5.1171875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.72%) |Training time=0.46s (21.61%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6987|ppo_ep: 1|act_loss: 0.005809783935546875|cri_loss: 0.0031070709228515625|unsuper_loss: 0.0 +average reward score: 4.97265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.61%) |Training time=0.44s (20.71%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6988|ppo_ep: 1|act_loss: 0.029205322265625|cri_loss: 0.0149383544921875|unsuper_loss: 0.0 +average reward score: 5.1328125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.17%) |Training time=0.46s (21.19%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.54 +[2023-04-14 13:03:47,461] [INFO] [logging.py:96:log_dist] [Rank 0] step=6990, skipped=88, lr=[6.445710449995562e-07, 6.445710449995562e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:03:47,479] [INFO] [timer.py:199:stop] epoch=0/micro_step=6990/global_step=6990, RunningAvgSamplesPerSec=105.60882625178036, CurrSamplesPerSec=114.9070274944245, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:03:47,572] [INFO] [logging.py:96:log_dist] [Rank 0] step=6990, skipped=118, lr=[3.48536995523808e-07, 3.48536995523808e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6989|ppo_ep: 1|act_loss: -0.0030765533447265625|cri_loss: -0.0014123916625976562|unsuper_loss: 0.0 +average reward score: 5.17578125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.87%) |Training time=0.44s (20.58%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6990|ppo_ep: 1|act_loss: -0.019012451171875|cri_loss: -0.0092620849609375|unsuper_loss: 0.0 +average reward score: 5.05078125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.51%) |Training time=0.45s (20.84%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6991|ppo_ep: 1|act_loss: 0.0038299560546875|cri_loss: 0.00209808349609375|unsuper_loss: 0.0 +average reward score: 5.2578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.55%) |Training time=0.45s (20.80%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6992|ppo_ep: 1|act_loss: -0.0175018310546875|cri_loss: -0.0086212158203125|unsuper_loss: 0.0 +average reward score: 5.46875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.38%) |Training time=0.45s (20.95%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6993|ppo_ep: 1|act_loss: -0.0030231475830078125|cri_loss: -0.00106048583984375|unsuper_loss: 0.0 +average reward score: 5.83203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.39%) |Training time=0.45s (21.06%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6994|ppo_ep: 1|act_loss: -0.0001442432403564453|cri_loss: 3.1948089599609375e-05|unsuper_loss: 0.0 +average reward score: 5.75 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.29%) |Training time=0.45s (21.05%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6995|ppo_ep: 1|act_loss: 0.0048828125|cri_loss: 0.002719879150390625|unsuper_loss: 0.0 +average reward score: 4.8359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.28%) |Training time=0.46s (21.16%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6996|ppo_ep: 1|act_loss: 0.0269775390625|cri_loss: 0.01395416259765625|unsuper_loss: 0.0 +average reward score: 4.69921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.66%) |Training time=0.46s (21.60%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6997|ppo_ep: 1|act_loss: 0.00444793701171875|cri_loss: 0.0022983551025390625|unsuper_loss: 0.0 +average reward score: 5.73046875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.78%) |Training time=0.46s (21.55%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.54 +epoch: 0|step: 6998|ppo_ep: 1|act_loss: -0.0012836456298828125|cri_loss: -0.00047588348388671875|unsuper_loss: 0.0 +average reward score: 5.37109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.41%) |Training time=0.47s (21.91%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.55 +[2023-04-14 13:04:08,962] [INFO] [logging.py:96:log_dist] [Rank 0] step=7000, skipped=88, lr=[6.353263367275399e-07, 6.353263367275399e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:04:08,980] [INFO] [timer.py:199:stop] epoch=0/micro_step=7000/global_step=7000, RunningAvgSamplesPerSec=105.61414569249841, CurrSamplesPerSec=114.3478458671951, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:04:09,073] [INFO] [logging.py:96:log_dist] [Rank 0] step=7000, skipped=118, lr=[3.43650888975027e-07, 3.43650888975027e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 6999|ppo_ep: 1|act_loss: -0.026153564453125|cri_loss: -0.012939453125|unsuper_loss: 0.0 +average reward score: 5.78125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.84%) |Training time=0.44s (20.60%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7000|ppo_ep: 1|act_loss: 0.012725830078125|cri_loss: 0.00653839111328125|unsuper_loss: 0.0 +average reward score: 5.26953125 +------------------------------------------------------------------------------------- +|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.61s (71.96%) |Training time=0.46s (20.72%) |Others=0.16 (7.32%)|CurSamplesPerSec=14.27 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7001|ppo_ep: 1|act_loss: -0.005931854248046875|cri_loss: -0.0025920867919921875|unsuper_loss: 0.0 +average reward score: 4.66796875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.04%) |Training time=0.44s (20.36%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7002|ppo_ep: 1|act_loss: -0.0015783309936523438|cri_loss: -0.0006132125854492188|unsuper_loss: 0.0 +average reward score: 4.69140625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.45s (21.07%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7003|ppo_ep: 1|act_loss: 0.017547607421875|cri_loss: 0.009063720703125|unsuper_loss: 0.0 +average reward score: 5.6171875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.77%) |Training time=0.44s (20.65%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7004|ppo_ep: 1|act_loss: 0.03875732421875|cri_loss: 0.0197906494140625|unsuper_loss: 0.0 +average reward score: 5.015625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.32%) |Training time=0.45s (21.02%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.55 +[2023-04-14 13:04:22,083] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 7005|ppo_ep: 1|act_loss: 0.0027103424072265625|cri_loss: 0.0014028549194335938|unsuper_loss: 0.0 +average reward score: 5.22265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.47%) |Training time=0.46s (21.29%) |Others=0.09 (4.23%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.55 +[2023-04-14 13:04:24,221] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 7006|ppo_ep: 1|act_loss: -0.00681304931640625|cri_loss: -0.0032558441162109375|unsuper_loss: 0.0 +average reward score: 5.15234375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.77%) |Training time=0.45s (21.04%) |Others=0.09 (4.20%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7007|ppo_ep: 1|act_loss: -0.01065826416015625|cri_loss: -0.005084991455078125|unsuper_loss: 0.0 +average reward score: 5.125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.07%) |Training time=0.46s (21.32%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7008|ppo_ep: 1|act_loss: 0.01336669921875|cri_loss: 0.007213592529296875|unsuper_loss: 0.0 +average reward score: 5.6875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.34%) |Training time=0.45s (21.00%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.55 +[2023-04-14 13:04:30,588] [INFO] [logging.py:96:log_dist] [Rank 0] step=7010, skipped=88, lr=[6.261437295262503e-07, 6.261437295262503e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:04:30,607] [INFO] [timer.py:199:stop] epoch=0/micro_step=7010/global_step=7010, RunningAvgSamplesPerSec=105.62016992871143, CurrSamplesPerSec=106.12538378393765, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:04:30,699] [INFO] [logging.py:96:log_dist] [Rank 0] step=7010, skipped=120, lr=[3.397650131284941e-07, 3.397650131284941e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7009|ppo_ep: 1|act_loss: 0.00827789306640625|cri_loss: 0.00453948974609375|unsuper_loss: 0.0 +average reward score: 5.8828125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.24%) |Training time=0.46s (21.28%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7010|ppo_ep: 1|act_loss: 0.003917694091796875|cri_loss: 0.0020294189453125|unsuper_loss: 0.0 +average reward score: 5.19140625 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.48%) |Training time=0.45s (20.07%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.25 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7011|ppo_ep: 1|act_loss: 0.00323486328125|cri_loss: 0.0018863677978515625|unsuper_loss: 0.0 +average reward score: 5.08984375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.54%) |Training time=0.45s (20.79%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7012|ppo_ep: 1|act_loss: 0.0350341796875|cri_loss: 0.0192108154296875|unsuper_loss: 0.0 +average reward score: 5.41015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.56%) |Training time=0.45s (20.77%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7013|ppo_ep: 1|act_loss: 0.01261138916015625|cri_loss: 0.00699615478515625|unsuper_loss: 0.0 +average reward score: 5.50390625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.54%) |Training time=0.45s (20.88%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7014|ppo_ep: 1|act_loss: -0.021697998046875|cri_loss: -0.01053619384765625|unsuper_loss: 0.0 +average reward score: 5.375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.68%) |Training time=0.45s (20.68%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7015|ppo_ep: 1|act_loss: -0.0042572021484375|cri_loss: -0.0020732879638671875|unsuper_loss: 0.0 +average reward score: 6.0703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.75%) |Training time=0.44s (20.67%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7016|ppo_ep: 1|act_loss: -0.01311492919921875|cri_loss: -0.00646209716796875|unsuper_loss: 0.0 +average reward score: 4.98046875 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.76s (76.24%) |Training time=0.45s (19.44%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.83 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7017|ppo_ep: 1|act_loss: -0.0018291473388671875|cri_loss: -0.0008478164672851562|unsuper_loss: 0.0 +average reward score: 5.03125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.20%) |Training time=0.45s (21.14%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7018|ppo_ep: 1|act_loss: 0.03436279296875|cri_loss: 0.017791748046875|unsuper_loss: 0.0 +average reward score: 5.73828125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.64%) |Training time=0.44s (20.68%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.55 +[2023-04-14 13:04:52,328] [INFO] [logging.py:96:log_dist] [Rank 0] step=7020, skipped=88, lr=[6.170233595040777e-07, 6.170233595040777e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:04:52,347] [INFO] [timer.py:199:stop] epoch=0/micro_step=7020/global_step=7020, RunningAvgSamplesPerSec=105.6290418746056, CurrSamplesPerSec=111.25124374398021, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:04:52,439] [INFO] [logging.py:96:log_dist] [Rank 0] step=7020, skipped=120, lr=[3.3493649053890325e-07, 3.3493649053890325e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7019|ppo_ep: 1|act_loss: 0.0511474609375|cri_loss: 0.0259552001953125|unsuper_loss: 0.0 +average reward score: 5.51953125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.47%) |Training time=0.45s (20.98%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7020|ppo_ep: 1|act_loss: -0.01282501220703125|cri_loss: -0.0062713623046875|unsuper_loss: 0.0 +average reward score: 5.234375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.37%) |Training time=0.45s (20.96%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7021|ppo_ep: 1|act_loss: 0.04193115234375|cri_loss: 0.0215911865234375|unsuper_loss: 0.0 +average reward score: 5.45703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.42%) |Training time=0.45s (21.02%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7022|ppo_ep: 1|act_loss: -0.0235748291015625|cri_loss: -0.01166534423828125|unsuper_loss: 0.0 +average reward score: 5.73046875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.16%) |Training time=0.45s (21.19%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7023|ppo_ep: 1|act_loss: -0.025726318359375|cri_loss: -0.01245880126953125|unsuper_loss: 0.0 +average reward score: 5.98046875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.44%) |Training time=0.45s (20.90%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7024|ppo_ep: 1|act_loss: 0.0184783935546875|cri_loss: 0.009521484375|unsuper_loss: 0.0 +average reward score: 5.05859375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.70%) |Training time=0.45s (20.73%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7025|ppo_ep: 1|act_loss: 0.004638671875|cri_loss: 0.0024890899658203125|unsuper_loss: 0.0 +average reward score: 5.0546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.20%) |Training time=0.43s (20.21%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7026|ppo_ep: 1|act_loss: -0.016021728515625|cri_loss: -0.007556915283203125|unsuper_loss: 0.0 +average reward score: 5.28125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.92%) |Training time=0.44s (20.41%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7027|ppo_ep: 1|act_loss: 0.003475189208984375|cri_loss: 0.0026454925537109375|unsuper_loss: 0.0 +average reward score: 6.0234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.90%) |Training time=0.44s (20.41%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7028|ppo_ep: 1|act_loss: -0.01137542724609375|cri_loss: -0.005615234375|unsuper_loss: 0.0 +average reward score: 5.0390625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.90%) |Training time=0.44s (20.44%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.55 +[2023-04-14 13:05:13,796] [INFO] [logging.py:96:log_dist] [Rank 0] step=7030, skipped=88, lr=[6.079653618469082e-07, 6.079653618469082e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:05:13,814] [INFO] [timer.py:199:stop] epoch=0/micro_step=7030/global_step=7030, RunningAvgSamplesPerSec=105.64047894564446, CurrSamplesPerSec=121.2592382456413, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:05:13,907] [INFO] [logging.py:96:log_dist] [Rank 0] step=7030, skipped=120, lr=[3.3014005941345406e-07, 3.3014005941345406e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7029|ppo_ep: 1|act_loss: -0.0184326171875|cri_loss: -0.00904083251953125|unsuper_loss: 0.0 +average reward score: 5.6875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.61%) |Training time=0.43s (19.83%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7030|ppo_ep: 1|act_loss: 0.02105712890625|cri_loss: 0.0115814208984375|unsuper_loss: 0.0 +average reward score: 5.28125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.10%) |Training time=0.43s (20.22%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7031|ppo_ep: 1|act_loss: 0.05078125|cri_loss: 0.026123046875|unsuper_loss: 0.0 +average reward score: 5.375 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.72%) |Training time=0.44s (19.03%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7032|ppo_ep: 1|act_loss: -0.00595855712890625|cri_loss: -0.00264739990234375|unsuper_loss: 0.0 +average reward score: 5.5234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.95%) |Training time=0.44s (20.40%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7033|ppo_ep: 1|act_loss: -0.0072021484375|cri_loss: -0.003490447998046875|unsuper_loss: 0.0 +average reward score: 5.140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.13%) |Training time=0.44s (20.33%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7034|ppo_ep: 1|act_loss: 0.0024433135986328125|cri_loss: 0.0013751983642578125|unsuper_loss: 0.0 +average reward score: 5.55078125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.65%) |Training time=0.45s (20.69%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7035|ppo_ep: 1|act_loss: 0.00522613525390625|cri_loss: 0.0029888153076171875|unsuper_loss: 0.0 +average reward score: 4.90625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.80%) |Training time=0.44s (20.62%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7036|ppo_ep: 1|act_loss: -0.01873779296875|cri_loss: -0.0090789794921875|unsuper_loss: 0.0 +average reward score: 5.03125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.91%) |Training time=0.44s (20.43%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7037|ppo_ep: 1|act_loss: 0.0185394287109375|cri_loss: 0.00945281982421875|unsuper_loss: 0.0 +average reward score: 5.1875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.01%) |Training time=0.44s (20.40%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7038|ppo_ep: 1|act_loss: -0.0100555419921875|cri_loss: -0.004840850830078125|unsuper_loss: 0.0 +average reward score: 5.21484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.00%) |Training time=0.44s (20.35%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55 +[2023-04-14 13:05:35,490] [INFO] [logging.py:96:log_dist] [Rank 0] step=7040, skipped=88, lr=[5.989698708161196e-07, 5.989698708161196e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:05:35,509] [INFO] [timer.py:199:stop] epoch=0/micro_step=7040/global_step=7040, RunningAvgSamplesPerSec=105.6536996369175, CurrSamplesPerSec=118.7872625896097, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:05:35,601] [INFO] [logging.py:96:log_dist] [Rank 0] step=7040, skipped=120, lr=[3.253757908468269e-07, 3.253757908468269e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7039|ppo_ep: 1|act_loss: -0.0106964111328125|cri_loss: -0.005218505859375|unsuper_loss: 0.0 +average reward score: 5.48828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.62%) |Training time=0.43s (19.87%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7040|ppo_ep: 1|act_loss: -0.014617919921875|cri_loss: -0.006984710693359375|unsuper_loss: 0.0 +average reward score: 5.140625 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.71s (76.03%) |Training time=0.44s (19.51%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.25 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7041|ppo_ep: 1|act_loss: 0.017822265625|cri_loss: 0.00927734375|unsuper_loss: 0.0 +average reward score: 5.41796875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.94%) |Training time=0.44s (20.39%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7042|ppo_ep: 1|act_loss: -0.01445770263671875|cri_loss: -0.007171630859375|unsuper_loss: 0.0 +average reward score: 5.0859375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.98%) |Training time=0.44s (20.36%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7043|ppo_ep: 1|act_loss: -0.0104522705078125|cri_loss: -0.0050811767578125|unsuper_loss: 0.0 +average reward score: 5.09375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.82%) |Training time=0.44s (20.51%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7044|ppo_ep: 1|act_loss: 0.016082763671875|cri_loss: 0.00823211669921875|unsuper_loss: 0.0 +average reward score: 5.6953125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.31%) |Training time=0.43s (19.98%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7045|ppo_ep: 1|act_loss: -0.027099609375|cri_loss: -0.013214111328125|unsuper_loss: 0.0 +average reward score: 5.62890625 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.15%) |Training time=0.50s (22.40%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.43 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7046|ppo_ep: 1|act_loss: 0.0109405517578125|cri_loss: 0.005657196044921875|unsuper_loss: 0.0 +average reward score: 5.72265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.04%) |Training time=0.44s (20.30%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7047|ppo_ep: 1|act_loss: -0.02777099609375|cri_loss: -0.01348114013671875|unsuper_loss: 0.0 +average reward score: 5.22265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.05%) |Training time=0.44s (20.36%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7048|ppo_ep: 1|act_loss: -0.02685546875|cri_loss: -0.0132293701171875|unsuper_loss: 0.0 +average reward score: 5.546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.89%) |Training time=0.44s (20.43%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.55 +[2023-04-14 13:05:57,120] [INFO] [logging.py:96:log_dist] [Rank 0] step=7050, skipped=88, lr=[5.900370197465865e-07, 5.900370197465865e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:05:57,139] [INFO] [timer.py:199:stop] epoch=0/micro_step=7050/global_step=7050, RunningAvgSamplesPerSec=105.66728407954258, CurrSamplesPerSec=116.75401299782442, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:05:57,231] [INFO] [logging.py:96:log_dist] [Rank 0] step=7050, skipped=120, lr=[3.206437554569741e-07, 3.206437554569741e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7049|ppo_ep: 1|act_loss: 0.002655029296875|cri_loss: 0.001415252685546875|unsuper_loss: 0.0 +average reward score: 5.84375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.05%) |Training time=0.44s (20.37%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7050|ppo_ep: 1|act_loss: -0.005207061767578125|cri_loss: -0.002410888671875|unsuper_loss: 0.0 +average reward score: 5.62109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.94%) |Training time=0.44s (20.38%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7051|ppo_ep: 1|act_loss: 0.00331878662109375|cri_loss: 0.0018138885498046875|unsuper_loss: 0.0 +average reward score: 5.46875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.00%) |Training time=0.44s (20.41%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7052|ppo_ep: 1|act_loss: 0.0107879638671875|cri_loss: 0.005695343017578125|unsuper_loss: 0.0 +average reward score: 5.625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.98%) |Training time=0.44s (20.36%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7053|ppo_ep: 1|act_loss: -0.0012302398681640625|cri_loss: -0.0005526542663574219|unsuper_loss: 0.0 +average reward score: 6.0625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.50%) |Training time=0.43s (19.90%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7054|ppo_ep: 1|act_loss: -0.007366180419921875|cri_loss: -0.002979278564453125|unsuper_loss: 0.0 +average reward score: 5.359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.93%) |Training time=0.44s (20.38%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7055|ppo_ep: 1|act_loss: 0.01214599609375|cri_loss: 0.006175994873046875|unsuper_loss: 0.0 +average reward score: 5.76171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.05%) |Training time=0.44s (20.41%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7056|ppo_ep: 1|act_loss: 0.006378173828125|cri_loss: 0.0032520294189453125|unsuper_loss: 0.0 +average reward score: 6.0546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.86%) |Training time=0.44s (20.48%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7057|ppo_ep: 1|act_loss: 0.006343841552734375|cri_loss: 0.003448486328125|unsuper_loss: 0.0 +average reward score: 5.9453125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.13%) |Training time=0.44s (20.28%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7058|ppo_ep: 1|act_loss: 0.0113677978515625|cri_loss: 0.005924224853515625|unsuper_loss: 0.0 +average reward score: 5.15625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.17%) |Training time=0.43s (20.15%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.55 +[2023-04-14 13:06:18,609] [INFO] [logging.py:96:log_dist] [Rank 0] step=7060, skipped=88, lr=[5.811669410447133e-07, 5.811669410447133e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:06:18,627] [INFO] [timer.py:199:stop] epoch=0/micro_step=7060/global_step=7060, RunningAvgSamplesPerSec=105.68131365475776, CurrSamplesPerSec=114.69336225070093, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:06:18,721] [INFO] [logging.py:96:log_dist] [Rank 0] step=7060, skipped=120, lr=[3.1594402338407633e-07, 3.1594402338407633e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7059|ppo_ep: 1|act_loss: 0.0213165283203125|cri_loss: 0.01114654541015625|unsuper_loss: 0.0 +average reward score: 5.72265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.86%) |Training time=0.44s (20.56%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7060|ppo_ep: 1|act_loss: -0.0136260986328125|cri_loss: -0.0066986083984375|unsuper_loss: 0.0 +average reward score: 5.3671875 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.62s (72.07%) |Training time=0.53s (23.48%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.22 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7061|ppo_ep: 1|act_loss: 0.0153656005859375|cri_loss: 0.00801849365234375|unsuper_loss: 0.0 +average reward score: 5.81640625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.12%) |Training time=0.47s (21.37%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7062|ppo_ep: 1|act_loss: -0.003650665283203125|cri_loss: -0.0014925003051757812|unsuper_loss: 0.0 +average reward score: 5.703125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.65%) |Training time=0.45s (20.70%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7063|ppo_ep: 1|act_loss: -0.0135955810546875|cri_loss: -0.00644683837890625|unsuper_loss: 0.0 +average reward score: 5.3046875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.78%) |Training time=0.43s (19.65%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7064|ppo_ep: 1|act_loss: -0.035491943359375|cri_loss: -0.017486572265625|unsuper_loss: 0.0 +average reward score: 5.26953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.67s (77.20%) |Training time=0.39s (18.02%) |Others=0.10 (4.78%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.55 +[2023-04-14 13:06:31,684] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 7065|ppo_ep: 1|act_loss: 0.0009374618530273438|cri_loss: 0.0006699562072753906|unsuper_loss: 0.0 +average reward score: 5.7421875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.76%) |Training time=0.42s (19.61%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7066|ppo_ep: 1|act_loss: -0.005615234375|cri_loss: -0.0027637481689453125|unsuper_loss: 0.0 +average reward score: 5.88671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.89%) |Training time=0.44s (20.38%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7067|ppo_ep: 1|act_loss: -0.010498046875|cri_loss: -0.004825592041015625|unsuper_loss: 0.0 +average reward score: 5.0390625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.76%) |Training time=0.45s (20.65%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7068|ppo_ep: 1|act_loss: 0.00209808349609375|cri_loss: 0.00121307373046875|unsuper_loss: 0.0 +average reward score: 6.6171875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.81%) |Training time=0.44s (20.50%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.55 +[2023-04-14 13:06:40,356] [INFO] [logging.py:96:log_dist] [Rank 0] step=7070, skipped=89, lr=[5.73237649271517e-07, 5.73237649271517e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:06:40,357] [INFO] [timer.py:199:stop] epoch=0/micro_step=7070/global_step=7070, RunningAvgSamplesPerSec=105.69375759915556, CurrSamplesPerSec=115.86235091814396, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:06:40,469] [INFO] [logging.py:96:log_dist] [Rank 0] step=7070, skipped=120, lr=[3.1127666428949986e-07, 3.1127666428949986e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7069|ppo_ep: 1|act_loss: 0.012603759765625|cri_loss: 0.00643157958984375|unsuper_loss: 0.0 +average reward score: 5.7734375 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.66s (74.89%) |Training time=0.45s (20.20%) |Others=0.11 (4.91%)|CurSamplesPerSec=14.45 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7070|ppo_ep: 1|act_loss: -0.01456451416015625|cri_loss: -0.00681304931640625|unsuper_loss: 0.0 +average reward score: 5.0078125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.87%) |Training time=0.44s (20.44%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7071|ppo_ep: 1|act_loss: -0.023345947265625|cri_loss: -0.0114288330078125|unsuper_loss: 0.0 +average reward score: 6.2578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.38%) |Training time=0.45s (20.95%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7072|ppo_ep: 1|act_loss: 0.0129547119140625|cri_loss: 0.0068206787109375|unsuper_loss: 0.0 +average reward score: 6.015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.30%) |Training time=0.45s (21.01%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7073|ppo_ep: 1|act_loss: -0.00811004638671875|cri_loss: -0.003925323486328125|unsuper_loss: 0.0 +average reward score: 5.7109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.56%) |Training time=0.45s (20.86%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7074|ppo_ep: 1|act_loss: 0.0012836456298828125|cri_loss: 0.0007758140563964844|unsuper_loss: 0.0 +average reward score: 6.1484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.44%) |Training time=0.45s (20.90%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7075|ppo_ep: 1|act_loss: -0.0047607421875|cri_loss: -0.002140045166015625|unsuper_loss: 0.0 +average reward score: 5.55859375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.88%) |Training time=0.44s (20.46%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7076|ppo_ep: 1|act_loss: -0.010772705078125|cri_loss: -0.005107879638671875|unsuper_loss: 0.0 +average reward score: 5.83984375 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.14%) |Training time=0.46s (19.57%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7077|ppo_ep: 1|act_loss: 0.005046844482421875|cri_loss: 0.0028209686279296875|unsuper_loss: 0.0 +average reward score: 5.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.43%) |Training time=0.45s (21.01%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7078|ppo_ep: 1|act_loss: -0.0077362060546875|cri_loss: -0.003391265869140625|unsuper_loss: 0.0 +average reward score: 5.203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.75%) |Training time=0.44s (20.58%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.55 +[2023-04-14 13:07:02,095] [INFO] [logging.py:96:log_dist] [Rank 0] step=7080, skipped=89, lr=[5.644871995139011e-07, 5.644871995139011e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:07:02,113] [INFO] [timer.py:199:stop] epoch=0/micro_step=7080/global_step=7080, RunningAvgSamplesPerSec=105.70222970266056, CurrSamplesPerSec=112.85040635914649, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:07:02,205] [INFO] [logging.py:96:log_dist] [Rank 0] step=7080, skipped=120, lr=[3.066417473547667e-07, 3.066417473547667e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7079|ppo_ep: 1|act_loss: 0.032135009765625|cri_loss: 0.0170440673828125|unsuper_loss: 0.0 +average reward score: 6.09375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.68%) |Training time=0.45s (20.69%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7080|ppo_ep: 1|act_loss: -0.023345947265625|cri_loss: -0.0113067626953125|unsuper_loss: 0.0 +average reward score: 5.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.48%) |Training time=0.45s (20.85%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7081|ppo_ep: 1|act_loss: 0.00737762451171875|cri_loss: 0.004241943359375|unsuper_loss: 0.0 +average reward score: 5.125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.87%) |Training time=0.44s (20.44%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7082|ppo_ep: 1|act_loss: -0.0083160400390625|cri_loss: -0.0031337738037109375|unsuper_loss: 0.0 +average reward score: 5.45703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.76%) |Training time=0.47s (21.59%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7083|ppo_ep: 1|act_loss: 0.008056640625|cri_loss: 0.00444793701171875|unsuper_loss: 0.0 +average reward score: 5.6484375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.10%) |Training time=0.46s (21.29%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7084|ppo_ep: 1|act_loss: -0.0013437271118164062|cri_loss: -0.00020885467529296875|unsuper_loss: 0.0 +average reward score: 5.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.89%) |Training time=0.46s (21.46%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7085|ppo_ep: 1|act_loss: 0.0086517333984375|cri_loss: 0.00460052490234375|unsuper_loss: 0.0 +average reward score: 5.81640625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.07%) |Training time=0.46s (21.26%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7086|ppo_ep: 1|act_loss: 0.0016613006591796875|cri_loss: 0.0010709762573242188|unsuper_loss: 0.0 +average reward score: 5.71484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.08%) |Training time=0.46s (21.26%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7087|ppo_ep: 1|act_loss: 0.016571044921875|cri_loss: 0.0089569091796875|unsuper_loss: 0.0 +average reward score: 5.015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.61%) |Training time=0.47s (21.76%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7088|ppo_ep: 1|act_loss: -0.0264129638671875|cri_loss: -0.01276397705078125|unsuper_loss: 0.0 +average reward score: 5.6953125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.65%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.55 +[2023-04-14 13:07:23,635] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384 +[2023-04-14 13:07:23,636] [INFO] [logging.py:96:log_dist] [Rank 0] step=7090, skipped=90, lr=[5.566657852262237e-07, 5.566657852262237e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:07:23,636] [INFO] [timer.py:199:stop] epoch=0/micro_step=7090/global_step=7090, RunningAvgSamplesPerSec=105.70629971379196, CurrSamplesPerSec=114.99573149730027, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:07:23,728] [INFO] [logging.py:96:log_dist] [Rank 0] step=7090, skipped=120, lr=[3.020393412805259e-07, 3.020393412805259e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7089|ppo_ep: 1|act_loss: -0.05987548828125|cri_loss: -0.013641357421875|unsuper_loss: 0.0 +average reward score: 5.6640625 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.65%) |Training time=0.44s (20.68%) |Others=0.10 (4.67%)|CurSamplesPerSec=15.03 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7090|ppo_ep: 1|act_loss: 0.009307861328125|cri_loss: 0.004871368408203125|unsuper_loss: 0.0 +average reward score: 5.83203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.46s (21.26%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7091|ppo_ep: 1|act_loss: 0.00020992755889892578|cri_loss: 0.000354766845703125|unsuper_loss: 0.0 +average reward score: 6.1015625 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.12%) |Training time=0.46s (19.62%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.74 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7092|ppo_ep: 1|act_loss: 0.005802154541015625|cri_loss: 0.0029811859130859375|unsuper_loss: 0.0 +average reward score: 4.9609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.71%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7093|ppo_ep: 1|act_loss: -0.00748443603515625|cri_loss: -0.0035839080810546875|unsuper_loss: 0.0 +average reward score: 5.609375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.45%) |Training time=0.45s (20.86%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7094|ppo_ep: 1|act_loss: -0.003345489501953125|cri_loss: -0.0012674331665039062|unsuper_loss: 0.0 +average reward score: 5.34375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.51%) |Training time=0.44s (20.77%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7095|ppo_ep: 1|act_loss: 0.0189208984375|cri_loss: 0.01151275634765625|unsuper_loss: 0.0 +average reward score: 5.4453125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.56%) |Training time=0.44s (20.76%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7096|ppo_ep: 1|act_loss: -0.01073455810546875|cri_loss: -0.00524139404296875|unsuper_loss: 0.0 +average reward score: 5.16796875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.73%) |Training time=0.44s (20.58%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7097|ppo_ep: 1|act_loss: -0.009918212890625|cri_loss: -0.00482177734375|unsuper_loss: 0.0 +average reward score: 5.0234375 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.68%) |Training time=0.44s (20.61%) |Others=0.10 (4.71%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7098|ppo_ep: 1|act_loss: -0.014434814453125|cri_loss: -0.007038116455078125|unsuper_loss: 0.0 +average reward score: 5.91796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.84%) |Training time=0.46s (21.20%) |Others=0.11 (4.96%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55 +[2023-04-14 13:07:45,359] [INFO] [logging.py:96:log_dist] [Rank 0] step=7100, skipped=90, lr=[5.480354326383703e-07, 5.480354326383703e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:07:45,378] [INFO] [timer.py:199:stop] epoch=0/micro_step=7100/global_step=7100, RunningAvgSamplesPerSec=105.71372664296617, CurrSamplesPerSec=113.53677699652836, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:07:45,470] [INFO] [logging.py:96:log_dist] [Rank 0] step=7100, skipped=120, lr=[2.9746951428553884e-07, 2.9746951428553884e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7099|ppo_ep: 1|act_loss: -0.00740814208984375|cri_loss: -0.0036258697509765625|unsuper_loss: 0.0 +average reward score: 5.9609375 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.48%) |Training time=0.44s (19.75%) |Others=0.11 (4.77%)|CurSamplesPerSec=14.22 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7100|ppo_ep: 1|act_loss: 0.008270263671875|cri_loss: 0.004364013671875|unsuper_loss: 0.0 +average reward score: 5.87109375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.47%) |Training time=0.44s (20.83%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7101|ppo_ep: 1|act_loss: 0.00771331787109375|cri_loss: 0.003948211669921875|unsuper_loss: 0.0 +average reward score: 5.96875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.54%) |Training time=0.44s (20.75%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7102|ppo_ep: 1|act_loss: 0.02349853515625|cri_loss: 0.01323699951171875|unsuper_loss: 0.0 +average reward score: 5.171875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.38%) |Training time=0.45s (20.94%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7103|ppo_ep: 1|act_loss: 0.0160675048828125|cri_loss: 0.00824737548828125|unsuper_loss: 0.0 +average reward score: 5.453125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.62%) |Training time=0.44s (20.79%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7104|ppo_ep: 1|act_loss: -0.020355224609375|cri_loss: -0.009979248046875|unsuper_loss: 0.0 +average reward score: 5.8046875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.01%) |Training time=0.46s (21.33%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7105|ppo_ep: 1|act_loss: -0.00275421142578125|cri_loss: -0.00083160400390625|unsuper_loss: 0.0 +average reward score: 5.84765625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.35%) |Training time=0.45s (20.96%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7106|ppo_ep: 1|act_loss: -0.00026607513427734375|cri_loss: 5.5789947509765625e-05|unsuper_loss: 0.0 +average reward score: 5.5 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.59%) |Training time=0.47s (20.10%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.55 +[2023-04-14 13:08:02,789] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 7107|ppo_ep: 1|act_loss: -0.019775390625|cri_loss: -0.00945281982421875|unsuper_loss: 0.0 +average reward score: 5.30078125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.51%) |Training time=0.46s (21.28%) |Others=0.09 (4.21%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.55 +[2023-04-14 13:08:04,927] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 7108|ppo_ep: 1|act_loss: -0.02490234375|cri_loss: -0.01221466064453125|unsuper_loss: 0.0 +average reward score: 5.30078125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.34%) |Training time=0.46s (21.50%) |Others=0.09 (4.16%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.55 +[2023-04-14 13:08:06,968] [INFO] [logging.py:96:log_dist] [Rank 0] step=7110, skipped=90, lr=[5.394684749830206e-07, 5.394684749830206e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:08:06,986] [INFO] [timer.py:199:stop] epoch=0/micro_step=7110/global_step=7110, RunningAvgSamplesPerSec=105.7198366319902, CurrSamplesPerSec=109.52436472834691, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:08:07,079] [INFO] [logging.py:96:log_dist] [Rank 0] step=7110, skipped=122, lr=[2.9383715516136083e-07, 2.9383715516136083e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7109|ppo_ep: 1|act_loss: -0.0279388427734375|cri_loss: -0.0134124755859375|unsuper_loss: 0.0 +average reward score: 5.4921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.22%) |Training time=0.45s (21.13%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7110|ppo_ep: 1|act_loss: -0.02520751953125|cri_loss: -0.0123291015625|unsuper_loss: 0.0 +average reward score: 5.69921875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.45s (21.05%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7111|ppo_ep: 1|act_loss: -0.004817962646484375|cri_loss: -0.0023059844970703125|unsuper_loss: 0.0 +average reward score: 5.05078125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.25%) |Training time=0.46s (21.14%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7112|ppo_ep: 1|act_loss: 0.018310546875|cri_loss: 0.00978851318359375|unsuper_loss: 0.0 +average reward score: 5.0234375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.08%) |Training time=0.45s (21.24%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7113|ppo_ep: 1|act_loss: 0.0018711090087890625|cri_loss: 0.0011491775512695312|unsuper_loss: 0.0 +average reward score: 5.59375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.12%) |Training time=0.45s (21.21%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7114|ppo_ep: 1|act_loss: -0.00603485107421875|cri_loss: -0.00254058837890625|unsuper_loss: 0.0 +average reward score: 5.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.45s (21.15%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7115|ppo_ep: 1|act_loss: 0.011688232421875|cri_loss: 0.0062713623046875|unsuper_loss: 0.0 +average reward score: 5.57421875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.14%) |Training time=0.46s (21.28%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7116|ppo_ep: 1|act_loss: 0.0015411376953125|cri_loss: 0.0009899139404296875|unsuper_loss: 0.0 +average reward score: 5.75 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.06%) |Training time=0.46s (21.27%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7117|ppo_ep: 1|act_loss: 0.0027523040771484375|cri_loss: 0.00145721435546875|unsuper_loss: 0.0 +average reward score: 5.8203125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.24%) |Training time=0.45s (21.16%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7118|ppo_ep: 1|act_loss: 0.004467010498046875|cri_loss: 0.0024471282958984375|unsuper_loss: 0.0 +average reward score: 5.5390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.15%) |Training time=0.48s (22.22%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.55 +[2023-04-14 13:08:28,469] [INFO] [logging.py:96:log_dist] [Rank 0] step=7120, skipped=90, lr=[5.309650392431538e-07, 5.309650392431538e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:08:28,487] [INFO] [timer.py:199:stop] epoch=0/micro_step=7120/global_step=7120, RunningAvgSamplesPerSec=105.7216516885662, CurrSamplesPerSec=95.38179159059753, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:08:28,580] [INFO] [logging.py:96:log_dist] [Rank 0] step=7120, skipped=122, lr=[2.8932614087824774e-07, 2.8932614087824774e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7119|ppo_ep: 1|act_loss: -0.0005578994750976562|cri_loss: -0.00013780593872070312|unsuper_loss: 0.0 +average reward score: 5.4375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.66%) |Training time=0.50s (22.78%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7120|ppo_ep: 1|act_loss: 0.0099334716796875|cri_loss: 0.005100250244140625|unsuper_loss: 0.0 +average reward score: 5.40234375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.49%) |Training time=0.50s (22.94%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7121|ppo_ep: 1|act_loss: -0.0159912109375|cri_loss: -0.0079345703125|unsuper_loss: 0.0 +average reward score: 6.109375 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.00%) |Training time=0.52s (23.56%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.42 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7122|ppo_ep: 1|act_loss: -0.0201416015625|cri_loss: -0.00991058349609375|unsuper_loss: 0.0 +average reward score: 5.5 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.17%) |Training time=0.46s (21.19%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7123|ppo_ep: 1|act_loss: -0.020599365234375|cri_loss: -0.00914764404296875|unsuper_loss: 0.0 +average reward score: 5.59375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.88%) |Training time=0.46s (21.50%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7124|ppo_ep: 1|act_loss: -0.00659942626953125|cri_loss: -0.0029087066650390625|unsuper_loss: 0.0 +average reward score: 5.4453125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.48s (21.94%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7125|ppo_ep: 1|act_loss: 0.0066680908203125|cri_loss: 0.003437042236328125|unsuper_loss: 0.0 +average reward score: 5.3203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.31%) |Training time=0.48s (22.16%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7126|ppo_ep: 1|act_loss: -0.01512908935546875|cri_loss: -0.0073089599609375|unsuper_loss: 0.0 +average reward score: 5.7265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.84%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7127|ppo_ep: 1|act_loss: -0.00734710693359375|cri_loss: -0.0032787322998046875|unsuper_loss: 0.0 +average reward score: 5.51171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.40%) |Training time=0.48s (22.06%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7128|ppo_ep: 1|act_loss: 0.032562255859375|cri_loss: 0.0173492431640625|unsuper_loss: 0.0 +average reward score: 4.9921875 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.47%) |Training time=0.49s (22.00%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.45 |AvgSamplesPerSec=14.55 +[2023-04-14 13:08:50,384] [INFO] [logging.py:96:log_dist] [Rank 0] step=7130, skipped=90, lr=[5.225252514602012e-07, 5.225252514602012e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:08:50,402] [INFO] [timer.py:199:stop] epoch=0/micro_step=7130/global_step=7130, RunningAvgSamplesPerSec=105.71516173869826, CurrSamplesPerSec=97.46446357329275, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:08:50,494] [INFO] [logging.py:96:log_dist] [Rank 0] step=7130, skipped=122, lr=[2.848478941146873e-07, 2.848478941146873e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7129|ppo_ep: 1|act_loss: 0.0390625|cri_loss: 0.0198974609375|unsuper_loss: 0.0 +average reward score: 5.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.23%) |Training time=0.49s (21.49%) |Others=0.10 (4.28%)|CurSamplesPerSec=14.00 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7130|ppo_ep: 1|act_loss: 0.009613037109375|cri_loss: 0.0063323974609375|unsuper_loss: 0.0 +average reward score: 5.91015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.81%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7131|ppo_ep: 1|act_loss: -0.021148681640625|cri_loss: -0.0103607177734375|unsuper_loss: 0.0 +average reward score: 5.30859375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.76%) |Training time=0.47s (21.68%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7132|ppo_ep: 1|act_loss: 0.0086669921875|cri_loss: 0.00457000732421875|unsuper_loss: 0.0 +average reward score: 5.3984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.30%) |Training time=0.48s (22.07%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7133|ppo_ep: 1|act_loss: -0.0162811279296875|cri_loss: -0.00804901123046875|unsuper_loss: 0.0 +average reward score: 5.3984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.66%) |Training time=0.47s (21.79%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7134|ppo_ep: 1|act_loss: 0.007770538330078125|cri_loss: 0.00411224365234375|unsuper_loss: 0.0 +average reward score: 5.90234375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.43%) |Training time=0.47s (21.93%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7135|ppo_ep: 1|act_loss: 0.03326416015625|cri_loss: 0.017120361328125|unsuper_loss: 0.0 +average reward score: 5.515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.63%) |Training time=0.47s (21.84%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7136|ppo_ep: 1|act_loss: -0.002288818359375|cri_loss: -0.00075531005859375|unsuper_loss: 0.0 +average reward score: 5.41796875 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.03%) |Training time=0.48s (20.65%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.65 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7137|ppo_ep: 1|act_loss: 0.00754547119140625|cri_loss: 0.003917694091796875|unsuper_loss: 0.0 +average reward score: 5.13671875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.18%) |Training time=0.49s (22.32%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7138|ppo_ep: 1|act_loss: 0.0069580078125|cri_loss: 0.004032135009765625|unsuper_loss: 0.0 +average reward score: 6.12109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.48s (22.00%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55 +[2023-04-14 13:09:12,227] [INFO] [logging.py:96:log_dist] [Rank 0] step=7140, skipped=90, lr=[5.141492367321766e-07, 5.141492367321766e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:09:12,245] [INFO] [timer.py:199:stop] epoch=0/micro_step=7140/global_step=7140, RunningAvgSamplesPerSec=105.71016137653531, CurrSamplesPerSec=104.10979883555152, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:09:12,338] [INFO] [logging.py:96:log_dist] [Rank 0] step=7140, skipped=122, lr=[2.804024812490991e-07, 2.804024812490991e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7139|ppo_ep: 1|act_loss: 0.005550384521484375|cri_loss: 0.0030155181884765625|unsuper_loss: 0.0 +average reward score: 5.609375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.92%) |Training time=0.47s (21.58%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7140|ppo_ep: 1|act_loss: -0.011016845703125|cri_loss: -0.00543975830078125|unsuper_loss: 0.0 +average reward score: 6.203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.20%) |Training time=0.48s (22.18%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7141|ppo_ep: 1|act_loss: 0.0009465217590332031|cri_loss: 0.0014495849609375|unsuper_loss: 0.0 +average reward score: 5.08203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.32%) |Training time=0.48s (22.15%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7142|ppo_ep: 1|act_loss: -0.0117340087890625|cri_loss: -0.0057830810546875|unsuper_loss: 0.0 +average reward score: 5.84375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.16%) |Training time=0.48s (22.23%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7143|ppo_ep: 1|act_loss: -0.0034694671630859375|cri_loss: -0.0016698837280273438|unsuper_loss: 0.0 +average reward score: 5.84765625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.48s (22.11%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7144|ppo_ep: 1|act_loss: -0.0047454833984375|cri_loss: -0.0021305084228515625|unsuper_loss: 0.0 +average reward score: 5.3359375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.21%) |Training time=0.48s (22.17%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7145|ppo_ep: 1|act_loss: -0.0008382797241210938|cri_loss: 0.00013303756713867188|unsuper_loss: 0.0 +average reward score: 5.53515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.20%) |Training time=0.48s (22.28%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7146|ppo_ep: 1|act_loss: 0.005992889404296875|cri_loss: 0.0034923553466796875|unsuper_loss: 0.0 +average reward score: 4.44921875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.27%) |Training time=0.48s (22.11%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7147|ppo_ep: 1|act_loss: -0.013641357421875|cri_loss: -0.006656646728515625|unsuper_loss: 0.0 +average reward score: 5.35546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.48s (22.09%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7148|ppo_ep: 1|act_loss: 0.071044921875|cri_loss: 0.0413818359375|unsuper_loss: 0.0 +average reward score: 5.31640625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.46%) |Training time=0.47s (21.91%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55 +[2023-04-14 13:09:33,886] [INFO] [logging.py:96:log_dist] [Rank 0] step=7150, skipped=90, lr=[5.058371192118248e-07, 5.058371192118248e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:09:33,904] [INFO] [timer.py:199:stop] epoch=0/micro_step=7150/global_step=7150, RunningAvgSamplesPerSec=105.70305846331202, CurrSamplesPerSec=100.97480326808203, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:09:33,996] [INFO] [logging.py:96:log_dist] [Rank 0] step=7150, skipped=122, lr=[2.7598996817322614e-07, 2.7598996817322614e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7149|ppo_ep: 1|act_loss: -0.01239013671875|cri_loss: -0.00600433349609375|unsuper_loss: 0.0 +average reward score: 5.70703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.48s (22.11%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7150|ppo_ep: 1|act_loss: 0.017181396484375|cri_loss: 0.00881195068359375|unsuper_loss: 0.0 +average reward score: 5.53515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.33%) |Training time=0.48s (22.06%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7151|ppo_ep: 1|act_loss: 0.00327301025390625|cri_loss: 0.00215911865234375|unsuper_loss: 0.0 +average reward score: 5.2734375 +------------------------------------------------------------------------------------- +|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.14%) |Training time=0.48s (20.35%) |Others=0.11 (4.51%)|CurSamplesPerSec=13.61 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7152|ppo_ep: 1|act_loss: -0.0084381103515625|cri_loss: -0.00391387939453125|unsuper_loss: 0.0 +average reward score: 5.6015625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.04%) |Training time=0.49s (22.35%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7153|ppo_ep: 1|act_loss: 0.01629638671875|cri_loss: 0.009246826171875|unsuper_loss: 0.0 +average reward score: 5.73046875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.62%) |Training time=0.47s (21.83%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7154|ppo_ep: 1|act_loss: 0.0115966796875|cri_loss: 0.006084442138671875|unsuper_loss: 0.0 +average reward score: 5.28125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.47s (21.91%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7155|ppo_ep: 1|act_loss: 0.0010461807250976562|cri_loss: 0.0006875991821289062|unsuper_loss: 0.0 +average reward score: 5.70703125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.46s (21.47%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7156|ppo_ep: 1|act_loss: 0.0706787109375|cri_loss: 0.04571533203125|unsuper_loss: 0.0 +average reward score: 5.87890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.52%) |Training time=0.47s (21.85%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7157|ppo_ep: 1|act_loss: 0.0011568069458007812|cri_loss: 0.0007429122924804688|unsuper_loss: 0.0 +average reward score: 5.8203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.62%) |Training time=0.47s (21.85%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7158|ppo_ep: 1|act_loss: -0.006542205810546875|cri_loss: -0.0032062530517578125|unsuper_loss: 0.0 +average reward score: 5.640625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.61%) |Training time=0.48s (21.80%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.55 +[2023-04-14 13:09:55,739] [INFO] [logging.py:96:log_dist] [Rank 0] step=7160, skipped=90, lr=[4.975890221047792e-07, 4.975890221047792e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:09:55,757] [INFO] [timer.py:199:stop] epoch=0/micro_step=7160/global_step=7160, RunningAvgSamplesPerSec=105.69891764973828, CurrSamplesPerSec=103.62230874288463, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:09:55,851] [INFO] [logging.py:96:log_dist] [Rank 0] step=7160, skipped=122, lr=[2.7161042029115706e-07, 2.7161042029115706e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7159|ppo_ep: 1|act_loss: 0.02386474609375|cri_loss: 0.012298583984375|unsuper_loss: 0.0 +average reward score: 5.140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.81%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7160|ppo_ep: 1|act_loss: 0.00872802734375|cri_loss: 0.00505828857421875|unsuper_loss: 0.0 +average reward score: 5.12109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.49%) |Training time=0.47s (21.84%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7161|ppo_ep: 1|act_loss: -0.02520751953125|cri_loss: -0.00902557373046875|unsuper_loss: 0.0 +average reward score: 5.51171875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.88%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7162|ppo_ep: 1|act_loss: -0.0174560546875|cri_loss: -0.0083770751953125|unsuper_loss: 0.0 +average reward score: 5.22265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.16%) |Training time=0.48s (22.15%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7163|ppo_ep: 1|act_loss: -0.0013837814331054688|cri_loss: -0.0005974769592285156|unsuper_loss: 0.0 +average reward score: 5.96875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.69%) |Training time=0.47s (21.74%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7164|ppo_ep: 1|act_loss: -0.01415252685546875|cri_loss: -0.0066070556640625|unsuper_loss: 0.0 +average reward score: 4.609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.51%) |Training time=0.47s (21.84%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7165|ppo_ep: 1|act_loss: -0.0080413818359375|cri_loss: -0.00374603271484375|unsuper_loss: 0.0 +average reward score: 5.30859375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.82%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7166|ppo_ep: 1|act_loss: -0.006404876708984375|cri_loss: -0.003139495849609375|unsuper_loss: 0.0 +average reward score: 4.86328125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.45%) |Training time=0.48s (21.94%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7167|ppo_ep: 1|act_loss: 0.0283203125|cri_loss: 0.0145416259765625|unsuper_loss: 0.0 +average reward score: 5.41015625 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.50%) |Training time=0.47s (20.24%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7168|ppo_ep: 1|act_loss: -0.0019178390502929688|cri_loss: -0.0005779266357421875|unsuper_loss: 0.0 +average reward score: 4.91015625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.21%) |Training time=0.48s (22.18%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55 +[2023-04-14 13:10:17,524] [INFO] [logging.py:96:log_dist] [Rank 0] step=7170, skipped=90, lr=[4.894050676677376e-07, 4.894050676677376e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:10:17,543] [INFO] [timer.py:199:stop] epoch=0/micro_step=7170/global_step=7170, RunningAvgSamplesPerSec=105.69460992636105, CurrSamplesPerSec=101.85904479156943, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:10:17,635] [INFO] [logging.py:96:log_dist] [Rank 0] step=7170, skipped=122, lr=[2.672639025183571e-07, 2.672639025183571e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7169|ppo_ep: 1|act_loss: 0.0014104843139648438|cri_loss: 0.0010814666748046875|unsuper_loss: 0.0 +average reward score: 5.2109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.47%) |Training time=0.48s (22.02%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7170|ppo_ep: 1|act_loss: 0.01959228515625|cri_loss: 0.00992584228515625|unsuper_loss: 0.0 +average reward score: 5.48046875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.46%) |Training time=0.47s (21.89%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7171|ppo_ep: 1|act_loss: 0.027862548828125|cri_loss: 0.01568603515625|unsuper_loss: 0.0 +average reward score: 5.57421875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.58%) |Training time=0.47s (21.86%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7172|ppo_ep: 1|act_loss: 0.0726318359375|cri_loss: 0.040069580078125|unsuper_loss: 0.0 +average reward score: 5.3984375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.47%) |Training time=0.48s (21.94%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7173|ppo_ep: 1|act_loss: 0.045166015625|cri_loss: 0.02301025390625|unsuper_loss: 0.0 +average reward score: 5.625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.45%) |Training time=0.48s (22.01%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7174|ppo_ep: 1|act_loss: 0.004367828369140625|cri_loss: 0.0023746490478515625|unsuper_loss: 0.0 +average reward score: 5.0546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.43%) |Training time=0.47s (21.96%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7175|ppo_ep: 1|act_loss: 0.0097808837890625|cri_loss: 0.005435943603515625|unsuper_loss: 0.0 +average reward score: 6.01953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.65%) |Training time=0.47s (21.79%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7176|ppo_ep: 1|act_loss: 0.0154571533203125|cri_loss: 0.00794219970703125|unsuper_loss: 0.0 +average reward score: 5.5859375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.31%) |Training time=0.48s (22.07%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7177|ppo_ep: 1|act_loss: 0.004146575927734375|cri_loss: 0.00255584716796875|unsuper_loss: 0.0 +average reward score: 5.453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.88%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7178|ppo_ep: 1|act_loss: -0.03350830078125|cri_loss: -0.0157623291015625|unsuper_loss: 0.0 +average reward score: 4.9609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.52%) |Training time=0.47s (21.85%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.55 +[2023-04-14 13:10:39,135] [INFO] [logging.py:96:log_dist] [Rank 0] step=7180, skipped=90, lr=[4.812853772066454e-07, 4.812853772066454e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:10:39,153] [INFO] [timer.py:199:stop] epoch=0/micro_step=7180/global_step=7180, RunningAvgSamplesPerSec=105.69055579370145, CurrSamplesPerSec=102.88281263773706, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:10:39,246] [INFO] [logging.py:96:log_dist] [Rank 0] step=7180, skipped=122, lr=[2.629504792807036e-07, 2.629504792807036e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7179|ppo_ep: 1|act_loss: -0.004405975341796875|cri_loss: -0.0016450881958007812|unsuper_loss: 0.0 +average reward score: 4.6796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.63%) |Training time=0.47s (21.86%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7180|ppo_ep: 1|act_loss: -0.0251007080078125|cri_loss: -0.012298583984375|unsuper_loss: 0.0 +average reward score: 6.08203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.82%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7181|ppo_ep: 1|act_loss: 0.0293731689453125|cri_loss: 0.01560211181640625|unsuper_loss: 0.0 +average reward score: 5.6875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.66%) |Training time=0.47s (21.76%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7182|ppo_ep: 1|act_loss: -0.002353668212890625|cri_loss: 0.0001239776611328125|unsuper_loss: 0.0 +average reward score: 5.7265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.49%) |Training time=0.47s (21.85%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7183|ppo_ep: 1|act_loss: 0.01068115234375|cri_loss: 0.00550079345703125|unsuper_loss: 0.0 +average reward score: 5.546875 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.54%) |Training time=0.47s (20.15%) |Others=0.10 (4.31%)|CurSamplesPerSec=13.75 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7184|ppo_ep: 1|act_loss: -0.0238189697265625|cri_loss: -0.01168060302734375|unsuper_loss: 0.0 +average reward score: 5.46484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.46%) |Training time=0.47s (21.92%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7185|ppo_ep: 1|act_loss: 0.01348114013671875|cri_loss: 0.0068206787109375|unsuper_loss: 0.0 +average reward score: 5.89453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.69%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7186|ppo_ep: 1|act_loss: 0.0394287109375|cri_loss: 0.0218963623046875|unsuper_loss: 0.0 +average reward score: 5.8359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.47s (21.92%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7187|ppo_ep: 1|act_loss: 0.002780914306640625|cri_loss: 0.0017032623291015625|unsuper_loss: 0.0 +average reward score: 6.01171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.73%) |Training time=0.47s (21.68%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7188|ppo_ep: 1|act_loss: -0.01236724853515625|cri_loss: -0.006107330322265625|unsuper_loss: 0.0 +average reward score: 4.80859375 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.76%) |Training time=0.47s (20.81%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.17 |AvgSamplesPerSec=14.55 +[2023-04-14 13:11:01,012] [INFO] [logging.py:96:log_dist] [Rank 0] step=7190, skipped=90, lr=[4.732300710749039e-07, 4.732300710749039e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:11:01,030] [INFO] [timer.py:199:stop] epoch=0/micro_step=7190/global_step=7190, RunningAvgSamplesPerSec=105.68814051767909, CurrSamplesPerSec=104.36301713908928, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:11:01,123] [INFO] [logging.py:96:log_dist] [Rank 0] step=7190, skipped=122, lr=[2.586702145135353e-07, 2.586702145135353e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7189|ppo_ep: 1|act_loss: -0.003192901611328125|cri_loss: -0.0009927749633789062|unsuper_loss: 0.0 +average reward score: 5.484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.78%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7190|ppo_ep: 1|act_loss: -0.00814056396484375|cri_loss: -0.003192901611328125|unsuper_loss: 0.0 +average reward score: 5.64453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.32%) |Training time=0.48s (22.04%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7191|ppo_ep: 1|act_loss: -0.01434326171875|cri_loss: -0.0070953369140625|unsuper_loss: 0.0 +average reward score: 5.34375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.78%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7192|ppo_ep: 1|act_loss: -0.025177001953125|cri_loss: -0.01215362548828125|unsuper_loss: 0.0 +average reward score: 6.16015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.39%) |Training time=0.47s (21.98%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7193|ppo_ep: 1|act_loss: -0.01399993896484375|cri_loss: -0.00669097900390625|unsuper_loss: 0.0 +average reward score: 5.91796875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.35%) |Training time=0.48s (22.02%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7194|ppo_ep: 1|act_loss: -0.01259613037109375|cri_loss: -0.0061798095703125|unsuper_loss: 0.0 +average reward score: 5.296875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.71%) |Training time=0.47s (21.68%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7195|ppo_ep: 1|act_loss: -0.02081298828125|cri_loss: -0.01021575927734375|unsuper_loss: 0.0 +average reward score: 5.09765625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.47s (21.95%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7196|ppo_ep: 1|act_loss: 0.01335906982421875|cri_loss: 0.00733184814453125|unsuper_loss: 0.0 +average reward score: 5.265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.21%) |Training time=0.48s (22.16%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7197|ppo_ep: 1|act_loss: 0.0306243896484375|cri_loss: 0.01568603515625|unsuper_loss: 0.0 +average reward score: 5.125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.71%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7198|ppo_ep: 1|act_loss: 0.00463104248046875|cri_loss: 0.002452850341796875|unsuper_loss: 0.0 +average reward score: 5.73046875 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.18%) |Training time=0.48s (20.49%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.71 |AvgSamplesPerSec=14.55 +[2023-04-14 13:11:22,828] [INFO] [logging.py:96:log_dist] [Rank 0] step=7200, skipped=90, lr=[4.6523926867158245e-07, 4.6523926867158245e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:11:22,846] [INFO] [timer.py:199:stop] epoch=0/micro_step=7200/global_step=7200, RunningAvgSamplesPerSec=105.68242647988721, CurrSamplesPerSec=93.17142805575108, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:11:22,939] [INFO] [logging.py:96:log_dist] [Rank 0] step=7200, skipped=122, lr=[2.544231716607015e-07, 2.544231716607015e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7199|ppo_ep: 1|act_loss: 0.0150604248046875|cri_loss: 0.008148193359375|unsuper_loss: 0.0 +average reward score: 5.79296875 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.32%) |Training time=0.51s (23.00%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.55 +[2023-04-14 13:11:24,992] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 7200|ppo_ep: 1|act_loss: 0.0025997161865234375|cri_loss: 0.0015277862548828125|unsuper_loss: 0.0 +average reward score: 5.96484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.66s (77.08%) |Training time=0.39s (18.09%) |Others=0.10 (4.83%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7201|ppo_ep: 1|act_loss: 0.027587890625|cri_loss: 0.01409149169921875|unsuper_loss: 0.0 +average reward score: 5.02734375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.67s (76.44%) |Training time=0.42s (18.98%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7202|ppo_ep: 1|act_loss: 0.0214385986328125|cri_loss: 0.01111602783203125|unsuper_loss: 0.0 +average reward score: 5.1171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.24%) |Training time=0.46s (21.11%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7203|ppo_ep: 1|act_loss: -0.0129547119140625|cri_loss: -0.00603485107421875|unsuper_loss: 0.0 +average reward score: 5.18359375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.12%) |Training time=0.46s (21.23%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7204|ppo_ep: 1|act_loss: 0.003009796142578125|cri_loss: 0.0017175674438476562|unsuper_loss: 0.0 +average reward score: 6.03125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.92%) |Training time=0.47s (21.44%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7205|ppo_ep: 1|act_loss: 0.001621246337890625|cri_loss: 0.001251220703125|unsuper_loss: 0.0 +average reward score: 5.05078125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.14%) |Training time=0.46s (21.05%) |Others=0.10 (4.81%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7206|ppo_ep: 1|act_loss: 0.001617431640625|cri_loss: 0.0009260177612304688|unsuper_loss: 0.0 +average reward score: 5.40234375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.81%) |Training time=0.45s (20.44%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7207|ppo_ep: 1|act_loss: -3.62396240234375e-05|cri_loss: 0.0010194778442382812|unsuper_loss: 0.0 +average reward score: 5.8046875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.40%) |Training time=0.46s (20.93%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7208|ppo_ep: 1|act_loss: 0.013427734375|cri_loss: 0.007190704345703125|unsuper_loss: 0.0 +average reward score: 5.234375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.77%) |Training time=0.50s (22.64%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.55 +[2023-04-14 13:11:44,585] [INFO] [logging.py:96:log_dist] [Rank 0] step=7210, skipped=91, lr=[4.581027951092893e-07, 4.581027951092893e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:11:44,603] [INFO] [timer.py:199:stop] epoch=0/micro_step=7210/global_step=7210, RunningAvgSamplesPerSec=105.68967135042475, CurrSamplesPerSec=108.45140855085393, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:11:44,687] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +[2023-04-14 13:11:44,687] [INFO] [logging.py:96:log_dist] [Rank 0] step=7210, skipped=123, lr=[2.506292898694468e-07, 2.506292898694468e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7209|ppo_ep: 1|act_loss: 0.027099609375|cri_loss: 0.014129638671875|unsuper_loss: 0.0 +average reward score: 6.0546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.59%) |Training time=0.46s (21.25%) |Others=0.09 (4.16%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55 +[2023-04-14 13:11:46,852] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 7210|ppo_ep: 1|act_loss: 0.01174163818359375|cri_loss: 0.005977630615234375|unsuper_loss: 0.0 +average reward score: 5.55078125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.51%) |Training time=0.48s (22.33%) |Others=0.09 (4.16%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7211|ppo_ep: 1|act_loss: -0.00045490264892578125|cri_loss: -0.00011813640594482422|unsuper_loss: 0.0 +average reward score: 5.1953125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.12%) |Training time=0.48s (22.27%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7212|ppo_ep: 1|act_loss: 0.012725830078125|cri_loss: 0.00652313232421875|unsuper_loss: 0.0 +average reward score: 5.8203125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.02%) |Training time=0.49s (22.25%) |Others=0.10 (4.73%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7213|ppo_ep: 1|act_loss: -0.0278472900390625|cri_loss: -0.01361083984375|unsuper_loss: 0.0 +average reward score: 5.76953125 +------------------------------------------------------------------------------------- +|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.78s (75.15%) |Training time=0.49s (20.57%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.51 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7214|ppo_ep: 1|act_loss: 0.004974365234375|cri_loss: 0.0028705596923828125|unsuper_loss: 0.0 +average reward score: 6.58984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.47%) |Training time=0.48s (21.91%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55 +[2023-04-14 13:11:57,807] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768, reducing to 16384 +epoch: 0|step: 7215|ppo_ep: 1|act_loss: -0.00435638427734375|cri_loss: -0.0017786026000976562|unsuper_loss: 0.0 +average reward score: 4.76171875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.43%) |Training time=0.45s (20.89%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7216|ppo_ep: 1|act_loss: -0.0201416015625|cri_loss: -0.00984954833984375|unsuper_loss: 0.0 +average reward score: 4.91796875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.40%) |Training time=0.48s (21.79%) |Others=0.11 (4.82%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7217|ppo_ep: 1|act_loss: 0.019775390625|cri_loss: 0.0100860595703125|unsuper_loss: 0.0 +average reward score: 4.90234375 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.20%) |Training time=0.49s (22.23%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7218|ppo_ep: 1|act_loss: 0.0010919570922851562|cri_loss: 0.0005993843078613281|unsuper_loss: 0.0 +average reward score: 5.8125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.75%) |Training time=0.49s (22.66%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.55 +[2023-04-14 13:12:06,535] [INFO] [logging.py:96:log_dist] [Rank 0] step=7220, skipped=92, lr=[4.5101875119972634e-07, 4.5101875119972634e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:12:06,553] [INFO] [timer.py:199:stop] epoch=0/micro_step=7220/global_step=7220, RunningAvgSamplesPerSec=105.682454829117, CurrSamplesPerSec=98.92933678188963, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:12:06,646] [INFO] [logging.py:96:log_dist] [Rank 0] step=7220, skipped=124, lr=[2.4686241437572036e-07, 2.4686241437572036e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7219|ppo_ep: 1|act_loss: -0.0048065185546875|cri_loss: -0.002262115478515625|unsuper_loss: 0.0 +average reward score: 5.5390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.97%) |Training time=0.49s (22.40%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7220|ppo_ep: 1|act_loss: 0.0068359375|cri_loss: 0.00359344482421875|unsuper_loss: 0.0 +average reward score: 5.58984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.69%) |Training time=0.49s (22.68%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7221|ppo_ep: 1|act_loss: -0.020599365234375|cri_loss: -0.0101776123046875|unsuper_loss: 0.0 +average reward score: 5.59765625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.79%) |Training time=0.49s (22.58%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7222|ppo_ep: 1|act_loss: 0.01129150390625|cri_loss: 0.00586700439453125|unsuper_loss: 0.0 +average reward score: 4.8984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.69%) |Training time=0.49s (22.70%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7223|ppo_ep: 1|act_loss: -0.0013971328735351562|cri_loss: -0.0006508827209472656|unsuper_loss: 0.0 +average reward score: 5.69921875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.84%) |Training time=0.49s (22.56%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7224|ppo_ep: 1|act_loss: -0.02392578125|cri_loss: -0.01168060302734375|unsuper_loss: 0.0 +average reward score: 5.28125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.81%) |Training time=0.49s (22.58%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7225|ppo_ep: 1|act_loss: 0.01824951171875|cri_loss: 0.0111083984375|unsuper_loss: 0.0 +average reward score: 5.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.67%) |Training time=0.49s (22.73%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7226|ppo_ep: 1|act_loss: -0.014801025390625|cri_loss: -0.007198333740234375|unsuper_loss: 0.0 +average reward score: 5.4375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.80%) |Training time=0.49s (22.57%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7227|ppo_ep: 1|act_loss: 0.03509521484375|cri_loss: 0.018218994140625|unsuper_loss: 0.0 +average reward score: 6.125 +------------------------------------------------------------------------------------- +|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.58s (71.06%) |Training time=0.49s (22.03%) |Others=0.15 (6.91%)|CurSamplesPerSec=14.36 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7228|ppo_ep: 1|act_loss: 0.00925445556640625|cri_loss: 0.004772186279296875|unsuper_loss: 0.0 +average reward score: 5.328125 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.62s (72.92%) |Training time=0.50s (22.49%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.44 |AvgSamplesPerSec=14.55 +[2023-04-14 13:12:28,345] [INFO] [logging.py:96:log_dist] [Rank 0] step=7230, skipped=92, lr=[4.4320918629398245e-07, 4.4320918629398245e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:12:28,364] [INFO] [timer.py:199:stop] epoch=0/micro_step=7230/global_step=7230, RunningAvgSamplesPerSec=105.67111864118128, CurrSamplesPerSec=109.30850571717106, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:12:28,456] [INFO] [logging.py:96:log_dist] [Rank 0] step=7230, skipped=124, lr=[2.427087262091782e-07, 2.427087262091782e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7229|ppo_ep: 1|act_loss: 0.0153045654296875|cri_loss: 0.008270263671875|unsuper_loss: 0.0 +average reward score: 5.546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.14%) |Training time=0.46s (21.19%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7230|ppo_ep: 1|act_loss: -0.0004405975341796875|cri_loss: -7.367134094238281e-05|unsuper_loss: 0.0 +average reward score: 4.96484375 +------------------------------------------------------------------------------------- +|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.26%) |Training time=0.54s (24.25%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.34 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7231|ppo_ep: 1|act_loss: -0.0009431838989257812|cri_loss: -0.00023365020751953125|unsuper_loss: 0.0 +average reward score: 5.45703125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.12%) |Training time=0.49s (22.26%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7232|ppo_ep: 1|act_loss: 0.016845703125|cri_loss: 0.0087432861328125|unsuper_loss: 0.0 +average reward score: 4.96484375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.99%) |Training time=0.49s (22.35%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7233|ppo_ep: 1|act_loss: 0.03729248046875|cri_loss: 0.019073486328125|unsuper_loss: 0.0 +average reward score: 5.359375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.83%) |Training time=0.49s (22.58%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7234|ppo_ep: 1|act_loss: 0.01044464111328125|cri_loss: 0.005260467529296875|unsuper_loss: 0.0 +average reward score: 5.5234375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.05%) |Training time=0.49s (22.35%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7235|ppo_ep: 1|act_loss: -0.01177978515625|cri_loss: -0.00554656982421875|unsuper_loss: 0.0 +average reward score: 5.5703125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.01%) |Training time=0.49s (22.40%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7236|ppo_ep: 1|act_loss: 0.01081085205078125|cri_loss: 0.005710601806640625|unsuper_loss: 0.0 +average reward score: 5.66796875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.12%) |Training time=0.49s (22.27%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7237|ppo_ep: 1|act_loss: 0.0028858184814453125|cri_loss: 0.0016603469848632812|unsuper_loss: 0.0 +average reward score: 5.41796875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.94%) |Training time=0.49s (22.48%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7238|ppo_ep: 1|act_loss: -0.0245361328125|cri_loss: -0.011871337890625|unsuper_loss: 0.0 +average reward score: 4.9375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.90%) |Training time=0.49s (22.52%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.55 +[2023-04-14 13:12:50,207] [INFO] [logging.py:96:log_dist] [Rank 0] step=7240, skipped=92, lr=[4.354645700985926e-07, 4.354645700985926e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:12:50,225] [INFO] [timer.py:199:stop] epoch=0/micro_step=7240/global_step=7240, RunningAvgSamplesPerSec=105.65752762833793, CurrSamplesPerSec=98.45495143921833, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:12:50,318] [INFO] [logging.py:96:log_dist] [Rank 0] step=7240, skipped=124, lr=[2.385884965447316e-07, 2.385884965447316e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7239|ppo_ep: 1|act_loss: -0.0081329345703125|cri_loss: -0.003597259521484375|unsuper_loss: 0.0 +average reward score: 5.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.95%) |Training time=0.49s (22.46%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7240|ppo_ep: 1|act_loss: -0.013763427734375|cri_loss: -0.0066986083984375|unsuper_loss: 0.0 +average reward score: 5.3125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.73%) |Training time=0.50s (22.68%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7241|ppo_ep: 1|act_loss: -0.0121917724609375|cri_loss: -0.00574493408203125|unsuper_loss: 0.0 +average reward score: 5.171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.02%) |Training time=0.49s (22.38%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7242|ppo_ep: 1|act_loss: -0.025238037109375|cri_loss: -0.0124053955078125|unsuper_loss: 0.0 +average reward score: 5.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.59%) |Training time=0.50s (22.81%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7243|ppo_ep: 1|act_loss: 0.00975799560546875|cri_loss: 0.00499725341796875|unsuper_loss: 0.0 +average reward score: 5.484375 +------------------------------------------------------------------------------------- +|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.77s (74.78%) |Training time=0.49s (20.94%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.54 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7244|ppo_ep: 1|act_loss: -0.0159912109375|cri_loss: -0.007350921630859375|unsuper_loss: 0.0 +average reward score: 5.609375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.83%) |Training time=0.49s (22.56%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7245|ppo_ep: 1|act_loss: -0.001018524169921875|cri_loss: -0.00044727325439453125|unsuper_loss: 0.0 +average reward score: 5.296875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.11%) |Training time=0.48s (22.30%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7246|ppo_ep: 1|act_loss: 0.001873016357421875|cri_loss: 0.0011262893676757812|unsuper_loss: 0.0 +average reward score: 5.53125 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.24%) |Training time=0.49s (22.23%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7247|ppo_ep: 1|act_loss: -0.004680633544921875|cri_loss: -0.0019683837890625|unsuper_loss: 0.0 +average reward score: 5.6328125 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.17%) |Training time=0.49s (21.44%) |Others=0.10 (4.39%)|CurSamplesPerSec=14.06 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7248|ppo_ep: 1|act_loss: 0.002910614013671875|cri_loss: 0.0017080307006835938|unsuper_loss: 0.0 +average reward score: 5.36328125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.75%) |Training time=0.49s (22.66%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.55 +[2023-04-14 13:13:12,316] [INFO] [logging.py:96:log_dist] [Rank 0] step=7250, skipped=92, lr=[4.2778501740745257e-07, 4.2778501740745257e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:13:12,335] [INFO] [timer.py:199:stop] epoch=0/micro_step=7250/global_step=7250, RunningAvgSamplesPerSec=105.6449480402019, CurrSamplesPerSec=97.82626105957648, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:13:12,427] [INFO] [logging.py:96:log_dist] [Rank 0] step=7250, skipped=124, lr=[2.3450178645412518e-07, 2.3450178645412518e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7249|ppo_ep: 1|act_loss: 0.002838134765625|cri_loss: 0.00189971923828125|unsuper_loss: 0.0 +average reward score: 5.125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.08%) |Training time=0.49s (22.39%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7250|ppo_ep: 1|act_loss: -0.0059967041015625|cri_loss: -0.0029048919677734375|unsuper_loss: 0.0 +average reward score: 5.171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.90%) |Training time=0.49s (22.46%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7251|ppo_ep: 1|act_loss: -0.0037975311279296875|cri_loss: -0.001712799072265625|unsuper_loss: 0.0 +average reward score: 5.0078125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.96%) |Training time=0.49s (22.45%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7252|ppo_ep: 1|act_loss: -0.0023326873779296875|cri_loss: -0.0010633468627929688|unsuper_loss: 0.0 +average reward score: 5.73046875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.61%) |Training time=0.50s (22.81%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7253|ppo_ep: 1|act_loss: 0.0113525390625|cri_loss: 0.00598907470703125|unsuper_loss: 0.0 +average reward score: 5.07421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.90%) |Training time=0.49s (22.49%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7254|ppo_ep: 1|act_loss: -0.0007228851318359375|cri_loss: 3.24249267578125e-05|unsuper_loss: 0.0 +average reward score: 5.5625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.81%) |Training time=0.49s (22.59%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7255|ppo_ep: 1|act_loss: -0.0020904541015625|cri_loss: -0.0009593963623046875|unsuper_loss: 0.0 +average reward score: 5.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.64%) |Training time=0.50s (22.79%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.55 +[2023-04-14 13:13:27,520] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192 +epoch: 0|step: 7256|ppo_ep: 1|act_loss: 0.0012178421020507812|cri_loss: 0.0009031295776367188|unsuper_loss: 0.0 +average reward score: 5.86328125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.91%) |Training time=0.46s (21.43%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7257|ppo_ep: 1|act_loss: -0.0136871337890625|cri_loss: -0.006656646728515625|unsuper_loss: 0.0 +average reward score: 5.4375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.86%) |Training time=0.49s (22.54%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7258|ppo_ep: 1|act_loss: 0.031036376953125|cri_loss: 0.01690673828125|unsuper_loss: 0.0 +average reward score: 5.671875 +------------------------------------------------------------------------------------- +|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.19%) |Training time=0.48s (20.51%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.63 |AvgSamplesPerSec=14.55 +[2023-04-14 13:13:34,199] [INFO] [logging.py:96:log_dist] [Rank 0] step=7260, skipped=93, lr=[4.209291433815917e-07, 4.209291433815917e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:13:34,218] [INFO] [timer.py:199:stop] epoch=0/micro_step=7260/global_step=7260, RunningAvgSamplesPerSec=105.63492765670183, CurrSamplesPerSec=99.31827821724299, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:13:34,310] [INFO] [logging.py:96:log_dist] [Rank 0] step=7260, skipped=124, lr=[2.3044865651226017e-07, 2.3044865651226017e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7259|ppo_ep: 1|act_loss: -0.0121307373046875|cri_loss: -0.00601959228515625|unsuper_loss: 0.0 +average reward score: 5.58203125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.16%) |Training time=0.48s (22.25%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7260|ppo_ep: 1|act_loss: 0.001026153564453125|cri_loss: 0.0007610321044921875|unsuper_loss: 0.0 +average reward score: 5.36328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.69%) |Training time=0.47s (21.71%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7261|ppo_ep: 1|act_loss: -0.027099609375|cri_loss: -0.013214111328125|unsuper_loss: 0.0 +average reward score: 5.8046875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.76%) |Training time=0.47s (21.61%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7262|ppo_ep: 1|act_loss: -0.0026416778564453125|cri_loss: -0.00098419189453125|unsuper_loss: 0.0 +average reward score: 5.08203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.63%) |Training time=0.47s (21.76%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7263|ppo_ep: 1|act_loss: -0.000705718994140625|cri_loss: -0.0001647472381591797|unsuper_loss: 0.0 +average reward score: 6.0703125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.30%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7264|ppo_ep: 1|act_loss: 0.00021183490753173828|cri_loss: 0.0003452301025390625|unsuper_loss: 0.0 +average reward score: 5.1875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.42%) |Training time=0.48s (21.97%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7265|ppo_ep: 1|act_loss: 0.020904541015625|cri_loss: 0.01153564453125|unsuper_loss: 0.0 +average reward score: 5.17578125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.70%) |Training time=0.47s (21.65%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7266|ppo_ep: 1|act_loss: 0.02044677734375|cri_loss: 0.01055145263671875|unsuper_loss: 0.0 +average reward score: 4.921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.75%) |Training time=0.47s (21.63%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7267|ppo_ep: 1|act_loss: -0.02435302734375|cri_loss: -0.011871337890625|unsuper_loss: 0.0 +average reward score: 5.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.86%) |Training time=0.47s (21.46%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7268|ppo_ep: 1|act_loss: -0.0102081298828125|cri_loss: -0.004817962646484375|unsuper_loss: 0.0 +average reward score: 5.4296875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.74%) |Training time=0.47s (21.65%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.55 +[2023-04-14 13:13:55,869] [INFO] [logging.py:96:log_dist] [Rank 0] step=7270, skipped=93, lr=[4.133735241503523e-07, 4.133735241503523e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:13:55,887] [INFO] [timer.py:199:stop] epoch=0/micro_step=7270/global_step=7270, RunningAvgSamplesPerSec=105.63331027433297, CurrSamplesPerSec=105.13135218502553, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:13:55,980] [INFO] [logging.py:96:log_dist] [Rank 0] step=7270, skipped=124, lr=[2.2642916679630006e-07, 2.2642916679630006e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7269|ppo_ep: 1|act_loss: 0.02801513671875|cri_loss: 0.01450347900390625|unsuper_loss: 0.0 +average reward score: 5.1640625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.81%) |Training time=0.47s (21.57%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55 +[2023-04-14 13:13:58,022] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, reducing to 4096 +epoch: 0|step: 7270|ppo_ep: 1|act_loss: 0.0019197463989257812|cri_loss: 0.0013561248779296875|unsuper_loss: 0.0 +average reward score: 5.03125 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.71%) |Training time=0.44s (20.64%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7271|ppo_ep: 1|act_loss: 0.00724029541015625|cri_loss: 0.0037212371826171875|unsuper_loss: 0.0 +average reward score: 5.23828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.80%) |Training time=0.47s (21.58%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7272|ppo_ep: 1|act_loss: -0.00864410400390625|cri_loss: -0.004055023193359375|unsuper_loss: 0.0 +average reward score: 5.3125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.91%) |Training time=0.46s (21.45%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7273|ppo_ep: 1|act_loss: -0.0122833251953125|cri_loss: -0.00557708740234375|unsuper_loss: 0.0 +average reward score: 5.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.70%) |Training time=0.47s (19.98%) |Others=0.10 (4.33%)|CurSamplesPerSec=13.72 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7274|ppo_ep: 1|act_loss: -0.02825927734375|cri_loss: -0.013885498046875|unsuper_loss: 0.0 +average reward score: 5.828125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.78%) |Training time=0.47s (21.63%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7275|ppo_ep: 1|act_loss: -0.00447845458984375|cri_loss: -0.0021266937255859375|unsuper_loss: 0.0 +average reward score: 5.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.87%) |Training time=0.47s (21.52%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7276|ppo_ep: 1|act_loss: 0.0157470703125|cri_loss: 0.00909423828125|unsuper_loss: 0.0 +average reward score: 5.17578125 +------------------------------------------------------------------------------------- +|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.72s (75.23%) |Training time=0.47s (20.37%) |Others=0.10 (4.40%)|CurSamplesPerSec=14.00 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7277|ppo_ep: 1|act_loss: 0.117919921875|cri_loss: 0.06317138671875|unsuper_loss: 0.0 +average reward score: 5.16015625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.47s (21.51%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7278|ppo_ep: 1|act_loss: 0.01934814453125|cri_loss: 0.00983428955078125|unsuper_loss: 0.0 +average reward score: 5.3984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.69%) |Training time=0.47s (21.71%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55 +[2023-04-14 13:14:17,796] [INFO] [logging.py:96:log_dist] [Rank 0] step=7280, skipped=94, lr=[4.066293729300195e-07, 4.066293729300195e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:14:17,815] [INFO] [timer.py:199:stop] epoch=0/micro_step=7280/global_step=7280, RunningAvgSamplesPerSec=105.63432861140478, CurrSamplesPerSec=106.89784560000638, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:14:17,907] [INFO] [logging.py:96:log_dist] [Rank 0] step=7280, skipped=124, lr=[2.224433768847789e-07, 2.224433768847789e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7279|ppo_ep: 1|act_loss: 0.02032470703125|cri_loss: 0.01061248779296875|unsuper_loss: 0.0 +average reward score: 5.44140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.05%) |Training time=0.46s (21.35%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7280|ppo_ep: 1|act_loss: 0.0042724609375|cri_loss: 0.00223541259765625|unsuper_loss: 0.0 +average reward score: 4.66015625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.79%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7281|ppo_ep: 1|act_loss: -0.0147247314453125|cri_loss: -0.00708770751953125|unsuper_loss: 0.0 +average reward score: 5.74609375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.85%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7282|ppo_ep: 1|act_loss: -0.009124755859375|cri_loss: -0.00443267822265625|unsuper_loss: 0.0 +average reward score: 4.9921875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.76%) |Training time=0.47s (21.61%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7283|ppo_ep: 1|act_loss: 0.004364013671875|cri_loss: 0.0024471282958984375|unsuper_loss: 0.0 +average reward score: 4.83984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.66%) |Training time=0.47s (21.73%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7284|ppo_ep: 1|act_loss: 0.001445770263671875|cri_loss: 0.00115203857421875|unsuper_loss: 0.0 +average reward score: 4.4375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.48s (21.96%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7285|ppo_ep: 1|act_loss: 0.00766754150390625|cri_loss: 0.004055023193359375|unsuper_loss: 0.0 +average reward score: 5.2265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.93%) |Training time=0.46s (21.45%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7286|ppo_ep: 1|act_loss: -0.00945281982421875|cri_loss: -0.004619598388671875|unsuper_loss: 0.0 +average reward score: 4.65234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.86%) |Training time=0.47s (21.51%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7287|ppo_ep: 1|act_loss: -0.0136871337890625|cri_loss: -0.006744384765625|unsuper_loss: 0.0 +average reward score: 4.6875 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.60s (70.62%) |Training time=0.48s (21.03%) |Others=0.19 (8.35%)|CurSamplesPerSec=14.16 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7288|ppo_ep: 1|act_loss: -0.0018205642700195312|cri_loss: -0.0007228851318359375|unsuper_loss: 0.0 +average reward score: 5.4921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.86%) |Training time=0.46s (21.35%) |Others=0.10 (4.78%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.55 +[2023-04-14 13:14:39,554] [INFO] [logging.py:96:log_dist] [Rank 0] step=7290, skipped=94, lr=[3.9919809145031695e-07, 3.9919809145031695e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:14:39,573] [INFO] [timer.py:199:stop] epoch=0/micro_step=7290/global_step=7290, RunningAvgSamplesPerSec=105.63344822690242, CurrSamplesPerSec=110.83196229238267, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:14:39,665] [INFO] [logging.py:96:log_dist] [Rank 0] step=7290, skipped=124, lr=[2.1849134585671922e-07, 2.1849134585671922e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7289|ppo_ep: 1|act_loss: -0.03265380859375|cri_loss: -0.01580810546875|unsuper_loss: 0.0 +average reward score: 5.48046875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.50%) |Training time=0.45s (20.88%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7290|ppo_ep: 1|act_loss: -0.0302276611328125|cri_loss: -0.014495849609375|unsuper_loss: 0.0 +average reward score: 4.96484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.07%) |Training time=0.46s (21.32%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7291|ppo_ep: 1|act_loss: 0.008209228515625|cri_loss: 0.004390716552734375|unsuper_loss: 0.0 +average reward score: 5.71484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.82%) |Training time=0.47s (21.54%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7292|ppo_ep: 1|act_loss: 0.00888824462890625|cri_loss: 0.00460052490234375|unsuper_loss: 0.0 +average reward score: 4.99609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.08%) |Training time=0.46s (21.28%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7293|ppo_ep: 1|act_loss: 0.02685546875|cri_loss: 0.01367950439453125|unsuper_loss: 0.0 +average reward score: 5.1015625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.86%) |Training time=0.47s (21.52%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7294|ppo_ep: 1|act_loss: 0.0189208984375|cri_loss: 0.00954437255859375|unsuper_loss: 0.0 +average reward score: 5.55078125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.86%) |Training time=0.47s (21.52%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7295|ppo_ep: 1|act_loss: 0.01593017578125|cri_loss: 0.00836181640625|unsuper_loss: 0.0 +average reward score: 5.109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.14%) |Training time=0.46s (21.22%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7296|ppo_ep: 1|act_loss: 0.02227783203125|cri_loss: 0.0115814208984375|unsuper_loss: 0.0 +average reward score: 5.1171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.69%) |Training time=0.47s (21.71%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7297|ppo_ep: 1|act_loss: -0.00872802734375|cri_loss: -0.003902435302734375|unsuper_loss: 0.0 +average reward score: 5.80078125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.46s (21.39%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7298|ppo_ep: 1|act_loss: 0.0307159423828125|cri_loss: 0.015655517578125|unsuper_loss: 0.0 +average reward score: 4.703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.97%) |Training time=0.49s (22.49%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55 +[2023-04-14 13:15:01,191] [INFO] [logging.py:96:log_dist] [Rank 0] step=7300, skipped=94, lr=[3.918324110315662e-07, 3.918324110315662e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:15:01,209] [INFO] [timer.py:199:stop] epoch=0/micro_step=7300/global_step=7300, RunningAvgSamplesPerSec=105.63232889516885, CurrSamplesPerSec=100.18558607917797, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:15:01,302] [INFO] [logging.py:96:log_dist] [Rank 0] step=7300, skipped=124, lr=[2.1457313229075322e-07, 2.1457313229075322e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7299|ppo_ep: 1|act_loss: -0.0007047653198242188|cri_loss: 0.00031280517578125|unsuper_loss: 0.0 +average reward score: 6.32421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.31%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7300|ppo_ep: 1|act_loss: 0.00514984130859375|cri_loss: 0.00290679931640625|unsuper_loss: 0.0 +average reward score: 4.609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.91%) |Training time=0.49s (22.47%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7301|ppo_ep: 1|act_loss: 0.013763427734375|cri_loss: 0.007228851318359375|unsuper_loss: 0.0 +average reward score: 5.28515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.01%) |Training time=0.49s (22.46%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7302|ppo_ep: 1|act_loss: 0.002460479736328125|cri_loss: 0.0013637542724609375|unsuper_loss: 0.0 +average reward score: 5.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.61s (70.38%) |Training time=0.57s (24.93%) |Others=0.11 (4.69%)|CurSamplesPerSec=14.01 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7303|ppo_ep: 1|act_loss: -0.0006494522094726562|cri_loss: -0.0001862049102783203|unsuper_loss: 0.0 +average reward score: 5.1171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.90%) |Training time=0.49s (22.58%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7304|ppo_ep: 1|act_loss: -0.025909423828125|cri_loss: -0.01270294189453125|unsuper_loss: 0.0 +average reward score: 5.46875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.96%) |Training time=0.47s (21.45%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7305|ppo_ep: 1|act_loss: -0.0158233642578125|cri_loss: -0.00772857666015625|unsuper_loss: 0.0 +average reward score: 6.1640625 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.72%) |Training time=0.48s (21.80%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.56 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7306|ppo_ep: 1|act_loss: 0.0014791488647460938|cri_loss: 0.0011234283447265625|unsuper_loss: 0.0 +average reward score: 5.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.68s (73.96%) |Training time=0.49s (21.66%) |Others=0.10 (4.39%)|CurSamplesPerSec=14.10 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7307|ppo_ep: 1|act_loss: 0.01947021484375|cri_loss: 0.01013946533203125|unsuper_loss: 0.0 +average reward score: 5.53515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.05%) |Training time=0.49s (22.42%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7308|ppo_ep: 1|act_loss: 0.00453948974609375|cri_loss: 0.002384185791015625|unsuper_loss: 0.0 +average reward score: 5.578125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.78%) |Training time=0.49s (22.61%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.55 +[2023-04-14 13:15:23,160] [INFO] [logging.py:96:log_dist] [Rank 0] step=7310, skipped=94, lr=[3.8453244085091747e-07, 3.8453244085091747e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:15:23,179] [INFO] [timer.py:199:stop] epoch=0/micro_step=7310/global_step=7310, RunningAvgSamplesPerSec=105.62016882536805, CurrSamplesPerSec=98.96179635438095, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:15:23,271] [INFO] [logging.py:96:log_dist] [Rank 0] step=7310, skipped=124, lr=[2.106887942642588e-07, 2.106887942642588e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7309|ppo_ep: 1|act_loss: 0.0206451416015625|cri_loss: 0.01088714599609375|unsuper_loss: 0.0 +average reward score: 5.9765625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.27%) |Training time=0.49s (22.26%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7310|ppo_ep: 1|act_loss: -0.018463134765625|cri_loss: -0.0088043212890625|unsuper_loss: 0.0 +average reward score: 4.9921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.86%) |Training time=0.49s (22.54%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55 +[2023-04-14 13:15:27,598] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 7311|ppo_ep: 1|act_loss: 0.0302734375|cri_loss: 0.0158538818359375|unsuper_loss: 0.0 +average reward score: 5.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.29%) |Training time=0.49s (22.66%) |Others=0.09 (4.05%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.55 +[2023-04-14 13:15:29,755] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 7312|ppo_ep: 1|act_loss: -0.0013437271118164062|cri_loss: -0.0005731582641601562|unsuper_loss: 0.0 +average reward score: 5.6796875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.17%) |Training time=0.49s (22.71%) |Others=0.09 (4.12%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7313|ppo_ep: 1|act_loss: -0.0218505859375|cri_loss: -0.01042938232421875|unsuper_loss: 0.0 +average reward score: 5.234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.81%) |Training time=0.49s (22.67%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7314|ppo_ep: 1|act_loss: 0.013671875|cri_loss: 0.007049560546875|unsuper_loss: 0.0 +average reward score: 5.5234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.95%) |Training time=0.49s (22.42%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7315|ppo_ep: 1|act_loss: -0.041412353515625|cri_loss: -0.0198974609375|unsuper_loss: 0.0 +average reward score: 5.6953125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.01%) |Training time=0.49s (22.47%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7316|ppo_ep: 1|act_loss: -0.01434326171875|cri_loss: -0.00691986083984375|unsuper_loss: 0.0 +average reward score: 5.4765625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.91%) |Training time=0.49s (22.48%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7317|ppo_ep: 1|act_loss: 0.0110321044921875|cri_loss: 0.0056610107421875|unsuper_loss: 0.0 +average reward score: 5.3828125 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.17%) |Training time=0.48s (20.63%) |Others=0.10 (4.19%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7318|ppo_ep: 1|act_loss: -0.0601806640625|cri_loss: -0.00604248046875|unsuper_loss: 0.0 +average reward score: 5.953125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.23%) |Training time=0.48s (22.16%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55 +[2023-04-14 13:15:45,007] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 4096, reducing to 2048 +[2023-04-14 13:15:45,008] [INFO] [logging.py:96:log_dist] [Rank 0] step=7320, skipped=95, lr=[3.78018739391981e-07, 3.78018739391981e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:15:45,008] [INFO] [timer.py:199:stop] epoch=0/micro_step=7320/global_step=7320, RunningAvgSamplesPerSec=105.61084278644113, CurrSamplesPerSec=106.58968233799237, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:15:45,100] [INFO] [logging.py:96:log_dist] [Rank 0] step=7320, skipped=126, lr=[2.0760575293895051e-07, 2.0760575293895051e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7319|ppo_ep: 1|act_loss: 0.024017333984375|cri_loss: 0.01222991943359375|unsuper_loss: 0.0 +average reward score: 5.2578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.01%) |Training time=0.46s (21.44%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7320|ppo_ep: 1|act_loss: -0.010589599609375|cri_loss: -0.005062103271484375|unsuper_loss: 0.0 +average reward score: 5.375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.37%) |Training time=0.48s (22.01%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7321|ppo_ep: 1|act_loss: 0.006988525390625|cri_loss: 0.00362396240234375|unsuper_loss: 0.0 +average reward score: 5.20703125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.85%) |Training time=0.49s (22.57%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7322|ppo_ep: 1|act_loss: -6.532669067382812e-05|cri_loss: 0.00026035308837890625|unsuper_loss: 0.0 +average reward score: 5.2578125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.01%) |Training time=0.49s (22.38%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7323|ppo_ep: 1|act_loss: 0.02740478515625|cri_loss: 0.01401519775390625|unsuper_loss: 0.0 +average reward score: 5.4921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.98%) |Training time=0.49s (22.51%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7324|ppo_ep: 1|act_loss: 0.03741455078125|cri_loss: 0.019012451171875|unsuper_loss: 0.0 +average reward score: 5.0 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.81%) |Training time=0.49s (22.43%) |Others=0.10 (4.76%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7325|ppo_ep: 1|act_loss: 0.029541015625|cri_loss: 0.0160675048828125|unsuper_loss: 0.0 +average reward score: 5.4296875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.09%) |Training time=0.48s (22.36%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7326|ppo_ep: 1|act_loss: -0.01361083984375|cri_loss: -0.00656890869140625|unsuper_loss: 0.0 +average reward score: 5.18359375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.12%) |Training time=0.48s (22.25%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7327|ppo_ep: 1|act_loss: 0.006519317626953125|cri_loss: 0.003330230712890625|unsuper_loss: 0.0 +average reward score: 6.0625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.97%) |Training time=0.49s (22.50%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7328|ppo_ep: 1|act_loss: 0.0673828125|cri_loss: 0.03973388671875|unsuper_loss: 0.0 +average reward score: 5.75390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.66%) |Training time=0.49s (22.73%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.55 +[2023-04-14 13:16:06,691] [INFO] [logging.py:96:log_dist] [Rank 0] step=7330, skipped=95, lr=[3.708439159571659e-07, 3.708439159571659e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:16:06,709] [INFO] [timer.py:199:stop] epoch=0/micro_step=7330/global_step=7330, RunningAvgSamplesPerSec=105.60095077536732, CurrSamplesPerSec=99.02173837662687, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:16:06,802] [INFO] [logging.py:96:log_dist] [Rank 0] step=7330, skipped=126, lr=[2.0378253563519247e-07, 2.0378253563519247e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7329|ppo_ep: 1|act_loss: -0.0149078369140625|cri_loss: -0.00720977783203125|unsuper_loss: 0.0 +average reward score: 5.48046875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.07%) |Training time=0.49s (22.43%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7330|ppo_ep: 1|act_loss: 0.022674560546875|cri_loss: 0.01198577880859375|unsuper_loss: 0.0 +average reward score: 5.4921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.63%) |Training time=0.49s (22.77%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7331|ppo_ep: 1|act_loss: -0.00815582275390625|cri_loss: -0.00391387939453125|unsuper_loss: 0.0 +average reward score: 5.17578125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.94%) |Training time=0.49s (22.54%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7332|ppo_ep: 1|act_loss: -0.05584716796875|cri_loss: -0.021484375|unsuper_loss: 0.0 +average reward score: 5.11328125 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.61s (70.82%) |Training time=0.48s (21.26%) |Others=0.18 (7.93%)|CurSamplesPerSec=14.11 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7333|ppo_ep: 1|act_loss: -0.01401519775390625|cri_loss: -0.006877899169921875|unsuper_loss: 0.0 +average reward score: 5.46875 +------------------------------------------------------------------------------------- +|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.64s (73.48%) |Training time=0.48s (21.45%) |Others=0.11 (5.07%)|CurSamplesPerSec=14.37 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7334|ppo_ep: 1|act_loss: 0.008270263671875|cri_loss: 0.004230499267578125|unsuper_loss: 0.0 +average reward score: 5.16015625 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.10%) |Training time=0.46s (20.64%) |Others=0.12 (5.26%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7335|ppo_ep: 1|act_loss: -0.0372314453125|cri_loss: -0.0173187255859375|unsuper_loss: 0.0 +average reward score: 5.53515625 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.10%) |Training time=0.47s (20.60%) |Others=0.10 (4.31%)|CurSamplesPerSec=14.02 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7336|ppo_ep: 1|act_loss: 0.006072998046875|cri_loss: 0.003406524658203125|unsuper_loss: 0.0 +average reward score: 5.8984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.71%) |Training time=0.47s (21.68%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7337|ppo_ep: 1|act_loss: -0.01271820068359375|cri_loss: -0.00617218017578125|unsuper_loss: 0.0 +average reward score: 4.96484375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.83%) |Training time=0.47s (21.64%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7338|ppo_ep: 1|act_loss: 0.031219482421875|cri_loss: 0.0158233642578125|unsuper_loss: 0.0 +average reward score: 5.08984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.67%) |Training time=0.47s (21.71%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55 +[2023-04-14 13:16:28,680] [INFO] [logging.py:96:log_dist] [Rank 0] step=7340, skipped=95, lr=[3.6373511386058315e-07, 3.6373511386058315e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:16:28,698] [INFO] [timer.py:199:stop] epoch=0/micro_step=7340/global_step=7340, RunningAvgSamplesPerSec=105.59720320971675, CurrSamplesPerSec=105.43584973840909, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:16:28,791] [INFO] [logging.py:96:log_dist] [Rank 0] step=7340, skipped=126, lr=[1.9999335381357953e-07, 1.9999335381357953e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7339|ppo_ep: 1|act_loss: -0.0055084228515625|cri_loss: -0.002353668212890625|unsuper_loss: 0.0 +average reward score: 5.65625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.97%) |Training time=0.47s (21.52%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7340|ppo_ep: 1|act_loss: -0.002819061279296875|cri_loss: -0.0006628036499023438|unsuper_loss: 0.0 +average reward score: 5.1796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.60%) |Training time=0.47s (21.81%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7341|ppo_ep: 1|act_loss: -0.01428985595703125|cri_loss: -0.006931304931640625|unsuper_loss: 0.0 +average reward score: 5.375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.85%) |Training time=0.47s (21.62%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7342|ppo_ep: 1|act_loss: 0.03863525390625|cri_loss: 0.0203704833984375|unsuper_loss: 0.0 +average reward score: 5.71484375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.76%) |Training time=0.47s (21.64%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7343|ppo_ep: 1|act_loss: -0.00783538818359375|cri_loss: -0.003650665283203125|unsuper_loss: 0.0 +average reward score: 5.40625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.59%) |Training time=0.48s (21.90%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7344|ppo_ep: 1|act_loss: 0.00574493408203125|cri_loss: 0.0035114288330078125|unsuper_loss: 0.0 +average reward score: 5.34375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.85%) |Training time=0.47s (21.54%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7345|ppo_ep: 1|act_loss: -0.00555419921875|cri_loss: 0.001644134521484375|unsuper_loss: 0.0 +average reward score: 5.43359375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.14%) |Training time=0.46s (21.34%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7346|ppo_ep: 1|act_loss: -0.0234527587890625|cri_loss: -0.011444091796875|unsuper_loss: 0.0 +average reward score: 5.39453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.35%) |Training time=0.45s (21.01%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7347|ppo_ep: 1|act_loss: 0.0014638900756835938|cri_loss: 0.0008153915405273438|unsuper_loss: 0.0 +average reward score: 6.046875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.93%) |Training time=0.47s (21.52%) |Others=0.12 (5.55%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7348|ppo_ep: 1|act_loss: -0.0240478515625|cri_loss: -0.01140594482421875|unsuper_loss: 0.0 +average reward score: 5.78125 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.72s (75.18%) |Training time=0.46s (20.09%) |Others=0.11 (4.72%)|CurSamplesPerSec=14.02 |AvgSamplesPerSec=14.55 +[2023-04-14 13:16:50,529] [INFO] [logging.py:96:log_dist] [Rank 0] step=7350, skipped=95, lr=[3.5669243847182864e-07, 3.5669243847182864e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:16:50,547] [INFO] [timer.py:199:stop] epoch=0/micro_step=7350/global_step=7350, RunningAvgSamplesPerSec=105.59639078906564, CurrSamplesPerSec=104.471301040761, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:16:50,640] [INFO] [logging.py:96:log_dist] [Rank 0] step=7350, skipped=126, lr=[1.962382636389268e-07, 1.962382636389268e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7349|ppo_ep: 1|act_loss: -0.00226593017578125|cri_loss: -0.0006923675537109375|unsuper_loss: 0.0 +average reward score: 5.5703125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.95%) |Training time=0.47s (21.54%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7350|ppo_ep: 1|act_loss: -0.0267333984375|cri_loss: -0.01296234130859375|unsuper_loss: 0.0 +average reward score: 5.69140625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.38%) |Training time=0.48s (22.07%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7351|ppo_ep: 1|act_loss: -0.008514404296875|cri_loss: -0.0030345916748046875|unsuper_loss: 0.0 +average reward score: 5.5703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.89%) |Training time=0.47s (21.58%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7352|ppo_ep: 1|act_loss: -0.0154571533203125|cri_loss: -0.007434844970703125|unsuper_loss: 0.0 +average reward score: 5.89453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.02%) |Training time=0.46s (21.35%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7353|ppo_ep: 1|act_loss: -0.00461578369140625|cri_loss: -0.002185821533203125|unsuper_loss: 0.0 +average reward score: 5.3359375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.62%) |Training time=0.48s (21.86%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7354|ppo_ep: 1|act_loss: -0.0004100799560546875|cri_loss: 8.821487426757812e-06|unsuper_loss: 0.0 +average reward score: 5.7578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.64%) |Training time=0.47s (21.74%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7355|ppo_ep: 1|act_loss: 0.015960693359375|cri_loss: 0.00814056396484375|unsuper_loss: 0.0 +average reward score: 6.1015625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.31%) |Training time=0.46s (21.15%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7356|ppo_ep: 1|act_loss: 0.0009984970092773438|cri_loss: 0.0006694793701171875|unsuper_loss: 0.0 +average reward score: 5.80859375 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.09%) |Training time=0.48s (21.66%) |Others=0.14 (6.25%)|CurSamplesPerSec=14.45 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7357|ppo_ep: 1|act_loss: 0.05804443359375|cri_loss: 0.0310516357421875|unsuper_loss: 0.0 +average reward score: 4.7578125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.80%) |Training time=0.47s (21.66%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7358|ppo_ep: 1|act_loss: 0.003452301025390625|cri_loss: 0.0018177032470703125|unsuper_loss: 0.0 +average reward score: 5.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.06%) |Training time=0.48s (22.31%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.55 +[2023-04-14 13:17:12,274] [INFO] [logging.py:96:log_dist] [Rank 0] step=7360, skipped=95, lr=[3.4971599418034037e-07, 3.4971599418034037e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:17:12,293] [INFO] [timer.py:199:stop] epoch=0/micro_step=7360/global_step=7360, RunningAvgSamplesPerSec=105.59306648686497, CurrSamplesPerSec=97.36653804664417, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:17:12,386] [INFO] [logging.py:96:log_dist] [Rank 0] step=7360, skipped=126, lr=[1.9251732077072925e-07, 1.9251732077072925e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7359|ppo_ep: 1|act_loss: 0.00797271728515625|cri_loss: 0.00403594970703125|unsuper_loss: 0.0 +average reward score: 5.0078125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.98%) |Training time=0.49s (22.55%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7360|ppo_ep: 1|act_loss: 0.01273345947265625|cri_loss: 0.0066986083984375|unsuper_loss: 0.0 +average reward score: 5.984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.30%) |Training time=0.47s (22.04%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7361|ppo_ep: 1|act_loss: -0.01342010498046875|cri_loss: -0.00612640380859375|unsuper_loss: 0.0 +average reward score: 5.4453125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.55%) |Training time=0.47s (21.80%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7362|ppo_ep: 1|act_loss: 0.014251708984375|cri_loss: 0.0075225830078125|unsuper_loss: 0.0 +average reward score: 5.5546875 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.74s (74.70%) |Training time=0.49s (21.01%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7363|ppo_ep: 1|act_loss: -7.724761962890625e-05|cri_loss: 0.0008220672607421875|unsuper_loss: 0.0 +average reward score: 4.68359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.92%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7364|ppo_ep: 1|act_loss: -0.0110931396484375|cri_loss: -0.005306243896484375|unsuper_loss: 0.0 +average reward score: 4.98046875 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.59%) |Training time=0.48s (21.81%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7365|ppo_ep: 1|act_loss: -0.002201080322265625|cri_loss: -0.0009555816650390625|unsuper_loss: 0.0 +average reward score: 5.24609375 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.67s (74.23%) |Training time=0.47s (20.97%) |Others=0.11 (4.79%)|CurSamplesPerSec=14.20 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7366|ppo_ep: 1|act_loss: -0.0089569091796875|cri_loss: -0.004367828369140625|unsuper_loss: 0.0 +average reward score: 5.59375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.78%) |Training time=0.46s (21.56%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7367|ppo_ep: 1|act_loss: -0.0165863037109375|cri_loss: -0.00789642333984375|unsuper_loss: 0.0 +average reward score: 5.66796875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.33%) |Training time=0.48s (22.11%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7368|ppo_ep: 1|act_loss: -0.010040283203125|cri_loss: -0.004924774169921875|unsuper_loss: 0.0 +average reward score: 5.265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.06%) |Training time=0.48s (22.31%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.55 +[2023-04-14 13:17:34,126] [INFO] [logging.py:96:log_dist] [Rank 0] step=7370, skipped=95, lr=[3.428058843938532e-07, 3.428058843938532e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:17:34,144] [INFO] [timer.py:199:stop] epoch=0/micro_step=7370/global_step=7370, RunningAvgSamplesPerSec=105.58833846823492, CurrSamplesPerSec=101.61320161196909, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:17:34,237] [INFO] [logging.py:96:log_dist] [Rank 0] step=7370, skipped=126, lr=[1.888305803623372e-07, 1.888305803623372e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7369|ppo_ep: 1|act_loss: -0.023406982421875|cri_loss: -0.0115966796875|unsuper_loss: 0.0 +average reward score: 5.31640625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.23%) |Training time=0.48s (22.24%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7370|ppo_ep: 1|act_loss: 0.0177001953125|cri_loss: 0.009124755859375|unsuper_loss: 0.0 +average reward score: 4.90625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.10%) |Training time=0.48s (22.28%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7371|ppo_ep: 1|act_loss: 0.00457763671875|cri_loss: 0.0029449462890625|unsuper_loss: 0.0 +average reward score: 5.84375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.33%) |Training time=0.47s (22.02%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7372|ppo_ep: 1|act_loss: -0.011962890625|cri_loss: -0.00588226318359375|unsuper_loss: 0.0 +average reward score: 5.16015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.99%) |Training time=0.48s (22.41%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7373|ppo_ep: 1|act_loss: -0.016937255859375|cri_loss: -0.00826263427734375|unsuper_loss: 0.0 +average reward score: 5.61328125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.15%) |Training time=0.48s (22.30%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7374|ppo_ep: 1|act_loss: -0.0300140380859375|cri_loss: -0.01454925537109375|unsuper_loss: 0.0 +average reward score: 5.08203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.49%) |Training time=0.47s (21.84%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7375|ppo_ep: 1|act_loss: 0.0208740234375|cri_loss: 0.01078033447265625|unsuper_loss: 0.0 +average reward score: 4.859375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.35%) |Training time=0.47s (22.02%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7376|ppo_ep: 1|act_loss: 0.0167388916015625|cri_loss: 0.00910186767578125|unsuper_loss: 0.0 +average reward score: 5.23046875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.46%) |Training time=0.47s (21.87%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7377|ppo_ep: 1|act_loss: -0.005702972412109375|cri_loss: -0.0024814605712890625|unsuper_loss: 0.0 +average reward score: 5.5 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.47%) |Training time=0.47s (20.22%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.77 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7378|ppo_ep: 1|act_loss: -0.01103973388671875|cri_loss: -0.00537872314453125|unsuper_loss: 0.0 +average reward score: 5.4296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.61%) |Training time=0.47s (21.72%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.55 +[2023-04-14 13:17:55,852] [INFO] [logging.py:96:log_dist] [Rank 0] step=7380, skipped=95, lr=[3.359622115368647e-07, 3.359622115368647e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:17:55,870] [INFO] [timer.py:199:stop] epoch=0/micro_step=7380/global_step=7380, RunningAvgSamplesPerSec=105.58390759317957, CurrSamplesPerSec=101.95661566977104, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:17:55,967] [INFO] [logging.py:96:log_dist] [Rank 0] step=7380, skipped=126, lr=[1.851780970601366e-07, 1.851780970601366e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7379|ppo_ep: 1|act_loss: 0.0010738372802734375|cri_loss: 0.0008387565612792969|unsuper_loss: 0.0 +average reward score: 5.51171875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.22%) |Training time=0.48s (22.08%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7380|ppo_ep: 1|act_loss: -0.0142974853515625|cri_loss: -0.006511688232421875|unsuper_loss: 0.0 +average reward score: 5.37890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.11%) |Training time=0.46s (21.22%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7381|ppo_ep: 1|act_loss: 0.0143585205078125|cri_loss: 0.0078277587890625|unsuper_loss: 0.0 +average reward score: 5.484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.38%) |Training time=0.45s (20.95%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7382|ppo_ep: 1|act_loss: -0.0185394287109375|cri_loss: -0.0091094970703125|unsuper_loss: 0.0 +average reward score: 5.1796875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.07%) |Training time=0.46s (21.28%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7383|ppo_ep: 1|act_loss: 0.018524169921875|cri_loss: 0.011260986328125|unsuper_loss: 0.0 +average reward score: 5.375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.26%) |Training time=0.45s (21.17%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7384|ppo_ep: 1|act_loss: -0.0389404296875|cri_loss: -0.0170745849609375|unsuper_loss: 0.0 +average reward score: 5.3984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.08%) |Training time=0.46s (21.29%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7385|ppo_ep: 1|act_loss: -0.00312042236328125|cri_loss: -0.0014448165893554688|unsuper_loss: 0.0 +average reward score: 5.80078125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.15%) |Training time=0.46s (21.20%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7386|ppo_ep: 1|act_loss: -0.028533935546875|cri_loss: -0.0136566162109375|unsuper_loss: 0.0 +average reward score: 5.24609375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.92%) |Training time=0.46s (21.41%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7387|ppo_ep: 1|act_loss: 0.0108184814453125|cri_loss: 0.005504608154296875|unsuper_loss: 0.0 +average reward score: 5.8125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.17%) |Training time=0.46s (21.21%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7388|ppo_ep: 1|act_loss: -0.003536224365234375|cri_loss: -0.001667022705078125|unsuper_loss: 0.0 +average reward score: 5.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.02%) |Training time=0.46s (21.33%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55 +[2023-04-14 13:18:17,398] [INFO] [logging.py:96:log_dist] [Rank 0] step=7390, skipped=95, lr=[3.2918507704911455e-07, 3.2918507704911455e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:18:17,416] [INFO] [timer.py:199:stop] epoch=0/micro_step=7390/global_step=7390, RunningAvgSamplesPerSec=105.5878590176888, CurrSamplesPerSec=107.56306313887185, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:18:17,509] [INFO] [logging.py:96:log_dist] [Rank 0] step=7390, skipped=126, lr=[1.8155992500274112e-07, 1.8155992500274112e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7389|ppo_ep: 1|act_loss: 0.023468017578125|cri_loss: 0.01312255859375|unsuper_loss: 0.0 +average reward score: 5.6953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.11%) |Training time=0.46s (21.34%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7390|ppo_ep: 1|act_loss: 0.00725555419921875|cri_loss: 0.00370025634765625|unsuper_loss: 0.0 +average reward score: 5.94921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.17%) |Training time=0.46s (21.18%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7391|ppo_ep: 1|act_loss: -0.00531768798828125|cri_loss: -0.0025043487548828125|unsuper_loss: 0.0 +average reward score: 5.39453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.46s (21.38%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7392|ppo_ep: 1|act_loss: 0.00354766845703125|cri_loss: 0.00273895263671875|unsuper_loss: 0.0 +average reward score: 5.171875 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.68%) |Training time=0.46s (19.76%) |Others=0.11 (4.56%)|CurSamplesPerSec=13.77 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7393|ppo_ep: 1|act_loss: -0.00673675537109375|cri_loss: -0.0030460357666015625|unsuper_loss: 0.0 +average reward score: 5.19921875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.65%) |Training time=0.45s (20.69%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7394|ppo_ep: 1|act_loss: 0.0222015380859375|cri_loss: 0.0114288330078125|unsuper_loss: 0.0 +average reward score: 6.484375 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.70s (74.73%) |Training time=0.48s (20.87%) |Others=0.10 (4.40%)|CurSamplesPerSec=14.06 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7395|ppo_ep: 1|act_loss: -0.0157470703125|cri_loss: -0.00746917724609375|unsuper_loss: 0.0 +average reward score: 5.55078125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.94%) |Training time=0.46s (21.39%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7396|ppo_ep: 1|act_loss: -0.01318359375|cri_loss: -0.005802154541015625|unsuper_loss: 0.0 +average reward score: 5.64453125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.34%) |Training time=0.45s (20.99%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7397|ppo_ep: 1|act_loss: 0.004085540771484375|cri_loss: 0.00223541259765625|unsuper_loss: 0.0 +average reward score: 4.953125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.21%) |Training time=0.45s (21.14%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7398|ppo_ep: 1|act_loss: 0.0054931640625|cri_loss: 0.0029010772705078125|unsuper_loss: 0.0 +average reward score: 5.41015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.14%) |Training time=0.46s (21.21%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55 +[2023-04-14 13:18:39,217] [INFO] [logging.py:96:log_dist] [Rank 0] step=7400, skipped=95, lr=[3.2247458138408513e-07, 3.2247458138408513e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:18:39,235] [INFO] [timer.py:199:stop] epoch=0/micro_step=7400/global_step=7400, RunningAvgSamplesPerSec=105.59212222294153, CurrSamplesPerSec=110.47226664581524, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:18:39,328] [INFO] [logging.py:96:log_dist] [Rank 0] step=7400, skipped=126, lr=[1.7797611782018942e-07, 1.7797611782018942e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7399|ppo_ep: 1|act_loss: 0.00040078163146972656|cri_loss: 0.0002589225769042969|unsuper_loss: 0.0 +average reward score: 5.84765625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.30%) |Training time=0.45s (21.05%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7400|ppo_ep: 1|act_loss: 0.05859375|cri_loss: 0.03192138671875|unsuper_loss: 0.0 +average reward score: 5.3359375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.40%) |Training time=0.45s (20.92%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7401|ppo_ep: 1|act_loss: 0.004222869873046875|cri_loss: 0.0022983551025390625|unsuper_loss: 0.0 +average reward score: 4.2421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.05%) |Training time=0.46s (21.31%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7402|ppo_ep: 1|act_loss: -0.022674560546875|cri_loss: -0.01120758056640625|unsuper_loss: 0.0 +average reward score: 5.0546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.16%) |Training time=0.45s (21.10%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7403|ppo_ep: 1|act_loss: 0.02545166015625|cri_loss: 0.01396942138671875|unsuper_loss: 0.0 +average reward score: 5.0546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.46%) |Training time=0.45s (20.98%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7404|ppo_ep: 1|act_loss: 0.02581787109375|cri_loss: 0.01312255859375|unsuper_loss: 0.0 +average reward score: 5.3984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.09%) |Training time=0.46s (21.27%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7405|ppo_ep: 1|act_loss: 0.0230865478515625|cri_loss: 0.01181793212890625|unsuper_loss: 0.0 +average reward score: 5.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.49%) |Training time=0.47s (21.85%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7406|ppo_ep: 1|act_loss: 0.04376220703125|cri_loss: 0.0252227783203125|unsuper_loss: 0.0 +average reward score: 5.625 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.58s (69.98%) |Training time=0.49s (21.57%) |Others=0.19 (8.45%)|CurSamplesPerSec=14.14 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7407|ppo_ep: 1|act_loss: 0.018585205078125|cri_loss: 0.009429931640625|unsuper_loss: 0.0 +average reward score: 5.03125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.05%) |Training time=0.47s (21.26%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7408|ppo_ep: 1|act_loss: 0.018463134765625|cri_loss: 0.00962066650390625|unsuper_loss: 0.0 +average reward score: 5.6875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.64%) |Training time=0.47s (21.71%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.55 +[2023-04-14 13:19:00,873] [INFO] [logging.py:96:log_dist] [Rank 0] step=7410, skipped=95, lr=[3.158308240075113e-07, 3.158308240075113e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:19:00,892] [INFO] [timer.py:199:stop] epoch=0/micro_step=7410/global_step=7410, RunningAvgSamplesPerSec=105.59323097908113, CurrSamplesPerSec=103.80286557061606, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:19:00,985] [INFO] [logging.py:96:log_dist] [Rank 0] step=7410, skipped=126, lr=[1.744267286331497e-07, 1.744267286331497e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7409|ppo_ep: 1|act_loss: 0.0030670166015625|cri_loss: 0.0015916824340820312|unsuper_loss: 0.0 +average reward score: 5.86328125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.45%) |Training time=0.47s (21.90%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7410|ppo_ep: 1|act_loss: -0.0164031982421875|cri_loss: -0.008026123046875|unsuper_loss: 0.0 +average reward score: 5.06640625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.41%) |Training time=0.48s (21.95%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7411|ppo_ep: 1|act_loss: -0.005390167236328125|cri_loss: -0.002544403076171875|unsuper_loss: 0.0 +average reward score: 5.328125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.45%) |Training time=0.45s (20.98%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7412|ppo_ep: 1|act_loss: 0.008453369140625|cri_loss: 0.00438690185546875|unsuper_loss: 0.0 +average reward score: 5.4296875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.88%) |Training time=0.46s (21.42%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.55 +[2023-04-14 13:19:09,581] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 7413|ppo_ep: 1|act_loss: -0.0165252685546875|cri_loss: -0.00797271728515625|unsuper_loss: 0.0 +average reward score: 5.2109375 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.54%) |Training time=0.45s (21.23%) |Others=0.09 (4.23%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.55 +[2023-04-14 13:19:11,712] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 7414|ppo_ep: 1|act_loss: -0.00782012939453125|cri_loss: -0.0037174224853515625|unsuper_loss: 0.0 +average reward score: 5.5703125 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.58s (74.31%) |Training time=0.46s (21.49%) |Others=0.09 (4.20%)|CurSamplesPerSec=15.02 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7415|ppo_ep: 1|act_loss: -0.019073486328125|cri_loss: -0.009246826171875|unsuper_loss: 0.0 +average reward score: 6.4375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.44%) |Training time=0.45s (20.91%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7416|ppo_ep: 1|act_loss: -0.026031494140625|cri_loss: -0.01288604736328125|unsuper_loss: 0.0 +average reward score: 5.0234375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.88%) |Training time=0.46s (21.45%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7417|ppo_ep: 1|act_loss: -0.0194549560546875|cri_loss: -0.0088043212890625|unsuper_loss: 0.0 +average reward score: 5.34765625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.92%) |Training time=0.46s (21.40%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7418|ppo_ep: 1|act_loss: -0.02740478515625|cri_loss: -0.01326751708984375|unsuper_loss: 0.0 +average reward score: 6.375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.78%) |Training time=0.46s (21.56%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.55 +[2023-04-14 13:19:22,331] [INFO] [logging.py:96:log_dist] [Rank 0] step=7420, skipped=95, lr=[3.0925390339590433e-07, 3.0925390339590433e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:19:22,349] [INFO] [timer.py:199:stop] epoch=0/micro_step=7420/global_step=7420, RunningAvgSamplesPerSec=105.59626399643966, CurrSamplesPerSec=107.24917276422602, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:19:22,442] [INFO] [logging.py:96:log_dist] [Rank 0] step=7420, skipped=128, lr=[1.7161203361170996e-07, 1.7161203361170996e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7419|ppo_ep: 1|act_loss: -0.01311492919921875|cri_loss: -0.006343841552734375|unsuper_loss: 0.0 +average reward score: 5.234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.64%) |Training time=0.46s (21.47%) |Others=0.11 (4.89%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7420|ppo_ep: 1|act_loss: -0.0055694580078125|cri_loss: -0.0026264190673828125|unsuper_loss: 0.0 +average reward score: 5.65625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.85%) |Training time=0.46s (21.50%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7421|ppo_ep: 1|act_loss: -0.005157470703125|cri_loss: -0.002201080322265625|unsuper_loss: 0.0 +average reward score: 5.5546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.88%) |Training time=0.46s (21.54%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7422|ppo_ep: 1|act_loss: -0.018798828125|cri_loss: -0.00916290283203125|unsuper_loss: 0.0 +average reward score: 5.54296875 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.88%) |Training time=0.46s (19.78%) |Others=0.10 (4.33%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7423|ppo_ep: 1|act_loss: 0.0019893646240234375|cri_loss: 0.0019683837890625|unsuper_loss: 0.0 +average reward score: 5.0234375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.98%) |Training time=0.46s (21.36%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7424|ppo_ep: 1|act_loss: -0.0025959014892578125|cri_loss: -0.0012378692626953125|unsuper_loss: 0.0 +average reward score: 6.0703125 +------------------------------------------------------------------------------------- +|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.68s (75.04%) |Training time=0.46s (20.45%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.29 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7425|ppo_ep: 1|act_loss: 0.00028014183044433594|cri_loss: 0.0005426406860351562|unsuper_loss: 0.0 +average reward score: 5.34375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.23%) |Training time=0.45s (21.18%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7426|ppo_ep: 1|act_loss: 0.028900146484375|cri_loss: 0.01474761962890625|unsuper_loss: 0.0 +average reward score: 5.2578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.83%) |Training time=0.46s (21.53%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7427|ppo_ep: 1|act_loss: 0.017486572265625|cri_loss: 0.0092010498046875|unsuper_loss: 0.0 +average reward score: 5.984375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.82%) |Training time=0.46s (21.60%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7428|ppo_ep: 1|act_loss: 0.0107269287109375|cri_loss: 0.0064697265625|unsuper_loss: 0.0 +average reward score: 5.140625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.72%) |Training time=0.46s (21.61%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.55 +[2023-04-14 13:19:44,065] [INFO] [logging.py:96:log_dist] [Rank 0] step=7430, skipped=95, lr=[3.0274391703509267e-07, 3.0274391703509267e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:19:44,083] [INFO] [timer.py:199:stop] epoch=0/micro_step=7430/global_step=7430, RunningAvgSamplesPerSec=105.59870905382144, CurrSamplesPerSec=105.69964632309106, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:19:44,176] [INFO] [logging.py:96:log_dist] [Rank 0] step=7430, skipped=128, lr=[1.681247290517518e-07, 1.681247290517518e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7429|ppo_ep: 1|act_loss: 0.041107177734375|cri_loss: 0.02197265625|unsuper_loss: 0.0 +average reward score: 5.44140625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.72%) |Training time=0.47s (21.71%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7430|ppo_ep: 1|act_loss: 0.0006413459777832031|cri_loss: 0.00042319297790527344|unsuper_loss: 0.0 +average reward score: 5.08203125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.05%) |Training time=0.51s (23.39%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7431|ppo_ep: 1|act_loss: 0.0380859375|cri_loss: 0.0194244384765625|unsuper_loss: 0.0 +average reward score: 5.5546875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.65s (76.00%) |Training time=0.42s (19.45%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7432|ppo_ep: 1|act_loss: 0.008270263671875|cri_loss: 0.004383087158203125|unsuper_loss: 0.0 +average reward score: 5.53125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.81%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7433|ppo_ep: 1|act_loss: -0.0036334991455078125|cri_loss: -0.0017004013061523438|unsuper_loss: 0.0 +average reward score: 4.9921875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.98%) |Training time=0.47s (21.38%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7434|ppo_ep: 1|act_loss: -0.002941131591796875|cri_loss: -0.0012197494506835938|unsuper_loss: 0.0 +average reward score: 5.09375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.92%) |Training time=0.46s (21.42%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7435|ppo_ep: 1|act_loss: -0.00490570068359375|cri_loss: -0.0023326873779296875|unsuper_loss: 0.0 +average reward score: 5.29296875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.76%) |Training time=0.47s (21.61%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7436|ppo_ep: 1|act_loss: 0.0081329345703125|cri_loss: 0.00434112548828125|unsuper_loss: 0.0 +average reward score: 5.39453125 +------------------------------------------------------------------------------------- +|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.78s (75.42%) |Training time=0.48s (20.28%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.55 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7437|ppo_ep: 1|act_loss: 0.00555419921875|cri_loss: 0.0030307769775390625|unsuper_loss: 0.0 +average reward score: 4.88671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.49%) |Training time=0.45s (20.86%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7438|ppo_ep: 1|act_loss: -0.00777435302734375|cri_loss: -0.0036983489990234375|unsuper_loss: 0.0 +average reward score: 4.9609375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.65%) |Training time=0.47s (21.70%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55 +[2023-04-14 13:20:05,976] [INFO] [logging.py:96:log_dist] [Rank 0] step=7440, skipped=95, lr=[2.9630096141877935e-07, 2.9630096141877935e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:20:05,994] [INFO] [timer.py:199:stop] epoch=0/micro_step=7440/global_step=7440, RunningAvgSamplesPerSec=105.59770834207129, CurrSamplesPerSec=103.76915880769367, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:20:06,087] [INFO] [logging.py:96:log_dist] [Rank 0] step=7440, skipped=128, lr=[1.646719885086523e-07, 1.646719885086523e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7439|ppo_ep: 1|act_loss: -0.0347900390625|cri_loss: -0.017120361328125|unsuper_loss: 0.0 +average reward score: 5.25 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.71%) |Training time=0.47s (21.76%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7440|ppo_ep: 1|act_loss: -0.00661468505859375|cri_loss: -0.0029449462890625|unsuper_loss: 0.0 +average reward score: 6.734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.58%) |Training time=0.47s (21.77%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7441|ppo_ep: 1|act_loss: -0.016204833984375|cri_loss: -0.00795745849609375|unsuper_loss: 0.0 +average reward score: 5.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.05%) |Training time=0.46s (21.41%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7442|ppo_ep: 1|act_loss: 0.031158447265625|cri_loss: 0.016265869140625|unsuper_loss: 0.0 +average reward score: 5.578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.99%) |Training time=0.46s (21.36%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.55 +epoch: 0|step: 7443|ppo_ep: 1|act_loss: -0.027191162109375|cri_loss: -0.0133209228515625|unsuper_loss: 0.0 +average reward score: 5.6484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.33%) |Training time=0.45s (21.10%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7444|ppo_ep: 1|act_loss: 0.01097869873046875|cri_loss: 0.0056304931640625|unsuper_loss: 0.0 +average reward score: 5.0859375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.20%) |Training time=0.45s (21.11%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7445|ppo_ep: 1|act_loss: 0.030792236328125|cri_loss: 0.016082763671875|unsuper_loss: 0.0 +average reward score: 5.91796875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.78%) |Training time=0.44s (20.65%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7446|ppo_ep: 1|act_loss: -0.001796722412109375|cri_loss: -0.0007958412170410156|unsuper_loss: 0.0 +average reward score: 5.43359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.45%) |Training time=0.45s (20.85%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7447|ppo_ep: 1|act_loss: -0.0030612945556640625|cri_loss: -0.0014820098876953125|unsuper_loss: 0.0 +average reward score: 5.9140625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.52%) |Training time=0.46s (20.97%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7448|ppo_ep: 1|act_loss: 0.0092315673828125|cri_loss: 0.00507354736328125|unsuper_loss: 0.0 +average reward score: 5.92578125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.51%) |Training time=0.45s (20.81%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56 +[2023-04-14 13:20:27,533] [INFO] [logging.py:96:log_dist] [Rank 0] step=7450, skipped=95, lr=[2.89925132047109e-07, 2.89925132047109e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:20:27,552] [INFO] [timer.py:199:stop] epoch=0/micro_step=7450/global_step=7450, RunningAvgSamplesPerSec=105.60254928538421, CurrSamplesPerSec=110.05864481747192, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:20:27,644] [INFO] [logging.py:96:log_dist] [Rank 0] step=7450, skipped=128, lr=[1.6125386316035496e-07, 1.6125386316035496e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7449|ppo_ep: 1|act_loss: -0.0038928985595703125|cri_loss: -0.0017147064208984375|unsuper_loss: 0.0 +average reward score: 5.84375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.27%) |Training time=0.45s (21.17%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7450|ppo_ep: 1|act_loss: 0.0303497314453125|cri_loss: 0.016021728515625|unsuper_loss: 0.0 +average reward score: 5.95703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.80%) |Training time=0.46s (21.54%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7451|ppo_ep: 1|act_loss: 0.006317138671875|cri_loss: 0.0035724639892578125|unsuper_loss: 0.0 +average reward score: 5.42578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.06%) |Training time=0.46s (21.36%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7452|ppo_ep: 1|act_loss: 0.04852294921875|cri_loss: 0.026397705078125|unsuper_loss: 0.0 +average reward score: 5.05078125 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.20%) |Training time=0.45s (19.47%) |Others=0.10 (4.33%)|CurSamplesPerSec=13.80 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7453|ppo_ep: 1|act_loss: -0.019744873046875|cri_loss: -0.0097503662109375|unsuper_loss: 0.0 +average reward score: 5.6171875 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.46%) |Training time=0.46s (20.60%) |Others=0.13 (5.95%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7454|ppo_ep: 1|act_loss: 0.013275146484375|cri_loss: 0.0069122314453125|unsuper_loss: 0.0 +average reward score: 5.0 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.65s (74.51%) |Training time=0.46s (20.95%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7455|ppo_ep: 1|act_loss: 0.0143890380859375|cri_loss: 0.007434844970703125|unsuper_loss: 0.0 +average reward score: 5.35546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.11%) |Training time=0.46s (21.30%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7456|ppo_ep: 1|act_loss: 0.00958251953125|cri_loss: 0.005680084228515625|unsuper_loss: 0.0 +average reward score: 5.5 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.46s (21.45%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7457|ppo_ep: 1|act_loss: 0.0296630859375|cri_loss: 0.0150909423828125|unsuper_loss: 0.0 +average reward score: 4.953125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.95%) |Training time=0.49s (22.54%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7458|ppo_ep: 1|act_loss: 0.00717926025390625|cri_loss: 0.0038280487060546875|unsuper_loss: 0.0 +average reward score: 5.3515625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.42%) |Training time=0.50s (23.00%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.56 +[2023-04-14 13:20:49,459] [INFO] [logging.py:96:log_dist] [Rank 0] step=7460, skipped=95, lr=[2.8361652342525515e-07, 2.8361652342525515e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:20:49,477] [INFO] [timer.py:199:stop] epoch=0/micro_step=7460/global_step=7460, RunningAvgSamplesPerSec=105.5989674076688, CurrSamplesPerSec=88.6655405472359, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:20:49,570] [INFO] [logging.py:96:log_dist] [Rank 0] step=7460, skipped=128, lr=[1.5787040367172379e-07, 1.5787040367172379e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7459|ppo_ep: 1|act_loss: 0.006500244140625|cri_loss: 0.0033111572265625|unsuper_loss: 0.0 +average reward score: 5.4765625 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.89%) |Training time=0.52s (23.68%) |Others=0.10 (4.44%)|CurSamplesPerSec=14.48 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7460|ppo_ep: 1|act_loss: -0.029205322265625|cri_loss: -0.013946533203125|unsuper_loss: 0.0 +average reward score: 5.76171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.13%) |Training time=0.48s (22.25%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7461|ppo_ep: 1|act_loss: 0.010284423828125|cri_loss: 0.005336761474609375|unsuper_loss: 0.0 +average reward score: 5.64453125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.39%) |Training time=0.48s (22.07%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7462|ppo_ep: 1|act_loss: 0.016448974609375|cri_loss: 0.008453369140625|unsuper_loss: 0.0 +average reward score: 5.2734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.14%) |Training time=0.48s (22.25%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7463|ppo_ep: 1|act_loss: 0.0008373260498046875|cri_loss: 0.0004830360412597656|unsuper_loss: 0.0 +average reward score: 5.63671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.46%) |Training time=0.48s (22.00%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7464|ppo_ep: 1|act_loss: 0.019256591796875|cri_loss: 0.01026153564453125|unsuper_loss: 0.0 +average reward score: 5.9453125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.11%) |Training time=0.48s (22.28%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7465|ppo_ep: 1|act_loss: 0.00318145751953125|cri_loss: 0.0030651092529296875|unsuper_loss: 0.0 +average reward score: 5.6171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.48%) |Training time=0.48s (21.98%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7466|ppo_ep: 1|act_loss: 0.0126190185546875|cri_loss: 0.006465911865234375|unsuper_loss: 0.0 +average reward score: 5.3671875 +------------------------------------------------------------------------------------- +|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.06%) |Training time=0.51s (21.69%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.50 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7467|ppo_ep: 1|act_loss: 0.009735107421875|cri_loss: 0.004962921142578125|unsuper_loss: 0.0 +average reward score: 5.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.29%) |Training time=0.48s (22.20%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7468|ppo_ep: 1|act_loss: -0.019805908203125|cri_loss: -0.00965118408203125|unsuper_loss: 0.0 +average reward score: 5.40625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.22%) |Training time=0.48s (22.19%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.56 +[2023-04-14 13:21:11,390] [INFO] [logging.py:96:log_dist] [Rank 0] step=7470, skipped=95, lr=[2.7737522906201384e-07, 2.7737522906201384e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:21:11,408] [INFO] [timer.py:199:stop] epoch=0/micro_step=7470/global_step=7470, RunningAvgSamplesPerSec=105.58988139804403, CurrSamplesPerSec=98.6508434562043, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:21:11,501] [INFO] [logging.py:96:log_dist] [Rank 0] step=7470, skipped=128, lr=[1.5452166019378989e-07, 1.5452166019378989e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7469|ppo_ep: 1|act_loss: -0.00325775146484375|cri_loss: -0.001453399658203125|unsuper_loss: 0.0 +average reward score: 5.12890625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.17%) |Training time=0.49s (22.33%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7470|ppo_ep: 1|act_loss: 0.00650787353515625|cri_loss: 0.0034236907958984375|unsuper_loss: 0.0 +average reward score: 5.0078125 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.17%) |Training time=0.49s (22.28%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.49 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7471|ppo_ep: 1|act_loss: 0.00421905517578125|cri_loss: 0.003101348876953125|unsuper_loss: 0.0 +average reward score: 5.09375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.45%) |Training time=0.48s (22.00%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7472|ppo_ep: 1|act_loss: 0.02142333984375|cri_loss: 0.011138916015625|unsuper_loss: 0.0 +average reward score: 5.0390625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.25%) |Training time=0.48s (22.04%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7473|ppo_ep: 1|act_loss: 0.015045166015625|cri_loss: 0.00791168212890625|unsuper_loss: 0.0 +average reward score: 5.03125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.34%) |Training time=0.48s (22.14%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7474|ppo_ep: 1|act_loss: 6.431341171264648e-05|cri_loss: 0.00018262863159179688|unsuper_loss: 0.0 +average reward score: 5.609375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.90%) |Training time=0.49s (22.51%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7475|ppo_ep: 1|act_loss: 0.01222991943359375|cri_loss: 0.006435394287109375|unsuper_loss: 0.0 +average reward score: 5.23828125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.21%) |Training time=0.48s (22.27%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7476|ppo_ep: 1|act_loss: 0.0091552734375|cri_loss: 0.00495147705078125|unsuper_loss: 0.0 +average reward score: 5.109375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.78%) |Training time=0.49s (22.64%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7477|ppo_ep: 1|act_loss: -0.018341064453125|cri_loss: -0.009063720703125|unsuper_loss: 0.0 +average reward score: 5.07421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.05%) |Training time=0.49s (22.43%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7478|ppo_ep: 1|act_loss: 0.0106048583984375|cri_loss: 0.005496978759765625|unsuper_loss: 0.0 +average reward score: 5.2421875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.79%) |Training time=0.49s (22.60%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.56 +[2023-04-14 13:21:33,187] [INFO] [logging.py:96:log_dist] [Rank 0] step=7480, skipped=95, lr=[2.71201341468426e-07, 2.71201341468426e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:21:33,206] [INFO] [timer.py:199:stop] epoch=0/micro_step=7480/global_step=7480, RunningAvgSamplesPerSec=105.5795860028403, CurrSamplesPerSec=96.38076143670229, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:21:33,298] [INFO] [logging.py:96:log_dist] [Rank 0] step=7480, skipped=128, lr=[1.5120768236301042e-07, 1.5120768236301042e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7479|ppo_ep: 1|act_loss: -0.0081634521484375|cri_loss: -0.0033721923828125|unsuper_loss: 0.0 +average reward score: 5.27734375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.75%) |Training time=0.50s (22.75%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7480|ppo_ep: 1|act_loss: 0.0211944580078125|cri_loss: 0.01141357421875|unsuper_loss: 0.0 +average reward score: 5.00390625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.88%) |Training time=0.49s (22.53%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7481|ppo_ep: 1|act_loss: 0.0014867782592773438|cri_loss: 0.0010585784912109375|unsuper_loss: 0.0 +average reward score: 6.0859375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.59%) |Training time=0.49s (22.41%) |Others=0.11 (5.00%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7482|ppo_ep: 1|act_loss: 0.006160736083984375|cri_loss: 0.003284454345703125|unsuper_loss: 0.0 +average reward score: 6.0078125 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.88%) |Training time=0.49s (20.82%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.70 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7483|ppo_ep: 1|act_loss: -0.007537841796875|cri_loss: -0.00336456298828125|unsuper_loss: 0.0 +average reward score: 5.2578125 +------------------------------------------------------------------------------------- +|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.69s (75.17%) |Training time=0.46s (20.45%) |Others=0.10 (4.37%)|CurSamplesPerSec=14.27 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7484|ppo_ep: 1|act_loss: 0.002811431884765625|cri_loss: 0.0015897750854492188|unsuper_loss: 0.0 +average reward score: 5.72265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.91%) |Training time=0.46s (21.42%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7485|ppo_ep: 1|act_loss: -0.0172119140625|cri_loss: -0.00836944580078125|unsuper_loss: 0.0 +average reward score: 5.4453125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.40%) |Training time=0.45s (20.98%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7486|ppo_ep: 1|act_loss: 0.004638671875|cri_loss: 0.0026397705078125|unsuper_loss: 0.0 +average reward score: 5.3359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.90%) |Training time=0.46s (21.42%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7487|ppo_ep: 1|act_loss: -0.00982666015625|cri_loss: -0.004459381103515625|unsuper_loss: 0.0 +average reward score: 4.921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.93%) |Training time=0.46s (21.49%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7488|ppo_ep: 1|act_loss: -0.0114898681640625|cri_loss: -0.00560760498046875|unsuper_loss: 0.0 +average reward score: 5.58203125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.96%) |Training time=0.49s (22.41%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.56 +[2023-04-14 13:21:55,054] [INFO] [logging.py:96:log_dist] [Rank 0] step=7490, skipped=95, lr=[2.6509495215639946e-07, 2.6509495215639946e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:21:55,073] [INFO] [timer.py:199:stop] epoch=0/micro_step=7490/global_step=7490, RunningAvgSamplesPerSec=105.57695172591919, CurrSamplesPerSec=101.3811761080209, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:21:55,165] [INFO] [logging.py:96:log_dist] [Rank 0] step=7490, skipped=128, lr=[1.4792851930053116e-07, 1.4792851930053116e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7489|ppo_ep: 1|act_loss: -0.01031494140625|cri_loss: -0.0049896240234375|unsuper_loss: 0.0 +average reward score: 4.97265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.48s (22.13%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7490|ppo_ep: 1|act_loss: 0.003208160400390625|cri_loss: 0.0019092559814453125|unsuper_loss: 0.0 +average reward score: 5.1875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.96%) |Training time=0.49s (22.40%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7491|ppo_ep: 1|act_loss: -0.0019426345825195312|cri_loss: -0.000942230224609375|unsuper_loss: 0.0 +average reward score: 5.7734375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.96%) |Training time=0.47s (21.43%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7492|ppo_ep: 1|act_loss: -0.01708984375|cri_loss: -0.00830078125|unsuper_loss: 0.0 +average reward score: 6.19921875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.84%) |Training time=0.45s (20.47%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7493|ppo_ep: 1|act_loss: -0.003551483154296875|cri_loss: -0.0016069412231445312|unsuper_loss: 0.0 +average reward score: 5.31640625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.16%) |Training time=0.46s (21.29%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7494|ppo_ep: 1|act_loss: -0.00821685791015625|cri_loss: -0.003978729248046875|unsuper_loss: 0.0 +average reward score: 5.8515625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.98%) |Training time=0.46s (21.32%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7495|ppo_ep: 1|act_loss: 0.030670166015625|cri_loss: 0.0157470703125|unsuper_loss: 0.0 +average reward score: 5.19140625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.86%) |Training time=0.47s (21.49%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7496|ppo_ep: 1|act_loss: 0.013885498046875|cri_loss: 0.007472991943359375|unsuper_loss: 0.0 +average reward score: 5.64453125 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.63s (71.27%) |Training time=0.45s (19.92%) |Others=0.20 (8.81%)|CurSamplesPerSec=14.01 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7497|ppo_ep: 1|act_loss: -0.0068511962890625|cri_loss: -0.0031795501708984375|unsuper_loss: 0.0 +average reward score: 5.50390625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.92%) |Training time=0.45s (20.53%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7498|ppo_ep: 1|act_loss: -0.0098114013671875|cri_loss: -0.0047607421875|unsuper_loss: 0.0 +average reward score: 5.3671875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.71%) |Training time=0.47s (21.60%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.56 +[2023-04-14 13:22:16,928] [INFO] [logging.py:96:log_dist] [Rank 0] step=7500, skipped=95, lr=[2.5905615163735286e-07, 2.5905615163735286e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:22:16,946] [INFO] [timer.py:199:stop] epoch=0/micro_step=7500/global_step=7500, RunningAvgSamplesPerSec=105.57782550562153, CurrSamplesPerSec=104.88119826961066, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:22:17,039] [INFO] [logging.py:96:log_dist] [Rank 0] step=7500, skipped=128, lr=[1.4468421961145924e-07, 1.4468421961145924e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7499|ppo_ep: 1|act_loss: 0.00513458251953125|cri_loss: 0.0026531219482421875|unsuper_loss: 0.0 +average reward score: 5.171875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.80%) |Training time=0.47s (21.64%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7500|ppo_ep: 1|act_loss: -0.0026226043701171875|cri_loss: -0.0012683868408203125|unsuper_loss: 0.0 +average reward score: 5.91796875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.44%) |Training time=0.45s (20.88%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7501|ppo_ep: 1|act_loss: -0.0023021697998046875|cri_loss: 8.96453857421875e-05|unsuper_loss: 0.0 +average reward score: 5.4296875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.97%) |Training time=0.46s (21.29%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7502|ppo_ep: 1|act_loss: 0.0137939453125|cri_loss: 0.007843017578125|unsuper_loss: 0.0 +average reward score: 4.91015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.03%) |Training time=0.44s (20.38%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7503|ppo_ep: 1|act_loss: -0.0096435546875|cri_loss: -0.00400543212890625|unsuper_loss: 0.0 +average reward score: 5.3671875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.85%) |Training time=0.44s (20.57%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7504|ppo_ep: 1|act_loss: -0.02020263671875|cri_loss: -0.00994873046875|unsuper_loss: 0.0 +average reward score: 5.62109375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.19%) |Training time=0.43s (20.13%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7505|ppo_ep: 1|act_loss: -0.0022716522216796875|cri_loss: -0.0006518363952636719|unsuper_loss: 0.0 +average reward score: 5.23046875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.55%) |Training time=0.43s (19.86%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7506|ppo_ep: 1|act_loss: 0.011077880859375|cri_loss: 0.00572967529296875|unsuper_loss: 0.0 +average reward score: 5.21484375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.83%) |Training time=0.42s (19.46%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7507|ppo_ep: 1|act_loss: 0.001827239990234375|cri_loss: 0.0010480880737304688|unsuper_loss: 0.0 +average reward score: 5.3359375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.81%) |Training time=0.42s (19.60%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7508|ppo_ep: 1|act_loss: 0.00127410888671875|cri_loss: 0.0009026527404785156|unsuper_loss: 0.0 +average reward score: 5.0625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.28%) |Training time=0.43s (20.06%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.56 +[2023-04-14 13:22:38,417] [INFO] [logging.py:96:log_dist] [Rank 0] step=7510, skipped=95, lr=[2.5308502942087883e-07, 2.5308502942087883e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:22:38,436] [INFO] [timer.py:199:stop] epoch=0/micro_step=7510/global_step=7510, RunningAvgSamplesPerSec=105.59227493403863, CurrSamplesPerSec=120.76162267067473, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:22:38,528] [INFO] [logging.py:96:log_dist] [Rank 0] step=7510, skipped=128, lr=[1.4147483138414269e-07, 1.4147483138414269e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7509|ppo_ep: 1|act_loss: 0.0016269683837890625|cri_loss: 0.0010166168212890625|unsuper_loss: 0.0 +average reward score: 5.0546875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.44%) |Training time=0.43s (19.97%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7510|ppo_ep: 1|act_loss: -0.020721435546875|cri_loss: -0.01023101806640625|unsuper_loss: 0.0 +average reward score: 5.625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.98%) |Training time=0.44s (20.35%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7511|ppo_ep: 1|act_loss: -0.04547119140625|cri_loss: -0.01995849609375|unsuper_loss: 0.0 +average reward score: 5.30078125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.43%) |Training time=0.43s (20.00%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7512|ppo_ep: 1|act_loss: -0.01806640625|cri_loss: -0.00852203369140625|unsuper_loss: 0.0 +average reward score: 5.9765625 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.80s (77.03%) |Training time=0.44s (18.67%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.70 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7513|ppo_ep: 1|act_loss: 0.0228271484375|cri_loss: 0.01172637939453125|unsuper_loss: 0.0 +average reward score: 5.421875 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.71s (76.25%) |Training time=0.44s (19.37%) |Others=0.10 (4.38%)|CurSamplesPerSec=14.23 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7514|ppo_ep: 1|act_loss: -0.031982421875|cri_loss: -0.01556396484375|unsuper_loss: 0.0 +average reward score: 5.7265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.88%) |Training time=0.44s (20.46%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56 +[2023-04-14 13:22:51,702] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 7515|ppo_ep: 1|act_loss: 0.0291748046875|cri_loss: 0.0157318115234375|unsuper_loss: 0.0 +average reward score: 5.1015625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.37%) |Training time=0.44s (20.51%) |Others=0.09 (4.11%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.56 +[2023-04-14 13:22:53,844] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 7516|ppo_ep: 1|act_loss: -0.0148773193359375|cri_loss: -0.006542205810546875|unsuper_loss: 0.0 +average reward score: 5.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.22%) |Training time=0.44s (20.61%) |Others=0.09 (4.17%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7517|ppo_ep: 1|act_loss: 0.0228118896484375|cri_loss: 0.01151275634765625|unsuper_loss: 0.0 +average reward score: 5.05859375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.59%) |Training time=0.45s (20.87%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7518|ppo_ep: 1|act_loss: 0.0023403167724609375|cri_loss: 0.0014944076538085938|unsuper_loss: 0.0 +average reward score: 5.0859375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.83%) |Training time=0.44s (20.50%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56 +[2023-04-14 13:23:00,186] [INFO] [logging.py:96:log_dist] [Rank 0] step=7520, skipped=95, lr=[2.471816740134132e-07, 2.471816740134132e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:23:00,204] [INFO] [timer.py:199:stop] epoch=0/micro_step=7520/global_step=7520, RunningAvgSamplesPerSec=105.6047174355037, CurrSamplesPerSec=117.03275525160899, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:23:00,297] [INFO] [logging.py:96:log_dist] [Rank 0] step=7520, skipped=130, lr=[1.3893248903978695e-07, 1.3893248903978695e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7519|ppo_ep: 1|act_loss: -0.02703857421875|cri_loss: -0.01279449462890625|unsuper_loss: 0.0 +average reward score: 5.1484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.10%) |Training time=0.44s (20.33%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7520|ppo_ep: 1|act_loss: 0.0146331787109375|cri_loss: 0.00946044921875|unsuper_loss: 0.0 +average reward score: 5.61328125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.85%) |Training time=0.44s (20.49%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7521|ppo_ep: 1|act_loss: 0.033538818359375|cri_loss: 0.0172271728515625|unsuper_loss: 0.0 +average reward score: 6.171875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.28%) |Training time=0.43s (20.13%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7522|ppo_ep: 1|act_loss: 0.00530242919921875|cri_loss: 0.0028667449951171875|unsuper_loss: 0.0 +average reward score: 5.40234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.99%) |Training time=0.44s (20.35%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7523|ppo_ep: 1|act_loss: 0.003238677978515625|cri_loss: 0.0018138885498046875|unsuper_loss: 0.0 +average reward score: 5.19921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.26%) |Training time=0.43s (20.07%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7524|ppo_ep: 1|act_loss: -0.0134124755859375|cri_loss: -0.006565093994140625|unsuper_loss: 0.0 +average reward score: 5.84375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.26%) |Training time=0.43s (20.06%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7525|ppo_ep: 1|act_loss: 0.0013933181762695312|cri_loss: 0.0007691383361816406|unsuper_loss: 0.0 +average reward score: 4.94140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.72%) |Training time=0.45s (20.73%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7526|ppo_ep: 1|act_loss: 0.00327301025390625|cri_loss: 0.001712799072265625|unsuper_loss: 0.0 +average reward score: 5.6171875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.20%) |Training time=0.43s (20.13%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7527|ppo_ep: 1|act_loss: -0.00595855712890625|cri_loss: -0.0021209716796875|unsuper_loss: 0.0 +average reward score: 5.5625 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.78s (77.04%) |Training time=0.43s (18.67%) |Others=0.10 (4.30%)|CurSamplesPerSec=13.88 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7528|ppo_ep: 1|act_loss: 0.03192138671875|cri_loss: 0.0162353515625|unsuper_loss: 0.0 +average reward score: 5.6953125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.19%) |Training time=0.43s (20.11%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56 +[2023-04-14 13:23:21,842] [INFO] [logging.py:96:log_dist] [Rank 0] step=7530, skipped=95, lr=[2.413461729169248e-07, 2.413461729169248e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:23:21,861] [INFO] [timer.py:199:stop] epoch=0/micro_step=7530/global_step=7530, RunningAvgSamplesPerSec=105.61827921516286, CurrSamplesPerSec=111.98603613450567, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:23:21,953] [INFO] [logging.py:96:log_dist] [Rank 0] step=7530, skipped=130, lr=[1.357860609740455e-07, 1.357860609740455e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7529|ppo_ep: 1|act_loss: -0.0020847320556640625|cri_loss: 0.0007724761962890625|unsuper_loss: 0.0 +average reward score: 5.8984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.60%) |Training time=0.45s (20.83%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7530|ppo_ep: 1|act_loss: -0.008270263671875|cri_loss: -0.00394439697265625|unsuper_loss: 0.0 +average reward score: 4.8828125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.35%) |Training time=0.45s (21.00%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7531|ppo_ep: 1|act_loss: 0.037750244140625|cri_loss: 0.0194091796875|unsuper_loss: 0.0 +average reward score: 5.27734375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.88%) |Training time=0.44s (20.54%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7532|ppo_ep: 1|act_loss: 0.01190185546875|cri_loss: 0.00634765625|unsuper_loss: 0.0 +average reward score: 5.734375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.73%) |Training time=0.44s (20.61%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7533|ppo_ep: 1|act_loss: 0.0234375|cri_loss: 0.0126495361328125|unsuper_loss: 0.0 +average reward score: 5.609375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.95%) |Training time=0.44s (20.45%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7534|ppo_ep: 1|act_loss: -0.00909423828125|cri_loss: -0.004352569580078125|unsuper_loss: 0.0 +average reward score: 5.9609375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.94%) |Training time=0.44s (20.39%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7535|ppo_ep: 1|act_loss: 0.018157958984375|cri_loss: 0.0093536376953125|unsuper_loss: 0.0 +average reward score: 4.9921875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.31%) |Training time=0.43s (20.10%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7536|ppo_ep: 1|act_loss: 0.01336669921875|cri_loss: 0.007061004638671875|unsuper_loss: 0.0 +average reward score: 5.1484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.58%) |Training time=0.45s (20.77%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7537|ppo_ep: 1|act_loss: -0.016876220703125|cri_loss: -0.00801849365234375|unsuper_loss: 0.0 +average reward score: 4.66796875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.74%) |Training time=0.44s (20.69%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7538|ppo_ep: 1|act_loss: -0.024139404296875|cri_loss: -0.0118560791015625|unsuper_loss: 0.0 +average reward score: 4.9765625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.63%) |Training time=0.44s (20.71%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56 +[2023-04-14 13:23:43,304] [INFO] [logging.py:96:log_dist] [Rank 0] step=7540, skipped=95, lr=[2.355786126276159e-07, 2.355786126276159e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:23:43,322] [INFO] [timer.py:199:stop] epoch=0/micro_step=7540/global_step=7540, RunningAvgSamplesPerSec=105.62894102683586, CurrSamplesPerSec=112.07852142696694, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:23:43,414] [INFO] [logging.py:96:log_dist] [Rank 0] step=7540, skipped=130, lr=[1.3267467626223606e-07, 1.3267467626223606e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7539|ppo_ep: 1|act_loss: 0.0186767578125|cri_loss: 0.00942230224609375|unsuper_loss: 0.0 +average reward score: 5.96484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.60%) |Training time=0.45s (20.85%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7540|ppo_ep: 1|act_loss: -0.011962890625|cri_loss: -0.005840301513671875|unsuper_loss: 0.0 +average reward score: 5.5546875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.59%) |Training time=0.44s (20.74%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7541|ppo_ep: 1|act_loss: -0.002826690673828125|cri_loss: -0.001079559326171875|unsuper_loss: 0.0 +average reward score: 5.16796875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.35%) |Training time=0.43s (20.07%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7542|ppo_ep: 1|act_loss: 0.0143280029296875|cri_loss: 0.00728607177734375|unsuper_loss: 0.0 +average reward score: 5.67578125 +------------------------------------------------------------------------------------- +|E2E latency=2.40s |Gather latency=0.00s (0.00%) |Generate time=1.81s (75.39%) |Training time=0.44s (18.29%) |Others=0.15 (6.33%)|CurSamplesPerSec=13.36 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7543|ppo_ep: 1|act_loss: -0.0010290145874023438|cri_loss: 0.00040149688720703125|unsuper_loss: 0.0 +average reward score: 5.13671875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.83%) |Training time=0.44s (20.59%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7544|ppo_ep: 1|act_loss: -0.026947021484375|cri_loss: -0.01317596435546875|unsuper_loss: 0.0 +average reward score: 4.73828125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.70%) |Training time=0.44s (20.63%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7545|ppo_ep: 1|act_loss: -0.00775146484375|cri_loss: -0.0036716461181640625|unsuper_loss: 0.0 +average reward score: 5.8046875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.00%) |Training time=0.44s (20.32%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7546|ppo_ep: 1|act_loss: -0.00421142578125|cri_loss: -0.0018768310546875|unsuper_loss: 0.0 +average reward score: 5.3515625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.58%) |Training time=0.45s (20.85%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7547|ppo_ep: 1|act_loss: 0.026153564453125|cri_loss: 0.01334381103515625|unsuper_loss: 0.0 +average reward score: 5.39453125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.13%) |Training time=0.43s (20.23%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7548|ppo_ep: 1|act_loss: -0.0076904296875|cri_loss: -0.003749847412109375|unsuper_loss: 0.0 +average reward score: 5.359375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.10%) |Training time=0.43s (20.20%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.56 +[2023-04-14 13:24:05,009] [INFO] [logging.py:96:log_dist] [Rank 0] step=7550, skipped=95, lr=[2.298790786346439e-07, 2.298790786346439e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:24:05,027] [INFO] [timer.py:199:stop] epoch=0/micro_step=7550/global_step=7550, RunningAvgSamplesPerSec=105.64110098341945, CurrSamplesPerSec=114.46428324718376, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:24:05,120] [INFO] [logging.py:96:log_dist] [Rank 0] step=7550, skipped=130, lr=[1.2959838102258537e-07, 1.2959838102258537e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7549|ppo_ep: 1|act_loss: -0.001949310302734375|cri_loss: -0.0006542205810546875|unsuper_loss: 0.0 +average reward score: 5.19921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.79%) |Training time=0.44s (20.64%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7550|ppo_ep: 1|act_loss: -0.004299163818359375|cri_loss: -0.0019626617431640625|unsuper_loss: 0.0 +average reward score: 4.98828125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.95%) |Training time=0.44s (20.37%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7551|ppo_ep: 1|act_loss: 0.004322052001953125|cri_loss: 0.002300262451171875|unsuper_loss: 0.0 +average reward score: 5.3359375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.15%) |Training time=0.43s (20.26%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7552|ppo_ep: 1|act_loss: -0.00215911865234375|cri_loss: -0.0009260177612304688|unsuper_loss: 0.0 +average reward score: 5.7734375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.82%) |Training time=0.44s (20.52%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7553|ppo_ep: 1|act_loss: 0.02783203125|cri_loss: 0.01480865478515625|unsuper_loss: 0.0 +average reward score: 5.58984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.74%) |Training time=0.44s (20.69%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7554|ppo_ep: 1|act_loss: 0.00394439697265625|cri_loss: 0.002223968505859375|unsuper_loss: 0.0 +average reward score: 5.671875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.09%) |Training time=0.43s (20.23%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7555|ppo_ep: 1|act_loss: -0.0018434524536132812|cri_loss: -0.0006570816040039062|unsuper_loss: 0.0 +average reward score: 5.90625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.36%) |Training time=0.43s (20.06%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7556|ppo_ep: 1|act_loss: 0.0039520263671875|cri_loss: 0.002269744873046875|unsuper_loss: 0.0 +average reward score: 5.03125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.48%) |Training time=0.45s (20.88%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7557|ppo_ep: 1|act_loss: 0.0239105224609375|cri_loss: 0.012115478515625|unsuper_loss: 0.0 +average reward score: 5.1484375 +------------------------------------------------------------------------------------- +|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.00%) |Training time=0.45s (20.26%) |Others=0.17 (7.75%)|CurSamplesPerSec=14.38 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7558|ppo_ep: 1|act_loss: -0.02239990234375|cri_loss: -0.01085662841796875|unsuper_loss: 0.0 +average reward score: 5.34375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.22%) |Training time=0.43s (20.09%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.56 +[2023-04-14 13:24:26,536] [INFO] [logging.py:96:log_dist] [Rank 0] step=7560, skipped=95, lr=[2.242476554188525e-07, 2.242476554188525e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:24:26,554] [INFO] [timer.py:199:stop] epoch=0/micro_step=7560/global_step=7560, RunningAvgSamplesPerSec=105.65329125870058, CurrSamplesPerSec=115.2066690872322, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:24:26,647] [INFO] [logging.py:96:log_dist] [Rank 0] step=7560, skipped=130, lr=[1.2655722085321064e-07, 1.2655722085321064e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7559|ppo_ep: 1|act_loss: -0.02569580078125|cri_loss: -0.012420654296875|unsuper_loss: 0.0 +average reward score: 6.05859375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.85%) |Training time=0.44s (20.57%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7560|ppo_ep: 1|act_loss: 0.004360198974609375|cri_loss: 0.0026397705078125|unsuper_loss: 0.0 +average reward score: 5.2578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.61%) |Training time=0.44s (20.72%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7561|ppo_ep: 1|act_loss: -0.00481414794921875|cri_loss: -0.002254486083984375|unsuper_loss: 0.0 +average reward score: 5.64453125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.81%) |Training time=0.44s (20.51%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7562|ppo_ep: 1|act_loss: -0.0236053466796875|cri_loss: -0.01155853271484375|unsuper_loss: 0.0 +average reward score: 5.35546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.64%) |Training time=0.45s (20.81%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7563|ppo_ep: 1|act_loss: 0.015167236328125|cri_loss: 0.007965087890625|unsuper_loss: 0.0 +average reward score: 5.40625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.81%) |Training time=0.44s (20.62%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7564|ppo_ep: 1|act_loss: -0.0111083984375|cri_loss: -0.00547027587890625|unsuper_loss: 0.0 +average reward score: 4.9609375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.76%) |Training time=0.44s (20.58%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7565|ppo_ep: 1|act_loss: 0.00646209716796875|cri_loss: 0.003376007080078125|unsuper_loss: 0.0 +average reward score: 5.390625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.82%) |Training time=0.44s (20.52%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7566|ppo_ep: 1|act_loss: -0.0036830902099609375|cri_loss: -0.001728057861328125|unsuper_loss: 0.0 +average reward score: 4.80859375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.72%) |Training time=0.44s (20.60%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7567|ppo_ep: 1|act_loss: -0.0219268798828125|cri_loss: -0.0107879638671875|unsuper_loss: 0.0 +average reward score: 5.24609375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.78%) |Training time=0.44s (20.56%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7568|ppo_ep: 1|act_loss: -0.01464080810546875|cri_loss: -0.006877899169921875|unsuper_loss: 0.0 +average reward score: 5.51171875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.68%) |Training time=0.44s (20.67%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.56 +[2023-04-14 13:24:47,993] [INFO] [logging.py:96:log_dist] [Rank 0] step=7570, skipped=95, lr=[2.186844264515187e-07, 2.186844264515187e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:24:48,012] [INFO] [timer.py:199:stop] epoch=0/micro_step=7570/global_step=7570, RunningAvgSamplesPerSec=105.66395711527322, CurrSamplesPerSec=114.59886133490893, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:24:48,104] [INFO] [logging.py:96:log_dist] [Rank 0] step=7570, skipped=130, lr=[1.235512408314418e-07, 1.235512408314418e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7569|ppo_ep: 1|act_loss: 0.05194091796875|cri_loss: 0.0263824462890625|unsuper_loss: 0.0 +average reward score: 5.3359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.76%) |Training time=0.44s (20.58%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7570|ppo_ep: 1|act_loss: -0.0175018310546875|cri_loss: -0.00848388671875|unsuper_loss: 0.0 +average reward score: 5.18359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.63%) |Training time=0.44s (20.71%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7571|ppo_ep: 1|act_loss: 0.0053863525390625|cri_loss: 0.0031070709228515625|unsuper_loss: 0.0 +average reward score: 5.046875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.95%) |Training time=0.44s (20.23%) |Others=0.10 (4.81%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7572|ppo_ep: 1|act_loss: -0.00743865966796875|cri_loss: -0.0034351348876953125|unsuper_loss: 0.0 +average reward score: 5.4453125 +------------------------------------------------------------------------------------- +|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.69s (72.01%) |Training time=0.56s (23.73%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.63 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7573|ppo_ep: 1|act_loss: 0.01708984375|cri_loss: 0.0089111328125|unsuper_loss: 0.0 +average reward score: 5.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.65%) |Training time=0.44s (20.68%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7574|ppo_ep: 1|act_loss: 0.00736236572265625|cri_loss: 0.00399017333984375|unsuper_loss: 0.0 +average reward score: 5.34375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.56%) |Training time=0.45s (20.78%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7575|ppo_ep: 1|act_loss: -0.009674072265625|cri_loss: -0.004669189453125|unsuper_loss: 0.0 +average reward score: 4.7734375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.71%) |Training time=0.44s (20.63%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7576|ppo_ep: 1|act_loss: 0.016326904296875|cri_loss: 0.00838470458984375|unsuper_loss: 0.0 +average reward score: 6.1953125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.91%) |Training time=0.44s (20.37%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7577|ppo_ep: 1|act_loss: 0.002529144287109375|cri_loss: 0.0013599395751953125|unsuper_loss: 0.0 +average reward score: 5.19140625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.07%) |Training time=0.44s (20.33%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7578|ppo_ep: 1|act_loss: 0.126953125|cri_loss: 0.07867431640625|unsuper_loss: 0.0 +average reward score: 5.875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.40%) |Training time=0.45s (20.97%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.56 +[2023-04-14 13:25:09,680] [INFO] [logging.py:96:log_dist] [Rank 0] step=7580, skipped=95, lr=[2.131894741931159e-07, 2.131894741931159e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:25:09,699] [INFO] [timer.py:199:stop] epoch=0/micro_step=7580/global_step=7580, RunningAvgSamplesPerSec=105.6733901747893, CurrSamplesPerSec=118.37709679268664, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:25:09,791] [INFO] [logging.py:96:log_dist] [Rank 0] step=7580, skipped=130, lr=[1.2058048551315455e-07, 1.2058048551315455e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7579|ppo_ep: 1|act_loss: -0.00325775146484375|cri_loss: -0.0013904571533203125|unsuper_loss: 0.0 +average reward score: 5.60546875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.10%) |Training time=0.43s (20.23%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7580|ppo_ep: 1|act_loss: -0.01525115966796875|cri_loss: -0.0068359375|unsuper_loss: 0.0 +average reward score: 5.8984375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.08%) |Training time=0.43s (20.24%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7581|ppo_ep: 1|act_loss: 0.06732177734375|cri_loss: 0.038665771484375|unsuper_loss: 0.0 +average reward score: 5.6640625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.27%) |Training time=0.43s (20.10%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7582|ppo_ep: 1|act_loss: 0.00707244873046875|cri_loss: 0.0036716461181640625|unsuper_loss: 0.0 +average reward score: 5.8515625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.95%) |Training time=0.44s (20.38%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7583|ppo_ep: 1|act_loss: 0.040618896484375|cri_loss: 0.0207366943359375|unsuper_loss: 0.0 +average reward score: 5.34375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.15%) |Training time=0.43s (20.17%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7584|ppo_ep: 1|act_loss: -0.0211181640625|cri_loss: -0.01018524169921875|unsuper_loss: 0.0 +average reward score: 4.8203125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.80%) |Training time=0.44s (20.52%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7585|ppo_ep: 1|act_loss: -0.0008382797241210938|cri_loss: -0.0003597736358642578|unsuper_loss: 0.0 +average reward score: 5.27734375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.13%) |Training time=0.43s (20.19%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7586|ppo_ep: 1|act_loss: 0.00284576416015625|cri_loss: 0.0018253326416015625|unsuper_loss: 0.0 +average reward score: 5.23046875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.99%) |Training time=0.44s (20.34%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7587|ppo_ep: 1|act_loss: -0.0142364501953125|cri_loss: -0.00701904296875|unsuper_loss: 0.0 +average reward score: 5.42578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.19%) |Training time=0.43s (20.12%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7588|ppo_ep: 1|act_loss: -0.00010442733764648438|cri_loss: 0.0002875328063964844|unsuper_loss: 0.0 +average reward score: 5.3671875 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.76%) |Training time=0.44s (18.90%) |Others=0.10 (4.33%)|CurSamplesPerSec=13.86 |AvgSamplesPerSec=14.56 +[2023-04-14 13:25:31,281] [INFO] [logging.py:96:log_dist] [Rank 0] step=7590, skipped=95, lr=[2.0776288009209243e-07, 2.0776288009209243e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:25:31,300] [INFO] [timer.py:199:stop] epoch=0/micro_step=7590/global_step=7590, RunningAvgSamplesPerSec=105.68754108352529, CurrSamplesPerSec=118.10116994467049, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:25:31,392] [INFO] [logging.py:96:log_dist] [Rank 0] step=7590, skipped=130, lr=[1.1764499893210879e-07, 1.1764499893210879e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7589|ppo_ep: 1|act_loss: -0.02447509765625|cri_loss: -0.01074981689453125|unsuper_loss: 0.0 +average reward score: 4.8828125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.08%) |Training time=0.43s (20.25%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7590|ppo_ep: 1|act_loss: -0.0019626617431640625|cri_loss: -0.0008349418640136719|unsuper_loss: 0.0 +average reward score: 5.88671875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.66%) |Training time=0.44s (20.70%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7591|ppo_ep: 1|act_loss: -0.0298004150390625|cri_loss: -0.01467132568359375|unsuper_loss: 0.0 +average reward score: 4.7890625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.93%) |Training time=0.44s (20.40%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7592|ppo_ep: 1|act_loss: -0.0089111328125|cri_loss: -0.0044097900390625|unsuper_loss: 0.0 +average reward score: 6.21875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.89%) |Training time=0.44s (20.42%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7593|ppo_ep: 1|act_loss: -0.00467681884765625|cri_loss: -0.001857757568359375|unsuper_loss: 0.0 +average reward score: 5.453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.54%) |Training time=0.45s (20.79%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7594|ppo_ep: 1|act_loss: 0.01416015625|cri_loss: 0.00722503662109375|unsuper_loss: 0.0 +average reward score: 5.8359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.56%) |Training time=0.45s (20.78%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7595|ppo_ep: 1|act_loss: -0.00719451904296875|cri_loss: -0.0034542083740234375|unsuper_loss: 0.0 +average reward score: 5.82421875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.22%) |Training time=0.43s (20.10%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7596|ppo_ep: 1|act_loss: -0.0211029052734375|cri_loss: -0.0099334716796875|unsuper_loss: 0.0 +average reward score: 4.90234375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.08%) |Training time=0.43s (20.23%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7597|ppo_ep: 1|act_loss: 0.003650665283203125|cri_loss: 0.002460479736328125|unsuper_loss: 0.0 +average reward score: 5.1875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.91%) |Training time=0.44s (20.41%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7598|ppo_ep: 1|act_loss: 0.00899505615234375|cri_loss: 0.0050506591796875|unsuper_loss: 0.0 +average reward score: 5.9453125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.03%) |Training time=0.43s (20.29%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.56 +[2023-04-14 13:25:52,727] [INFO] [logging.py:96:log_dist] [Rank 0] step=7600, skipped=95, lr=[2.024047245836643e-07, 2.024047245836643e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:25:52,746] [INFO] [timer.py:199:stop] epoch=0/micro_step=7600/global_step=7600, RunningAvgSamplesPerSec=105.69993310975197, CurrSamplesPerSec=117.69169550112328, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:25:52,838] [INFO] [logging.py:96:log_dist] [Rank 0] step=7600, skipped=130, lr=[1.1474482459929714e-07, 1.1474482459929714e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7599|ppo_ep: 1|act_loss: -0.0093536376953125|cri_loss: -0.0044097900390625|unsuper_loss: 0.0 +average reward score: 5.42578125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.02%) |Training time=0.43s (20.30%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7600|ppo_ep: 1|act_loss: -0.025604248046875|cri_loss: -0.0126190185546875|unsuper_loss: 0.0 +average reward score: 5.3828125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.76%) |Training time=0.44s (20.58%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7601|ppo_ep: 1|act_loss: 0.00301361083984375|cri_loss: 0.0016078948974609375|unsuper_loss: 0.0 +average reward score: 5.46875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.00%) |Training time=0.44s (20.33%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7602|ppo_ep: 1|act_loss: -0.020599365234375|cri_loss: -0.00994873046875|unsuper_loss: 0.0 +average reward score: 6.44140625 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.72s (73.39%) |Training time=0.45s (19.01%) |Others=0.18 (7.60%)|CurSamplesPerSec=13.65 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7603|ppo_ep: 1|act_loss: -0.0031585693359375|cri_loss: -0.001377105712890625|unsuper_loss: 0.0 +average reward score: 5.58984375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.65s (75.18%) |Training time=0.44s (19.92%) |Others=0.11 (4.89%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7604|ppo_ep: 1|act_loss: 0.00724029541015625|cri_loss: 0.0037631988525390625|unsuper_loss: 0.0 +average reward score: 5.109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.55%) |Training time=0.45s (20.80%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7605|ppo_ep: 1|act_loss: -0.00554656982421875|cri_loss: -0.0021820068359375|unsuper_loss: 0.0 +average reward score: 5.44921875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.96%) |Training time=0.44s (20.36%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7606|ppo_ep: 1|act_loss: -0.0665283203125|cri_loss: -0.032135009765625|unsuper_loss: 0.0 +average reward score: 5.16015625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.99%) |Training time=0.44s (20.33%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7607|ppo_ep: 1|act_loss: -0.0015411376953125|cri_loss: -0.0003371238708496094|unsuper_loss: 0.0 +average reward score: 5.3515625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.79%) |Training time=0.44s (20.62%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7608|ppo_ep: 1|act_loss: -0.01561737060546875|cri_loss: -0.00743865966796875|unsuper_loss: 0.0 +average reward score: 5.3671875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.21%) |Training time=0.43s (19.99%) |Others=0.10 (4.80%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.56 +[2023-04-14 13:26:14,473] [INFO] [logging.py:96:log_dist] [Rank 0] step=7610, skipped=95, lr=[1.9711508708862154e-07, 1.9711508708862154e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:26:14,491] [INFO] [timer.py:199:stop] epoch=0/micro_step=7610/global_step=7610, RunningAvgSamplesPerSec=105.71107588458801, CurrSamplesPerSec=105.46318940792834, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:26:14,583] [INFO] [logging.py:96:log_dist] [Rank 0] step=7610, skipped=130, lr=[1.1188000550230005e-07, 1.1188000550230005e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7609|ppo_ep: 1|act_loss: -0.0077056884765625|cri_loss: -0.0035419464111328125|unsuper_loss: 0.0 +average reward score: 5.48046875 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.22%) |Training time=0.47s (21.22%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7610|ppo_ep: 1|act_loss: 0.0023365020751953125|cri_loss: 0.0012693405151367188|unsuper_loss: 0.0 +average reward score: 5.2265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.99%) |Training time=0.46s (21.39%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7611|ppo_ep: 1|act_loss: 0.020172119140625|cri_loss: 0.010772705078125|unsuper_loss: 0.0 +average reward score: 5.0078125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.22%) |Training time=0.46s (21.15%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7612|ppo_ep: 1|act_loss: -0.016632080078125|cri_loss: -0.0080718994140625|unsuper_loss: 0.0 +average reward score: 5.25390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.29%) |Training time=0.46s (21.08%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56 +[2023-04-14 13:26:23,123] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 7613|ppo_ep: 1|act_loss: -0.006011962890625|cri_loss: -0.0027866363525390625|unsuper_loss: 0.0 +average reward score: 5.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.06%) |Training time=0.43s (20.27%) |Others=0.10 (4.67%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7614|ppo_ep: 1|act_loss: -0.00437164306640625|cri_loss: -0.001934051513671875|unsuper_loss: 0.0 +average reward score: 5.13671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.94%) |Training time=0.46s (21.45%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7615|ppo_ep: 1|act_loss: -0.00489044189453125|cri_loss: -0.0023822784423828125|unsuper_loss: 0.0 +average reward score: 4.67578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.19%) |Training time=0.46s (21.26%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7616|ppo_ep: 1|act_loss: -0.01690673828125|cri_loss: -0.00826263427734375|unsuper_loss: 0.0 +average reward score: 5.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.33%) |Training time=0.46s (21.02%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.56 +[2023-04-14 13:26:32,062] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 7617|ppo_ep: 1|act_loss: 0.01526641845703125|cri_loss: 0.0093231201171875|unsuper_loss: 0.0 +average reward score: 5.3515625 +------------------------------------------------------------------------------------- +|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.44%) |Training time=0.49s (20.76%) |Others=0.09 (3.81%)|CurSamplesPerSec=13.61 |AvgSamplesPerSec=14.56 +[2023-04-14 13:26:34,217] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 7618|ppo_ep: 1|act_loss: 0.0286865234375|cri_loss: 0.0150604248046875|unsuper_loss: 0.0 +average reward score: 5.578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.31%) |Training time=0.46s (21.55%) |Others=0.09 (4.15%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.56 +[2023-04-14 13:26:36,272] [INFO] [logging.py:96:log_dist] [Rank 0] step=7620, skipped=96, lr=[1.9241306106741956e-07, 1.9241306106741956e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:26:36,290] [INFO] [timer.py:199:stop] epoch=0/micro_step=7620/global_step=7620, RunningAvgSamplesPerSec=105.71308353833173, CurrSamplesPerSec=105.99807301686585, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:26:36,383] [INFO] [logging.py:96:log_dist] [Rank 0] step=7620, skipped=132, lr=[1.0961363454760482e-07, 1.0961363454760482e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7619|ppo_ep: 1|act_loss: 0.0058441162109375|cri_loss: 0.00321197509765625|unsuper_loss: 0.0 +average reward score: 4.8046875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.96%) |Training time=0.46s (21.42%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7620|ppo_ep: 1|act_loss: -0.006824493408203125|cri_loss: -0.00334930419921875|unsuper_loss: 0.0 +average reward score: 5.609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.89%) |Training time=0.47s (21.49%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7621|ppo_ep: 1|act_loss: -0.006504058837890625|cri_loss: -0.0030193328857421875|unsuper_loss: 0.0 +average reward score: 5.19921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.91%) |Training time=0.47s (21.47%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7622|ppo_ep: 1|act_loss: 0.010223388671875|cri_loss: 0.005458831787109375|unsuper_loss: 0.0 +average reward score: 5.01953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.04%) |Training time=0.46s (21.33%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7623|ppo_ep: 1|act_loss: 0.007648468017578125|cri_loss: 0.003936767578125|unsuper_loss: 0.0 +average reward score: 5.265625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.46%) |Training time=0.48s (21.92%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7624|ppo_ep: 1|act_loss: 0.0036411285400390625|cri_loss: 0.002719879150390625|unsuper_loss: 0.0 +average reward score: 5.54296875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.55%) |Training time=0.48s (21.86%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7625|ppo_ep: 1|act_loss: -0.0020275115966796875|cri_loss: -0.0009050369262695312|unsuper_loss: 0.0 +average reward score: 5.45703125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.42%) |Training time=0.45s (20.93%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7626|ppo_ep: 1|act_loss: 0.0220794677734375|cri_loss: 0.01171112060546875|unsuper_loss: 0.0 +average reward score: 5.48828125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.99%) |Training time=0.46s (21.39%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7627|ppo_ep: 1|act_loss: 0.004680633544921875|cri_loss: 0.0029010772705078125|unsuper_loss: 0.0 +average reward score: 5.28515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.02%) |Training time=0.46s (21.36%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7628|ppo_ep: 1|act_loss: 0.018707275390625|cri_loss: 0.00984954833984375|unsuper_loss: 0.0 +average reward score: 4.9140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.93%) |Training time=0.46s (21.45%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56 +[2023-04-14 13:26:57,936] [INFO] [logging.py:96:log_dist] [Rank 0] step=7630, skipped=96, lr=[1.8725382296377066e-07, 1.8725382296377066e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:26:57,955] [INFO] [timer.py:199:stop] epoch=0/micro_step=7630/global_step=7630, RunningAvgSamplesPerSec=105.71314051584926, CurrSamplesPerSec=106.23861904871212, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:26:58,047] [INFO] [logging.py:96:log_dist] [Rank 0] step=7630, skipped=132, lr=[1.0681256153060565e-07, 1.0681256153060565e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7629|ppo_ep: 1|act_loss: -0.0301513671875|cri_loss: -0.0148773193359375|unsuper_loss: 0.0 +average reward score: 5.94140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.95%) |Training time=0.46s (21.45%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7630|ppo_ep: 1|act_loss: -0.017547607421875|cri_loss: -0.008636474609375|unsuper_loss: 0.0 +average reward score: 5.5234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.27%) |Training time=0.46s (21.12%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7631|ppo_ep: 1|act_loss: 0.0010089874267578125|cri_loss: 0.0006146430969238281|unsuper_loss: 0.0 +average reward score: 5.78125 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.46%) |Training time=0.46s (20.99%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7632|ppo_ep: 1|act_loss: -0.004638671875|cri_loss: -0.002239227294921875|unsuper_loss: 0.0 +average reward score: 5.6640625 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.24%) |Training time=0.46s (20.33%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.14 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7633|ppo_ep: 1|act_loss: 0.0055999755859375|cri_loss: 0.0029544830322265625|unsuper_loss: 0.0 +average reward score: 5.4921875 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.68%) |Training time=0.47s (20.03%) |Others=0.10 (4.29%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7634|ppo_ep: 1|act_loss: 0.028350830078125|cri_loss: 0.014739990234375|unsuper_loss: 0.0 +average reward score: 5.52734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.74%) |Training time=0.47s (21.65%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7635|ppo_ep: 1|act_loss: 0.00738525390625|cri_loss: 0.00392913818359375|unsuper_loss: 0.0 +average reward score: 4.8828125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.13%) |Training time=0.46s (21.11%) |Others=0.10 (4.76%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7636|ppo_ep: 1|act_loss: 0.0267791748046875|cri_loss: 0.01430511474609375|unsuper_loss: 0.0 +average reward score: 5.1015625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.11%) |Training time=0.46s (21.25%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7637|ppo_ep: 1|act_loss: 0.01326751708984375|cri_loss: 0.00701904296875|unsuper_loss: 0.0 +average reward score: 5.26171875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.25%) |Training time=0.46s (21.15%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7638|ppo_ep: 1|act_loss: 0.0040130615234375|cri_loss: 0.0021495819091796875|unsuper_loss: 0.0 +average reward score: 5.578125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.44%) |Training time=0.46s (20.94%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.56 +[2023-04-14 13:27:19,954] [INFO] [logging.py:96:log_dist] [Rank 0] step=7640, skipped=96, lr=[1.8216332744641935e-07, 1.8216332744641935e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:27:19,973] [INFO] [timer.py:199:stop] epoch=0/micro_step=7640/global_step=7640, RunningAvgSamplesPerSec=105.71413966238659, CurrSamplesPerSec=101.97505208246275, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:27:20,065] [INFO] [logging.py:96:log_dist] [Rank 0] step=7640, skipped=132, lr=[1.0404696132469445e-07, 1.0404696132469445e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7639|ppo_ep: 1|act_loss: 0.00672149658203125|cri_loss: 0.00396728515625|unsuper_loss: 0.0 +average reward score: 6.265625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.54%) |Training time=0.48s (21.90%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7640|ppo_ep: 1|act_loss: 0.00762939453125|cri_loss: 0.00408935546875|unsuper_loss: 0.0 +average reward score: 5.82421875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.76%) |Training time=0.46s (21.58%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7641|ppo_ep: 1|act_loss: -0.010955810546875|cri_loss: -0.00516510009765625|unsuper_loss: 0.0 +average reward score: 5.5234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.48s (22.01%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7642|ppo_ep: 1|act_loss: -0.00540924072265625|cri_loss: -0.002593994140625|unsuper_loss: 0.0 +average reward score: 5.421875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.46s (21.47%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7643|ppo_ep: 1|act_loss: 0.006183624267578125|cri_loss: 0.0033664703369140625|unsuper_loss: 0.0 +average reward score: 5.4453125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.33%) |Training time=0.48s (22.05%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7644|ppo_ep: 1|act_loss: 0.01395416259765625|cri_loss: 0.00766754150390625|unsuper_loss: 0.0 +average reward score: 5.77734375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.98%) |Training time=0.46s (21.36%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7645|ppo_ep: 1|act_loss: -0.030029296875|cri_loss: -0.01447296142578125|unsuper_loss: 0.0 +average reward score: 5.91015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.77%) |Training time=0.47s (21.59%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7646|ppo_ep: 1|act_loss: -0.01416015625|cri_loss: -0.0068206787109375|unsuper_loss: 0.0 +average reward score: 5.796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.88%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7647|ppo_ep: 1|act_loss: 0.0216522216796875|cri_loss: 0.01099395751953125|unsuper_loss: 0.0 +average reward score: 5.23828125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.44%) |Training time=0.48s (21.94%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7648|ppo_ep: 1|act_loss: 0.013641357421875|cri_loss: 0.0073089599609375|unsuper_loss: 0.0 +average reward score: 5.34375 +------------------------------------------------------------------------------------- +|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.50%) |Training time=0.50s (21.24%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.57 |AvgSamplesPerSec=14.56 +[2023-04-14 13:27:41,752] [INFO] [logging.py:96:log_dist] [Rank 0] step=7650, skipped=96, lr=[1.7714164996878907e-07, 1.7714164996878907e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:27:41,770] [INFO] [timer.py:199:stop] epoch=0/micro_step=7650/global_step=7650, RunningAvgSamplesPerSec=105.71081829530702, CurrSamplesPerSec=105.03040784290403, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:27:41,863] [INFO] [logging.py:96:log_dist] [Rank 0] step=7650, skipped=132, lr=[1.0131687492273862e-07, 1.0131687492273862e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7649|ppo_ep: 1|act_loss: 0.0198211669921875|cri_loss: 0.01026153564453125|unsuper_loss: 0.0 +average reward score: 5.4375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.72%) |Training time=0.47s (21.67%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7650|ppo_ep: 1|act_loss: -0.01372528076171875|cri_loss: -0.006732940673828125|unsuper_loss: 0.0 +average reward score: 5.77734375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.83%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56 +[2023-04-14 13:27:46,070] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, reducing to 4096 +epoch: 0|step: 7651|ppo_ep: 1|act_loss: 0.006175994873046875|cri_loss: 0.0032939910888671875|unsuper_loss: 0.0 +average reward score: 5.40625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.55%) |Training time=0.44s (20.78%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7652|ppo_ep: 1|act_loss: -0.00801849365234375|cri_loss: -0.003887176513671875|unsuper_loss: 0.0 +average reward score: 5.17578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.72%) |Training time=0.47s (21.65%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7653|ppo_ep: 1|act_loss: -0.01218414306640625|cri_loss: -0.0047760009765625|unsuper_loss: 0.0 +average reward score: 5.953125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.18%) |Training time=0.48s (22.15%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7654|ppo_ep: 1|act_loss: -0.01444244384765625|cri_loss: -0.007080078125|unsuper_loss: 0.0 +average reward score: 5.1875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.07%) |Training time=0.46s (21.27%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7655|ppo_ep: 1|act_loss: 0.0143585205078125|cri_loss: 0.007495880126953125|unsuper_loss: 0.0 +average reward score: 5.27734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.07%) |Training time=0.49s (22.41%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7656|ppo_ep: 1|act_loss: -0.0041656494140625|cri_loss: -0.001190185546875|unsuper_loss: 0.0 +average reward score: 5.484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.80%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7657|ppo_ep: 1|act_loss: 0.039520263671875|cri_loss: 0.022735595703125|unsuper_loss: 0.0 +average reward score: 6.01171875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.70%) |Training time=0.47s (21.74%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7658|ppo_ep: 1|act_loss: -0.0272674560546875|cri_loss: -0.01178741455078125|unsuper_loss: 0.0 +average reward score: 5.625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.47s (21.68%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56 +[2023-04-14 13:28:03,367] [INFO] [logging.py:96:log_dist] [Rank 0] step=7660, skipped=97, lr=[1.7268104120316125e-07, 1.7268104120316125e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:28:03,385] [INFO] [timer.py:199:stop] epoch=0/micro_step=7660/global_step=7660, RunningAvgSamplesPerSec=105.70880371115669, CurrSamplesPerSec=103.66849002303269, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:28:03,480] [INFO] [logging.py:96:log_dist] [Rank 0] step=7660, skipped=132, lr=[9.862234279120419e-08, 9.862234279120419e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7659|ppo_ep: 1|act_loss: 0.022857666015625|cri_loss: 0.0119781494140625|unsuper_loss: 0.0 +average reward score: 4.9453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.81%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7660|ppo_ep: 1|act_loss: -0.0127716064453125|cri_loss: -0.00572967529296875|unsuper_loss: 0.0 +average reward score: 5.04296875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.72%) |Training time=0.46s (21.39%) |Others=0.11 (4.89%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7661|ppo_ep: 1|act_loss: -0.017333984375|cri_loss: -0.00823974609375|unsuper_loss: 0.0 +average reward score: 4.890625 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.66s (72.76%) |Training time=0.51s (22.55%) |Others=0.11 (4.69%)|CurSamplesPerSec=14.05 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7662|ppo_ep: 1|act_loss: 0.043792724609375|cri_loss: 0.0233612060546875|unsuper_loss: 0.0 +average reward score: 5.37109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.59%) |Training time=0.47s (21.85%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7663|ppo_ep: 1|act_loss: -0.024261474609375|cri_loss: -0.0117645263671875|unsuper_loss: 0.0 +average reward score: 5.40625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.87%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7664|ppo_ep: 1|act_loss: 0.0703125|cri_loss: 0.03778076171875|unsuper_loss: 0.0 +average reward score: 5.359375 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.49%) |Training time=0.53s (24.00%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.40 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7665|ppo_ep: 1|act_loss: 0.0843505859375|cri_loss: 0.0477294921875|unsuper_loss: 0.0 +average reward score: 4.77734375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.83%) |Training time=0.47s (21.62%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7666|ppo_ep: 1|act_loss: -0.00772857666015625|cri_loss: -0.0037326812744140625|unsuper_loss: 0.0 +average reward score: 5.58203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.30%) |Training time=0.48s (22.03%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7667|ppo_ep: 1|act_loss: 0.020172119140625|cri_loss: 0.0110626220703125|unsuper_loss: 0.0 +average reward score: 5.43359375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.70%) |Training time=0.47s (21.76%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7668|ppo_ep: 1|act_loss: 0.0711669921875|cri_loss: 0.037109375|unsuper_loss: 0.0 +average reward score: 5.53515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.49%) |Training time=0.48s (21.88%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.56 +[2023-04-14 13:28:25,190] [INFO] [logging.py:96:log_dist] [Rank 0] step=7670, skipped=97, lr=[1.677903222209867e-07, 1.677903222209867e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:28:25,208] [INFO] [timer.py:199:stop] epoch=0/micro_step=7670/global_step=7670, RunningAvgSamplesPerSec=105.70321438555533, CurrSamplesPerSec=105.26079819809787, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:28:25,301] [INFO] [logging.py:96:log_dist] [Rank 0] step=7670, skipped=132, lr=[9.596340486955818e-08, 9.596340486955818e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7669|ppo_ep: 1|act_loss: -0.00418853759765625|cri_loss: -0.001926422119140625|unsuper_loss: 0.0 +average reward score: 5.2421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.97%) |Training time=0.47s (21.51%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7670|ppo_ep: 1|act_loss: -0.0084381103515625|cri_loss: -0.003810882568359375|unsuper_loss: 0.0 +average reward score: 5.91796875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.55%) |Training time=0.47s (21.84%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7671|ppo_ep: 1|act_loss: -0.00292205810546875|cri_loss: -0.001384735107421875|unsuper_loss: 0.0 +average reward score: 5.5390625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.74%) |Training time=0.47s (21.71%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7672|ppo_ep: 1|act_loss: -0.005619049072265625|cri_loss: -0.0026702880859375|unsuper_loss: 0.0 +average reward score: 5.44140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.71%) |Training time=0.47s (21.66%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7673|ppo_ep: 1|act_loss: -0.0075225830078125|cri_loss: -0.003704071044921875|unsuper_loss: 0.0 +average reward score: 5.58203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.55%) |Training time=0.47s (21.88%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7674|ppo_ep: 1|act_loss: -0.0134735107421875|cri_loss: -0.006656646728515625|unsuper_loss: 0.0 +average reward score: 4.83984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.75%) |Training time=0.47s (21.62%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7675|ppo_ep: 1|act_loss: 0.0252227783203125|cri_loss: 0.01290130615234375|unsuper_loss: 0.0 +average reward score: 5.35546875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.26%) |Training time=0.46s (21.20%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7676|ppo_ep: 1|act_loss: -0.00504302978515625|cri_loss: -0.0024242401123046875|unsuper_loss: 0.0 +average reward score: 6.1875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.47%) |Training time=0.45s (20.89%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7677|ppo_ep: 1|act_loss: -0.0011739730834960938|cri_loss: -0.0005507469177246094|unsuper_loss: 0.0 +average reward score: 5.24609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.46s (21.18%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7678|ppo_ep: 1|act_loss: -0.03155517578125|cri_loss: -0.0152740478515625|unsuper_loss: 0.0 +average reward score: 5.546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.47%) |Training time=0.45s (20.87%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.56 +[2023-04-14 13:28:46,830] [INFO] [logging.py:96:log_dist] [Rank 0] step=7680, skipped=97, lr=[1.629686343211429e-07, 1.629686343211429e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:28:46,848] [INFO] [timer.py:199:stop] epoch=0/micro_step=7680/global_step=7680, RunningAvgSamplesPerSec=105.70482233763329, CurrSamplesPerSec=112.06785304851931, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:28:46,941] [INFO] [logging.py:96:log_dist] [Rank 0] step=7680, skipped=132, lr=[9.334010056967408e-08, 9.334010056967408e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7679|ppo_ep: 1|act_loss: -0.0015592575073242188|cri_loss: -0.0006017684936523438|unsuper_loss: 0.0 +average reward score: 5.12890625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.34%) |Training time=0.49s (22.17%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7680|ppo_ep: 1|act_loss: 0.0016498565673828125|cri_loss: 0.0011415481567382812|unsuper_loss: 0.0 +average reward score: 5.609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.42%) |Training time=0.45s (20.93%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7681|ppo_ep: 1|act_loss: 0.012451171875|cri_loss: 0.006500244140625|unsuper_loss: 0.0 +average reward score: 6.28125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.25%) |Training time=0.46s (21.21%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7682|ppo_ep: 1|act_loss: 0.0462646484375|cri_loss: 0.0240325927734375|unsuper_loss: 0.0 +average reward score: 6.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.40%) |Training time=0.45s (20.95%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7683|ppo_ep: 1|act_loss: -0.0667724609375|cri_loss: -0.01611328125|unsuper_loss: 0.0 +average reward score: 5.12890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.46%) |Training time=0.45s (20.97%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7684|ppo_ep: 1|act_loss: -0.0005736351013183594|cri_loss: -0.00011801719665527344|unsuper_loss: 0.0 +average reward score: 5.16796875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.85%) |Training time=0.47s (21.55%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7685|ppo_ep: 1|act_loss: 0.015594482421875|cri_loss: 0.0088348388671875|unsuper_loss: 0.0 +average reward score: 6.34765625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.08%) |Training time=0.46s (21.39%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7686|ppo_ep: 1|act_loss: -0.006443023681640625|cri_loss: -0.0030460357666015625|unsuper_loss: 0.0 +average reward score: 5.4921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.61%) |Training time=0.47s (21.66%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7687|ppo_ep: 1|act_loss: 0.0017538070678710938|cri_loss: 0.0012903213500976562|unsuper_loss: 0.0 +average reward score: 5.6875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.76%) |Training time=0.47s (21.69%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7688|ppo_ep: 1|act_loss: -0.0147247314453125|cri_loss: -0.0071258544921875|unsuper_loss: 0.0 +average reward score: 5.1796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.82%) |Training time=0.47s (21.55%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56 +[2023-04-14 13:29:08,495] [INFO] [logging.py:96:log_dist] [Rank 0] step=7690, skipped=97, lr=[1.5821604897267761e-07, 1.5821604897267761e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:29:08,514] [INFO] [timer.py:199:stop] epoch=0/micro_step=7690/global_step=7690, RunningAvgSamplesPerSec=105.70594568464799, CurrSamplesPerSec=104.29189793176543, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:29:08,609] [INFO] [logging.py:96:log_dist] [Rank 0] step=7690, skipped=132, lr=[9.075246877525034e-08, 9.075246877525034e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7689|ppo_ep: 1|act_loss: -0.00147247314453125|cri_loss: 0.00119781494140625|unsuper_loss: 0.0 +average reward score: 5.33984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.80%) |Training time=0.47s (21.60%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7690|ppo_ep: 1|act_loss: 0.002208709716796875|cri_loss: 0.0012598037719726562|unsuper_loss: 0.0 +average reward score: 5.5625 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.63%) |Training time=0.47s (21.52%) |Others=0.11 (4.85%)|CurSamplesPerSec=14.54 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7691|ppo_ep: 1|act_loss: 0.0294189453125|cri_loss: 0.014984130859375|unsuper_loss: 0.0 +average reward score: 5.1640625 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.71s (73.93%) |Training time=0.50s (21.81%) |Others=0.10 (4.26%)|CurSamplesPerSec=13.83 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7692|ppo_ep: 1|act_loss: 0.0054931640625|cri_loss: 0.0031280517578125|unsuper_loss: 0.0 +average reward score: 5.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.76%) |Training time=0.48s (21.69%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.54 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7693|ppo_ep: 1|act_loss: 0.051361083984375|cri_loss: 0.02728271484375|unsuper_loss: 0.0 +average reward score: 5.859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.00%) |Training time=0.47s (21.47%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7694|ppo_ep: 1|act_loss: 0.024505615234375|cri_loss: 0.0128173828125|unsuper_loss: 0.0 +average reward score: 5.4296875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.84%) |Training time=0.47s (21.53%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7695|ppo_ep: 1|act_loss: -0.010711669921875|cri_loss: -0.0051727294921875|unsuper_loss: 0.0 +average reward score: 5.23046875 +------------------------------------------------------------------------------------- +|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.77s (75.10%) |Training time=0.49s (20.74%) |Others=0.10 (4.16%)|CurSamplesPerSec=13.55 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7696|ppo_ep: 1|act_loss: -0.0024051666259765625|cri_loss: -0.0011453628540039062|unsuper_loss: 0.0 +average reward score: 5.12890625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.94%) |Training time=0.47s (21.44%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7697|ppo_ep: 1|act_loss: 0.009765625|cri_loss: 0.004962921142578125|unsuper_loss: 0.0 +average reward score: 4.58203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.14%) |Training time=0.46s (21.29%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7698|ppo_ep: 1|act_loss: 0.045196533203125|cri_loss: 0.02325439453125|unsuper_loss: 0.0 +average reward score: 5.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.31%) |Training time=0.46s (21.06%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.56 +[2023-04-14 13:29:30,655] [INFO] [logging.py:96:log_dist] [Rank 0] step=7700, skipped=97, lr=[1.5353263662036897e-07, 1.5353263662036897e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:29:30,673] [INFO] [timer.py:199:stop] epoch=0/micro_step=7700/global_step=7700, RunningAvgSamplesPerSec=105.7013104552143, CurrSamplesPerSec=98.56926587346759, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:29:30,766] [INFO] [logging.py:96:log_dist] [Rank 0] step=7700, skipped=132, lr=[8.820054784123288e-08, 8.820054784123288e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7699|ppo_ep: 1|act_loss: 0.003551483154296875|cri_loss: 0.0018930435180664062|unsuper_loss: 0.0 +average reward score: 5.65234375 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.31%) |Training time=0.49s (22.24%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.55 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7700|ppo_ep: 1|act_loss: -0.022552490234375|cri_loss: -0.009979248046875|unsuper_loss: 0.0 +average reward score: 5.22265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.00%) |Training time=0.46s (21.38%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7701|ppo_ep: 1|act_loss: 0.0167694091796875|cri_loss: 0.00882720947265625|unsuper_loss: 0.0 +average reward score: 5.36328125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.75%) |Training time=0.47s (21.64%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7702|ppo_ep: 1|act_loss: -0.024078369140625|cri_loss: -0.01169586181640625|unsuper_loss: 0.0 +average reward score: 5.09375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.40%) |Training time=0.48s (21.99%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7703|ppo_ep: 1|act_loss: -0.004787445068359375|cri_loss: -0.00232696533203125|unsuper_loss: 0.0 +average reward score: 5.28125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.13%) |Training time=0.49s (22.36%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7704|ppo_ep: 1|act_loss: -0.046112060546875|cri_loss: -0.019561767578125|unsuper_loss: 0.0 +average reward score: 6.22265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.56%) |Training time=0.47s (21.80%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7705|ppo_ep: 1|act_loss: -0.0125885009765625|cri_loss: -0.00618743896484375|unsuper_loss: 0.0 +average reward score: 5.00390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.47%) |Training time=0.48s (22.00%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7706|ppo_ep: 1|act_loss: -0.0083465576171875|cri_loss: -0.0037059783935546875|unsuper_loss: 0.0 +average reward score: 5.76953125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.12%) |Training time=0.48s (22.27%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7707|ppo_ep: 1|act_loss: -0.0010509490966796875|cri_loss: -0.0004482269287109375|unsuper_loss: 0.0 +average reward score: 5.61328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.52%) |Training time=0.48s (21.95%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7708|ppo_ep: 1|act_loss: -0.00623321533203125|cri_loss: -0.002918243408203125|unsuper_loss: 0.0 +average reward score: 5.53125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.03%) |Training time=0.49s (22.34%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.56 +[2023-04-14 13:29:52,369] [INFO] [logging.py:96:log_dist] [Rank 0] step=7710, skipped=97, lr=[1.489184666836874e-07, 1.489184666836874e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:29:52,387] [INFO] [timer.py:199:stop] epoch=0/micro_step=7710/global_step=7710, RunningAvgSamplesPerSec=105.69619158975686, CurrSamplesPerSec=102.90253227929671, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:29:52,479] [INFO] [logging.py:96:log_dist] [Rank 0] step=7710, skipped=132, lr=[8.568437559324572e-08, 8.568437559324572e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7709|ppo_ep: 1|act_loss: 0.001888275146484375|cri_loss: 0.00121307373046875|unsuper_loss: 0.0 +average reward score: 5.328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.66%) |Training time=0.47s (21.82%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7710|ppo_ep: 1|act_loss: 0.0034389495849609375|cri_loss: 0.0018596649169921875|unsuper_loss: 0.0 +average reward score: 5.4765625 +------------------------------------------------------------------------------------- +|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.81%) |Training time=0.49s (20.76%) |Others=0.10 (4.43%)|CurSamplesPerSec=13.57 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7711|ppo_ep: 1|act_loss: 0.0123748779296875|cri_loss: 0.006412506103515625|unsuper_loss: 0.0 +average reward score: 4.93359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.85%) |Training time=0.47s (21.61%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7712|ppo_ep: 1|act_loss: 0.006988525390625|cri_loss: 0.00377655029296875|unsuper_loss: 0.0 +average reward score: 5.87890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.48%) |Training time=0.47s (21.90%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7713|ppo_ep: 1|act_loss: -0.0089569091796875|cri_loss: -0.00347900390625|unsuper_loss: 0.0 +average reward score: 5.6484375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.80%) |Training time=0.47s (21.64%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7714|ppo_ep: 1|act_loss: 0.007404327392578125|cri_loss: 0.00394439697265625|unsuper_loss: 0.0 +average reward score: 5.22265625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.35%) |Training time=0.48s (22.01%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.58 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7715|ppo_ep: 1|act_loss: 0.020233154296875|cri_loss: 0.0104522705078125|unsuper_loss: 0.0 +average reward score: 5.3515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.74%) |Training time=0.47s (21.72%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7716|ppo_ep: 1|act_loss: 0.0013427734375|cri_loss: 0.000820159912109375|unsuper_loss: 0.0 +average reward score: 5.015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.50%) |Training time=0.47s (21.87%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7717|ppo_ep: 1|act_loss: -0.0102996826171875|cri_loss: -0.0048675537109375|unsuper_loss: 0.0 +average reward score: 5.0078125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.96%) |Training time=0.49s (22.51%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7718|ppo_ep: 1|act_loss: 0.0143280029296875|cri_loss: 0.00757598876953125|unsuper_loss: 0.0 +average reward score: 5.59375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.98%) |Training time=0.48s (22.38%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56 +[2023-04-14 13:30:14,266] [INFO] [logging.py:96:log_dist] [Rank 0] step=7720, skipped=97, lr=[1.4437360755576166e-07, 1.4437360755576166e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:30:14,285] [INFO] [timer.py:199:stop] epoch=0/micro_step=7720/global_step=7720, RunningAvgSamplesPerSec=105.68932583761074, CurrSamplesPerSec=93.3791178439091, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:30:14,369] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +[2023-04-14 13:30:14,369] [INFO] [logging.py:96:log_dist] [Rank 0] step=7720, skipped=133, lr=[8.345041653250202e-08, 8.345041653250202e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7719|ppo_ep: 1|act_loss: 0.0103302001953125|cri_loss: 0.005672454833984375|unsuper_loss: 0.0 +average reward score: 5.03515625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.82%) |Training time=0.50s (23.17%) |Others=0.09 (4.02%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.56 +[2023-04-14 13:30:16,708] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 7720|ppo_ep: 1|act_loss: 0.002162933349609375|cri_loss: 0.00196075439453125|unsuper_loss: 0.0 +average reward score: 5.2109375 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.69s (72.27%) |Training time=0.56s (23.90%) |Others=0.09 (3.83%)|CurSamplesPerSec=13.68 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7721|ppo_ep: 1|act_loss: -0.01486968994140625|cri_loss: -0.0072174072265625|unsuper_loss: 0.0 +average reward score: 5.69140625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.93%) |Training time=0.49s (22.55%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7722|ppo_ep: 1|act_loss: -0.00432586669921875|cri_loss: -0.0019664764404296875|unsuper_loss: 0.0 +average reward score: 5.30859375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.66%) |Training time=0.49s (22.74%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7723|ppo_ep: 1|act_loss: 0.0094451904296875|cri_loss: 0.005096435546875|unsuper_loss: 0.0 +average reward score: 5.671875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.84%) |Training time=0.47s (21.67%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7724|ppo_ep: 1|act_loss: -2.2649765014648438e-05|cri_loss: 0.0001900196075439453|unsuper_loss: 0.0 +average reward score: 4.9609375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.38%) |Training time=0.48s (22.01%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7725|ppo_ep: 1|act_loss: 0.022003173828125|cri_loss: 0.011322021484375|unsuper_loss: 0.0 +average reward score: 5.52734375 +------------------------------------------------------------------------------------- +|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.77s (74.95%) |Training time=0.49s (20.88%) |Others=0.10 (4.16%)|CurSamplesPerSec=13.57 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7726|ppo_ep: 1|act_loss: -0.0091705322265625|cri_loss: -0.0045166015625|unsuper_loss: 0.0 +average reward score: 5.671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.31%) |Training time=0.46s (21.06%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7727|ppo_ep: 1|act_loss: 0.0120849609375|cri_loss: 0.0061798095703125|unsuper_loss: 0.0 +average reward score: 5.22265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.73%) |Training time=0.45s (20.72%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7728|ppo_ep: 1|act_loss: -0.005695343017578125|cri_loss: -0.0025634765625|unsuper_loss: 0.0 +average reward score: 5.38671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.17%) |Training time=0.46s (21.19%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.56 +[2023-04-14 13:30:36,328] [INFO] [logging.py:96:log_dist] [Rank 0] step=7730, skipped=97, lr=[1.398981266023686e-07, 1.398981266023686e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:30:36,346] [INFO] [timer.py:199:stop] epoch=0/micro_step=7730/global_step=7730, RunningAvgSamplesPerSec=105.68184628578439, CurrSamplesPerSec=101.17140408416168, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:30:36,439] [INFO] [logging.py:96:log_dist] [Rank 0] step=7730, skipped=134, lr=[8.124547094506025e-08, 8.124547094506025e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7729|ppo_ep: 1|act_loss: 0.00047397613525390625|cri_loss: 0.00033283233642578125|unsuper_loss: 0.0 +average reward score: 5.609375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.67%) |Training time=0.48s (21.85%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7730|ppo_ep: 1|act_loss: 0.013671875|cri_loss: 0.006999969482421875|unsuper_loss: 0.0 +average reward score: 5.87109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.29%) |Training time=0.45s (20.99%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7731|ppo_ep: 1|act_loss: -0.02764892578125|cri_loss: -0.01317596435546875|unsuper_loss: 0.0 +average reward score: 5.47265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.19%) |Training time=0.46s (21.26%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7732|ppo_ep: 1|act_loss: -0.032623291015625|cri_loss: -0.0156707763671875|unsuper_loss: 0.0 +average reward score: 5.41796875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.18%) |Training time=0.46s (21.20%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7733|ppo_ep: 1|act_loss: -0.005161285400390625|cri_loss: -0.002452850341796875|unsuper_loss: 0.0 +average reward score: 5.03515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.67%) |Training time=0.45s (20.77%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7734|ppo_ep: 1|act_loss: 0.048309326171875|cri_loss: 0.0247039794921875|unsuper_loss: 0.0 +average reward score: 5.578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.44%) |Training time=0.45s (20.94%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7735|ppo_ep: 1|act_loss: -0.01873779296875|cri_loss: -0.0092926025390625|unsuper_loss: 0.0 +average reward score: 5.265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.31%) |Training time=0.46s (21.14%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7736|ppo_ep: 1|act_loss: -0.00949859619140625|cri_loss: -0.0045318603515625|unsuper_loss: 0.0 +average reward score: 6.01171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.20%) |Training time=0.46s (21.18%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7737|ppo_ep: 1|act_loss: -0.021820068359375|cri_loss: -0.004364013671875|unsuper_loss: 0.0 +average reward score: 5.234375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.50%) |Training time=0.45s (20.94%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7738|ppo_ep: 1|act_loss: -0.026641845703125|cri_loss: -0.013214111328125|unsuper_loss: 0.0 +average reward score: 5.26953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.18%) |Training time=0.46s (21.21%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56 +[2023-04-14 13:30:57,937] [INFO] [logging.py:96:log_dist] [Rank 0] step=7740, skipped=97, lr=[1.354920901609319e-07, 1.354920901609319e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:30:57,955] [INFO] [timer.py:199:stop] epoch=0/micro_step=7740/global_step=7740, RunningAvgSamplesPerSec=105.68622715622114, CurrSamplesPerSec=109.06858467666899, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:30:58,048] [INFO] [logging.py:96:log_dist] [Rank 0] step=7740, skipped=134, lr=[7.882959173781374e-08, 7.882959173781374e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7739|ppo_ep: 1|act_loss: -0.001194000244140625|cri_loss: -0.0004506111145019531|unsuper_loss: 0.0 +average reward score: 5.8203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.33%) |Training time=0.46s (21.12%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7740|ppo_ep: 1|act_loss: 0.00063323974609375|cri_loss: 0.0004687309265136719|unsuper_loss: 0.0 +average reward score: 6.19140625 +------------------------------------------------------------------------------------- +|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.78s (75.61%) |Training time=0.47s (20.11%) |Others=0.10 (4.27%)|CurSamplesPerSec=13.62 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7741|ppo_ep: 1|act_loss: 0.02520751953125|cri_loss: 0.012786865234375|unsuper_loss: 0.0 +average reward score: 5.8515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.28%) |Training time=0.46s (21.19%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7742|ppo_ep: 1|act_loss: -0.0069122314453125|cri_loss: -0.0032215118408203125|unsuper_loss: 0.0 +average reward score: 5.71875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.31%) |Training time=0.46s (21.07%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7743|ppo_ep: 1|act_loss: -0.00384521484375|cri_loss: -0.0016021728515625|unsuper_loss: 0.0 +average reward score: 4.95703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.24%) |Training time=0.46s (21.21%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7744|ppo_ep: 1|act_loss: 0.006366729736328125|cri_loss: 0.0032444000244140625|unsuper_loss: 0.0 +average reward score: 5.375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.45%) |Training time=0.45s (20.83%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7745|ppo_ep: 1|act_loss: 0.0296478271484375|cri_loss: 0.0156097412109375|unsuper_loss: 0.0 +average reward score: 5.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.11%) |Training time=0.46s (21.34%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7746|ppo_ep: 1|act_loss: -0.00768280029296875|cri_loss: -0.0029754638671875|unsuper_loss: 0.0 +average reward score: 5.71875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.07%) |Training time=0.46s (21.31%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7747|ppo_ep: 1|act_loss: 0.0009546279907226562|cri_loss: 0.002101898193359375|unsuper_loss: 0.0 +average reward score: 4.83203125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.46%) |Training time=0.46s (21.03%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7748|ppo_ep: 1|act_loss: 0.00020587444305419922|cri_loss: 0.00020742416381835938|unsuper_loss: 0.0 +average reward score: 5.21875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.90%) |Training time=0.47s (21.52%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.56 +[2023-04-14 13:31:19,832] [INFO] [logging.py:96:log_dist] [Rank 0] step=7750, skipped=97, lr=[1.3115556353954087e-07, 1.3115556353954087e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:31:19,851] [INFO] [timer.py:199:stop] epoch=0/micro_step=7750/global_step=7750, RunningAvgSamplesPerSec=105.68781927307126, CurrSamplesPerSec=104.16773291257819, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:31:19,943] [INFO] [logging.py:96:log_dist] [Rank 0] step=7750, skipped=134, lr=[7.644960011677305e-08, 7.644960011677305e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7749|ppo_ep: 1|act_loss: 0.00521087646484375|cri_loss: 0.0029754638671875|unsuper_loss: 0.0 +average reward score: 5.125 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.25%) |Training time=0.47s (21.31%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.52 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7750|ppo_ep: 1|act_loss: 0.004547119140625|cri_loss: 0.0027065277099609375|unsuper_loss: 0.0 +average reward score: 5.1015625 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.28%) |Training time=0.46s (20.31%) |Others=0.10 (4.41%)|CurSamplesPerSec=14.11 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7751|ppo_ep: 1|act_loss: -0.00215911865234375|cri_loss: -0.00099945068359375|unsuper_loss: 0.0 +average reward score: 5.48828125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.16%) |Training time=0.46s (21.32%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7752|ppo_ep: 1|act_loss: 0.0113067626953125|cri_loss: 0.0062255859375|unsuper_loss: 0.0 +average reward score: 5.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.09%) |Training time=0.46s (21.28%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7753|ppo_ep: 1|act_loss: -0.0093536376953125|cri_loss: -0.004302978515625|unsuper_loss: 0.0 +average reward score: 5.34375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.37%) |Training time=0.46s (21.10%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7754|ppo_ep: 1|act_loss: 0.0090179443359375|cri_loss: 0.004871368408203125|unsuper_loss: 0.0 +average reward score: 6.0390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.28%) |Training time=0.46s (21.09%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7755|ppo_ep: 1|act_loss: 0.05841064453125|cri_loss: 0.031768798828125|unsuper_loss: 0.0 +average reward score: 5.1953125 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.78s (75.84%) |Training time=0.47s (19.97%) |Others=0.10 (4.19%)|CurSamplesPerSec=13.65 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7756|ppo_ep: 1|act_loss: -0.0202178955078125|cri_loss: -0.0099334716796875|unsuper_loss: 0.0 +average reward score: 5.22265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.11%) |Training time=0.46s (21.27%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7757|ppo_ep: 1|act_loss: 0.00994873046875|cri_loss: 0.00514984130859375|unsuper_loss: 0.0 +average reward score: 5.3125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.32%) |Training time=0.46s (21.14%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7758|ppo_ep: 1|act_loss: 0.0001895427703857422|cri_loss: 0.00017499923706054688|unsuper_loss: 0.0 +average reward score: 5.12890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.77%) |Training time=0.47s (21.61%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.56 +[2023-04-14 13:31:41,798] [INFO] [logging.py:96:log_dist] [Rank 0] step=7760, skipped=97, lr=[1.2688861101598217e-07, 1.2688861101598217e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:31:41,816] [INFO] [timer.py:199:stop] epoch=0/micro_step=7760/global_step=7760, RunningAvgSamplesPerSec=105.68886827324046, CurrSamplesPerSec=102.76134876744779, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:31:41,909] [INFO] [logging.py:96:log_dist] [Rank 0] step=7760, skipped=134, lr=[7.410553135915572e-08, 7.410553135915572e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7759|ppo_ep: 1|act_loss: -0.013427734375|cri_loss: -0.006565093994140625|unsuper_loss: 0.0 +average reward score: 5.68359375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.77%) |Training time=0.47s (21.71%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7760|ppo_ep: 1|act_loss: 0.020599365234375|cri_loss: 0.0105438232421875|unsuper_loss: 0.0 +average reward score: 5.6796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.02%) |Training time=0.46s (21.36%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7761|ppo_ep: 1|act_loss: -0.002109527587890625|cri_loss: -0.0009775161743164062|unsuper_loss: 0.0 +average reward score: 5.94921875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.09%) |Training time=0.46s (21.20%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7762|ppo_ep: 1|act_loss: -0.01495361328125|cri_loss: -0.007282257080078125|unsuper_loss: 0.0 +average reward score: 6.00390625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.19%) |Training time=0.46s (21.16%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7763|ppo_ep: 1|act_loss: 0.0152587890625|cri_loss: 0.00789642333984375|unsuper_loss: 0.0 +average reward score: 5.45703125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.22%) |Training time=0.46s (21.24%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7764|ppo_ep: 1|act_loss: 0.0494384765625|cri_loss: 0.02557373046875|unsuper_loss: 0.0 +average reward score: 5.34375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.22%) |Training time=0.46s (21.14%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7765|ppo_ep: 1|act_loss: -0.00846099853515625|cri_loss: -0.0038127899169921875|unsuper_loss: 0.0 +average reward score: 5.59375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.26%) |Training time=0.46s (21.19%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7766|ppo_ep: 1|act_loss: -0.014495849609375|cri_loss: -0.00702667236328125|unsuper_loss: 0.0 +average reward score: 4.88671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.32%) |Training time=0.45s (21.04%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7767|ppo_ep: 1|act_loss: 0.02105712890625|cri_loss: 0.01104736328125|unsuper_loss: 0.0 +average reward score: 5.6484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.15%) |Training time=0.46s (21.30%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7768|ppo_ep: 1|act_loss: -0.0157470703125|cri_loss: -0.0063934326171875|unsuper_loss: 0.0 +average reward score: 5.47265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.12%) |Training time=0.46s (21.25%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56 +[2023-04-14 13:32:03,410] [INFO] [logging.py:96:log_dist] [Rank 0] step=7770, skipped=97, lr=[1.2269129583678402e-07, 1.2269129583678402e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:32:03,429] [INFO] [timer.py:199:stop] epoch=0/micro_step=7770/global_step=7770, RunningAvgSamplesPerSec=105.69178947512732, CurrSamplesPerSec=107.38045341847963, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:32:03,521] [INFO] [logging.py:96:log_dist] [Rank 0] step=7770, skipped=134, lr=[7.17974202097152e-08, 7.17974202097152e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7769|ppo_ep: 1|act_loss: -0.012664794921875|cri_loss: -0.006145477294921875|unsuper_loss: 0.0 +average reward score: 5.06640625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.07%) |Training time=0.46s (21.31%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7770|ppo_ep: 1|act_loss: 0.01064300537109375|cri_loss: 0.0056610107421875|unsuper_loss: 0.0 +average reward score: 5.171875 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.60s (70.19%) |Training time=0.58s (25.50%) |Others=0.10 (4.31%)|CurSamplesPerSec=14.04 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7771|ppo_ep: 1|act_loss: -0.0009503364562988281|cri_loss: -0.00011730194091796875|unsuper_loss: 0.0 +average reward score: 5.25 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.07%) |Training time=0.46s (21.39%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7772|ppo_ep: 1|act_loss: -0.03314208984375|cri_loss: -0.0156707763671875|unsuper_loss: 0.0 +average reward score: 5.0546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.10%) |Training time=0.46s (21.27%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7773|ppo_ep: 1|act_loss: -0.012664794921875|cri_loss: -0.006103515625|unsuper_loss: 0.0 +average reward score: 4.87890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.13%) |Training time=0.46s (21.31%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7774|ppo_ep: 1|act_loss: 0.0161590576171875|cri_loss: 0.00826263427734375|unsuper_loss: 0.0 +average reward score: 5.40234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.94%) |Training time=0.47s (21.42%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7775|ppo_ep: 1|act_loss: -0.0257110595703125|cri_loss: -0.0124664306640625|unsuper_loss: 0.0 +average reward score: 4.8984375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.43%) |Training time=0.46s (21.06%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7776|ppo_ep: 1|act_loss: -0.032073974609375|cri_loss: -0.014984130859375|unsuper_loss: 0.0 +average reward score: 5.5703125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.03%) |Training time=0.46s (21.29%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7777|ppo_ep: 1|act_loss: -0.016937255859375|cri_loss: -0.007793426513671875|unsuper_loss: 0.0 +average reward score: 5.93359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.11%) |Training time=0.46s (21.33%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7778|ppo_ep: 1|act_loss: -0.0005383491516113281|cri_loss: -0.00024020671844482422|unsuper_loss: 0.0 +average reward score: 5.74609375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.08%) |Training time=0.46s (21.32%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.56 +[2023-04-14 13:32:25,299] [INFO] [logging.py:96:log_dist] [Rank 0] step=7780, skipped=97, lr=[1.1856368021628366e-07, 1.1856368021628366e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:32:25,318] [INFO] [timer.py:199:stop] epoch=0/micro_step=7780/global_step=7780, RunningAvgSamplesPerSec=105.68915657133162, CurrSamplesPerSec=104.81370925924769, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:32:25,410] [INFO] [logging.py:96:log_dist] [Rank 0] step=7780, skipped=134, lr=[6.952530088022713e-08, 6.952530088022713e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7779|ppo_ep: 1|act_loss: 0.00714874267578125|cri_loss: 0.0036907196044921875|unsuper_loss: 0.0 +average reward score: 5.87109375 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.72s (75.23%) |Training time=0.47s (20.48%) |Others=0.10 (4.29%)|CurSamplesPerSec=14.01 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7780|ppo_ep: 1|act_loss: -0.004230499267578125|cri_loss: -0.0018758773803710938|unsuper_loss: 0.0 +average reward score: 5.7734375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.24%) |Training time=0.46s (21.13%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7781|ppo_ep: 1|act_loss: 0.04168701171875|cri_loss: 0.0229034423828125|unsuper_loss: 0.0 +average reward score: 5.515625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.52%) |Training time=0.45s (20.92%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7782|ppo_ep: 1|act_loss: -0.016357421875|cri_loss: -0.00785064697265625|unsuper_loss: 0.0 +average reward score: 6.0078125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.38%) |Training time=0.45s (20.97%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7783|ppo_ep: 1|act_loss: 0.010711669921875|cri_loss: 0.005565643310546875|unsuper_loss: 0.0 +average reward score: 5.5625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.44%) |Training time=0.45s (20.91%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7784|ppo_ep: 1|act_loss: -0.024505615234375|cri_loss: -0.0116729736328125|unsuper_loss: 0.0 +average reward score: 5.15625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.36%) |Training time=0.45s (21.00%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7785|ppo_ep: 1|act_loss: 0.01143646240234375|cri_loss: 0.00632476806640625|unsuper_loss: 0.0 +average reward score: 5.28125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.44%) |Training time=0.45s (20.91%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7786|ppo_ep: 1|act_loss: -0.0153656005859375|cri_loss: -0.007503509521484375|unsuper_loss: 0.0 +average reward score: 5.53515625 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.46%) |Training time=0.44s (19.20%) |Others=0.10 (4.34%)|CurSamplesPerSec=13.82 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7787|ppo_ep: 1|act_loss: -0.03143310546875|cri_loss: -0.014678955078125|unsuper_loss: 0.0 +average reward score: 5.1328125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.37%) |Training time=0.45s (21.07%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7788|ppo_ep: 1|act_loss: -0.008697509765625|cri_loss: -0.0040130615234375|unsuper_loss: 0.0 +average reward score: 5.13671875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.45%) |Training time=0.45s (20.91%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.56 +[2023-04-14 13:32:47,019] [INFO] [logging.py:96:log_dist] [Rank 0] step=7790, skipped=97, lr=[1.1450582533570157e-07, 1.1450582533570157e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:32:47,038] [INFO] [timer.py:199:stop] epoch=0/micro_step=7790/global_step=7790, RunningAvgSamplesPerSec=105.69526710763964, CurrSamplesPerSec=110.04501902986237, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:32:47,131] [INFO] [logging.py:96:log_dist] [Rank 0] step=7790, skipped=134, lr=[6.72892070489814e-08, 6.72892070489814e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7789|ppo_ep: 1|act_loss: 0.032135009765625|cri_loss: 0.016845703125|unsuper_loss: 0.0 +average reward score: 5.25390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.39%) |Training time=0.45s (20.95%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7790|ppo_ep: 1|act_loss: 0.01026153564453125|cri_loss: 0.005901336669921875|unsuper_loss: 0.0 +average reward score: 5.9765625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.20%) |Training time=0.46s (21.17%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.56 +[2023-04-14 13:32:51,323] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 7791|ppo_ep: 1|act_loss: 0.0299835205078125|cri_loss: 0.01529693603515625|unsuper_loss: 0.0 +average reward score: 6.15625 +------------------------------------------------------------------------------------- +|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.65%) |Training time=0.42s (19.73%) |Others=0.10 (4.62%)|CurSamplesPerSec=15.07 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7792|ppo_ep: 1|act_loss: 0.0181121826171875|cri_loss: 0.00939178466796875|unsuper_loss: 0.0 +average reward score: 5.65625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.91%) |Training time=0.44s (20.41%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7793|ppo_ep: 1|act_loss: 0.05157470703125|cri_loss: 0.028289794921875|unsuper_loss: 0.0 +average reward score: 5.875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.64%) |Training time=0.45s (20.79%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7794|ppo_ep: 1|act_loss: 0.0053863525390625|cri_loss: 0.00293731689453125|unsuper_loss: 0.0 +average reward score: 5.12109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.55%) |Training time=0.45s (20.79%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7795|ppo_ep: 1|act_loss: 0.0040130615234375|cri_loss: 0.0021514892578125|unsuper_loss: 0.0 +average reward score: 5.46484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.76%) |Training time=0.44s (20.67%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7796|ppo_ep: 1|act_loss: -0.007541656494140625|cri_loss: -0.0037384033203125|unsuper_loss: 0.0 +average reward score: 5.26171875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.53%) |Training time=0.45s (20.82%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7797|ppo_ep: 1|act_loss: 0.0087738037109375|cri_loss: 0.00452423095703125|unsuper_loss: 0.0 +average reward score: 6.40625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.63%) |Training time=0.45s (20.80%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7798|ppo_ep: 1|act_loss: -0.00363922119140625|cri_loss: -0.0017547607421875|unsuper_loss: 0.0 +average reward score: 5.765625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.44%) |Training time=0.45s (20.90%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.56 +[2023-04-14 13:33:08,492] [INFO] [logging.py:96:log_dist] [Rank 0] step=7800, skipped=98, lr=[1.1091345110884795e-07, 1.1091345110884795e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:33:08,510] [INFO] [timer.py:199:stop] epoch=0/micro_step=7800/global_step=7800, RunningAvgSamplesPerSec=105.7045714232828, CurrSamplesPerSec=112.16469150844927, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:33:08,603] [INFO] [logging.py:96:log_dist] [Rank 0] step=7800, skipped=134, lr=[6.508917186028196e-08, 6.508917186028196e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7799|ppo_ep: 1|act_loss: 0.03814697265625|cri_loss: 0.020233154296875|unsuper_loss: 0.0 +average reward score: 5.65625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.50%) |Training time=0.45s (20.85%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7800|ppo_ep: 1|act_loss: -0.0081634521484375|cri_loss: -0.003772735595703125|unsuper_loss: 0.0 +average reward score: 4.70703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.48%) |Training time=0.45s (20.82%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7801|ppo_ep: 1|act_loss: -0.009796142578125|cri_loss: -0.0047454833984375|unsuper_loss: 0.0 +average reward score: 5.51171875 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.78s (76.42%) |Training time=0.45s (19.35%) |Others=0.10 (4.22%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.56 +[2023-04-14 13:33:15,111] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, reducing to 4096 +epoch: 0|step: 7802|ppo_ep: 1|act_loss: -0.0110321044921875|cri_loss: -0.005035400390625|unsuper_loss: 0.0 +average reward score: 5.4609375 +------------------------------------------------------------------------------------- +|E2E latency=2.12s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.41%) |Training time=0.42s (19.85%) |Others=0.10 (4.74%)|CurSamplesPerSec=15.08 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7803|ppo_ep: 1|act_loss: 0.02301025390625|cri_loss: 0.01174163818359375|unsuper_loss: 0.0 +average reward score: 5.55078125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.80%) |Training time=0.45s (20.66%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7804|ppo_ep: 1|act_loss: 0.0006136894226074219|cri_loss: 0.00036263465881347656|unsuper_loss: 0.0 +average reward score: 5.7890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.15%) |Training time=0.46s (21.19%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7805|ppo_ep: 1|act_loss: -0.0016498565673828125|cri_loss: -0.0007781982421875|unsuper_loss: 0.0 +average reward score: 6.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.71%) |Training time=0.44s (20.64%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7806|ppo_ep: 1|act_loss: 0.015472412109375|cri_loss: 0.00795745849609375|unsuper_loss: 0.0 +average reward score: 5.4609375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.51%) |Training time=0.45s (20.83%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7807|ppo_ep: 1|act_loss: 0.0377197265625|cri_loss: 0.021759033203125|unsuper_loss: 0.0 +average reward score: 5.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.23%) |Training time=0.46s (21.22%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7808|ppo_ep: 1|act_loss: -0.02886962890625|cri_loss: -0.014007568359375|unsuper_loss: 0.0 +average reward score: 5.26171875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.04%) |Training time=0.47s (21.37%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.56 +[2023-04-14 13:33:30,325] [INFO] [logging.py:96:log_dist] [Rank 0] step=7810, skipped=99, lr=[1.073776749444405e-07, 1.073776749444405e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:33:30,344] [INFO] [timer.py:199:stop] epoch=0/micro_step=7810/global_step=7810, RunningAvgSamplesPerSec=105.71154895925527, CurrSamplesPerSec=110.02490234735241, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:33:30,436] [INFO] [logging.py:96:log_dist] [Rank 0] step=7810, skipped=134, lr=[6.292522792395812e-08, 6.292522792395812e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7809|ppo_ep: 1|act_loss: -0.0090484619140625|cri_loss: -0.004367828369140625|unsuper_loss: 0.0 +average reward score: 5.5625 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.71s (75.65%) |Training time=0.45s (20.02%) |Others=0.10 (4.33%)|CurSamplesPerSec=14.13 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7810|ppo_ep: 1|act_loss: 0.044677734375|cri_loss: 0.022857666015625|unsuper_loss: 0.0 +average reward score: 5.76953125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.45%) |Training time=0.45s (20.90%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7811|ppo_ep: 1|act_loss: -0.0088348388671875|cri_loss: -0.00435638427734375|unsuper_loss: 0.0 +average reward score: 5.28515625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.79%) |Training time=0.44s (20.62%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7812|ppo_ep: 1|act_loss: -0.028411865234375|cri_loss: -0.01348114013671875|unsuper_loss: 0.0 +average reward score: 5.10546875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.80%) |Training time=0.44s (20.54%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7813|ppo_ep: 1|act_loss: -0.03485107421875|cri_loss: -0.0166015625|unsuper_loss: 0.0 +average reward score: 5.78125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.51%) |Training time=0.45s (20.84%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7814|ppo_ep: 1|act_loss: 0.0007061958312988281|cri_loss: 0.0004711151123046875|unsuper_loss: 0.0 +average reward score: 5.078125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.52%) |Training time=0.45s (20.83%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7815|ppo_ep: 1|act_loss: -0.08453369140625|cri_loss: -0.03948974609375|unsuper_loss: 0.0 +average reward score: 5.54296875 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.61s (72.42%) |Training time=0.46s (20.95%) |Others=0.15 (6.64%)|CurSamplesPerSec=14.43 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7816|ppo_ep: 1|act_loss: 0.001010894775390625|cri_loss: 0.0006361007690429688|unsuper_loss: 0.0 +average reward score: 4.92578125 +------------------------------------------------------------------------------------- +|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.67s (75.08%) |Training time=0.45s (20.05%) |Others=0.11 (4.87%)|CurSamplesPerSec=14.36 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7817|ppo_ep: 1|act_loss: -0.04296875|cri_loss: -0.021209716796875|unsuper_loss: 0.0 +average reward score: 4.84375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.58%) |Training time=0.45s (20.80%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7818|ppo_ep: 1|act_loss: -0.0019664764404296875|cri_loss: -0.0007276535034179688|unsuper_loss: 0.0 +average reward score: 5.46875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.47%) |Training time=0.45s (20.88%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.56 +[2023-04-14 13:33:51,976] [INFO] [logging.py:96:log_dist] [Rank 0] step=7820, skipped=99, lr=[1.0351546681472116e-07, 1.0351546681472116e-07], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:33:51,994] [INFO] [timer.py:199:stop] epoch=0/micro_step=7820/global_step=7820, RunningAvgSamplesPerSec=105.7191999963779, CurrSamplesPerSec=114.77339353437402, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:33:52,087] [INFO] [logging.py:96:log_dist] [Rank 0] step=7820, skipped=134, lr=[6.079740731487898e-08, 6.079740731487898e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7819|ppo_ep: 1|act_loss: 0.0115509033203125|cri_loss: 0.00637054443359375|unsuper_loss: 0.0 +average reward score: 5.8359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.89%) |Training time=0.44s (20.54%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7820|ppo_ep: 1|act_loss: -0.0177001953125|cri_loss: -0.00807952880859375|unsuper_loss: 0.0 +average reward score: 5.40234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.62s (75.54%) |Training time=0.42s (19.75%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56 +[2023-04-14 13:33:56,375] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 7821|ppo_ep: 1|act_loss: 0.01320648193359375|cri_loss: 0.0069122314453125|unsuper_loss: 0.0 +average reward score: 6.01953125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (75.07%) |Training time=0.44s (20.82%) |Others=0.09 (4.10%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.56 +[2023-04-14 13:33:58,514] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 7822|ppo_ep: 1|act_loss: -0.004489898681640625|cri_loss: -0.001972198486328125|unsuper_loss: 0.0 +average reward score: 6.29296875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.81%) |Training time=0.45s (20.98%) |Others=0.09 (4.21%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7823|ppo_ep: 1|act_loss: -0.0143280029296875|cri_loss: -0.00704193115234375|unsuper_loss: 0.0 +average reward score: 5.59375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.34%) |Training time=0.45s (21.02%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7824|ppo_ep: 1|act_loss: -0.003658294677734375|cri_loss: -0.0014791488647460938|unsuper_loss: 0.0 +average reward score: 5.6796875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.35%) |Training time=0.45s (20.99%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7825|ppo_ep: 1|act_loss: 0.0084686279296875|cri_loss: 0.0049591064453125|unsuper_loss: 0.0 +average reward score: 5.63671875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.50%) |Training time=0.45s (20.83%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7826|ppo_ep: 1|act_loss: -0.00666046142578125|cri_loss: -0.003215789794921875|unsuper_loss: 0.0 +average reward score: 5.00390625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.30%) |Training time=0.45s (21.07%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7827|ppo_ep: 1|act_loss: 0.016510009765625|cri_loss: 0.0093536376953125|unsuper_loss: 0.0 +average reward score: 4.7734375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.37%) |Training time=0.45s (20.96%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7828|ppo_ep: 1|act_loss: 0.022125244140625|cri_loss: 0.01151275634765625|unsuper_loss: 0.0 +average reward score: 5.15625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.44%) |Training time=0.45s (20.89%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56 +[2023-04-14 13:34:13,448] [INFO] [logging.py:96:log_dist] [Rank 0] step=7830, skipped=99, lr=[9.972324247574424e-08, 9.972324247574424e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:34:13,466] [INFO] [timer.py:199:stop] epoch=0/micro_step=7830/global_step=7830, RunningAvgSamplesPerSec=105.7271991751751, CurrSamplesPerSec=111.44625006331337, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:34:13,559] [INFO] [logging.py:96:log_dist] [Rank 0] step=7830, skipped=136, lr=[5.912118083573476e-08, 5.912118083573476e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7829|ppo_ep: 1|act_loss: 0.01406097412109375|cri_loss: 0.007244110107421875|unsuper_loss: 0.0 +average reward score: 5.25390625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.46%) |Training time=0.45s (20.98%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7830|ppo_ep: 1|act_loss: 0.0462646484375|cri_loss: 0.0237579345703125|unsuper_loss: 0.0 +average reward score: 6.25 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.41%) |Training time=0.45s (20.94%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7831|ppo_ep: 1|act_loss: 0.0146026611328125|cri_loss: 0.00739288330078125|unsuper_loss: 0.0 +average reward score: 5.12109375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.66%) |Training time=0.44s (20.66%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7832|ppo_ep: 1|act_loss: 0.0281982421875|cri_loss: 0.01439666748046875|unsuper_loss: 0.0 +average reward score: 5.42578125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.82%) |Training time=0.44s (20.51%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7833|ppo_ep: 1|act_loss: 0.0078125|cri_loss: 0.00453948974609375|unsuper_loss: 0.0 +average reward score: 5.5390625 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.76s (76.40%) |Training time=0.45s (19.34%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.86 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7834|ppo_ep: 1|act_loss: 0.00756072998046875|cri_loss: 0.003940582275390625|unsuper_loss: 0.0 +average reward score: 5.359375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.83%) |Training time=0.44s (20.39%) |Others=0.10 (4.79%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7835|ppo_ep: 1|act_loss: 0.00428009033203125|cri_loss: 0.002353668212890625|unsuper_loss: 0.0 +average reward score: 5.17578125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.33%) |Training time=0.43s (20.07%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7836|ppo_ep: 1|act_loss: 0.00107574462890625|cri_loss: 0.0007843971252441406|unsuper_loss: 0.0 +average reward score: 4.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.18%) |Training time=0.46s (21.15%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7837|ppo_ep: 1|act_loss: 0.01763916015625|cri_loss: 0.009185791015625|unsuper_loss: 0.0 +average reward score: 6.05859375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.71%) |Training time=0.48s (21.78%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7838|ppo_ep: 1|act_loss: -0.01302337646484375|cri_loss: -0.006412506103515625|unsuper_loss: 0.0 +average reward score: 5.3125 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.09%) |Training time=0.47s (21.35%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.50 |AvgSamplesPerSec=14.56 +[2023-04-14 13:34:35,280] [INFO] [logging.py:96:log_dist] [Rank 0] step=7840, skipped=99, lr=[9.60010581374241e-08, 9.60010581374241e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:34:35,298] [INFO] [timer.py:199:stop] epoch=0/micro_step=7840/global_step=7840, RunningAvgSamplesPerSec=105.7335546704453, CurrSamplesPerSec=107.28166269409262, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:34:35,392] [INFO] [logging.py:96:log_dist] [Rank 0] step=7840, skipped=136, lr=[5.705846133496118e-08, 5.705846133496118e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7839|ppo_ep: 1|act_loss: -0.00060272216796875|cri_loss: 0.000186920166015625|unsuper_loss: 0.0 +average reward score: 5.5859375 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.21%) |Training time=0.46s (20.33%) |Others=0.10 (4.46%)|CurSamplesPerSec=14.12 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7840|ppo_ep: 1|act_loss: 0.01113128662109375|cri_loss: 0.00600433349609375|unsuper_loss: 0.0 +average reward score: 5.60546875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.38%) |Training time=0.48s (22.03%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.56 +[2023-04-14 13:34:39,625] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 4096, reducing to 2048 +epoch: 0|step: 7841|ppo_ep: 1|act_loss: 0.04718017578125|cri_loss: 0.026702880859375|unsuper_loss: 0.0 +average reward score: 5.7109375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.88%) |Training time=0.44s (20.44%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7842|ppo_ep: 1|act_loss: -0.0233612060546875|cri_loss: -0.0114898681640625|unsuper_loss: 0.0 +average reward score: 5.92578125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.98%) |Training time=0.46s (21.38%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7843|ppo_ep: 1|act_loss: -0.00299072265625|cri_loss: -0.001346588134765625|unsuper_loss: 0.0 +average reward score: 5.484375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.72%) |Training time=0.47s (21.67%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7844|ppo_ep: 1|act_loss: -0.0024204254150390625|cri_loss: -0.0009531974792480469|unsuper_loss: 0.0 +average reward score: 5.0078125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.67%) |Training time=0.47s (21.72%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7845|ppo_ep: 1|act_loss: 0.016998291015625|cri_loss: 0.0087432861328125|unsuper_loss: 0.0 +average reward score: 5.81640625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.86%) |Training time=0.47s (21.62%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7846|ppo_ep: 1|act_loss: 0.0045928955078125|cri_loss: 0.0025310516357421875|unsuper_loss: 0.0 +average reward score: 5.671875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.59%) |Training time=0.47s (21.45%) |Others=0.11 (4.96%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7847|ppo_ep: 1|act_loss: 0.007110595703125|cri_loss: 0.003620147705078125|unsuper_loss: 0.0 +average reward score: 5.28125 +------------------------------------------------------------------------------------- +|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.53%) |Training time=0.50s (21.23%) |Others=0.10 (4.24%)|CurSamplesPerSec=13.52 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7848|ppo_ep: 1|act_loss: -0.0013885498046875|cri_loss: -0.0004911422729492188|unsuper_loss: 0.0 +average reward score: 4.671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.12%) |Training time=0.46s (21.34%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.56 +[2023-04-14 13:34:57,189] [INFO] [logging.py:96:log_dist] [Rank 0] step=7850, skipped=100, lr=[9.271102205441317e-08, 9.271102205441317e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:34:57,208] [INFO] [timer.py:199:stop] epoch=0/micro_step=7850/global_step=7850, RunningAvgSamplesPerSec=105.73152737795587, CurrSamplesPerSec=106.00033328041914, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:34:57,301] [INFO] [logging.py:96:log_dist] [Rank 0] step=7850, skipped=136, lr=[5.5031952121065715e-08, 5.5031952121065715e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7849|ppo_ep: 1|act_loss: 0.003459930419921875|cri_loss: 0.001934051513671875|unsuper_loss: 0.0 +average reward score: 5.44921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.02%) |Training time=0.47s (21.43%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7850|ppo_ep: 1|act_loss: 0.007312774658203125|cri_loss: 0.003902435302734375|unsuper_loss: 0.0 +average reward score: 5.87890625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.53%) |Training time=0.48s (21.84%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7851|ppo_ep: 1|act_loss: 0.00341033935546875|cri_loss: 0.002071380615234375|unsuper_loss: 0.0 +average reward score: 5.3671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.10%) |Training time=0.46s (21.35%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7852|ppo_ep: 1|act_loss: -0.00308990478515625|cri_loss: 0.0007801055908203125|unsuper_loss: 0.0 +average reward score: 5.4765625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.55%) |Training time=0.48s (21.83%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7853|ppo_ep: 1|act_loss: 0.020660400390625|cri_loss: 0.010772705078125|unsuper_loss: 0.0 +average reward score: 5.33203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.72%) |Training time=0.47s (21.67%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7854|ppo_ep: 1|act_loss: 0.030853271484375|cri_loss: 0.0167236328125|unsuper_loss: 0.0 +average reward score: 5.0703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.80%) |Training time=0.47s (21.59%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7855|ppo_ep: 1|act_loss: 0.0005464553833007812|cri_loss: 0.0004324913024902344|unsuper_loss: 0.0 +average reward score: 5.0390625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.88%) |Training time=0.47s (21.60%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7856|ppo_ep: 1|act_loss: -0.03692626953125|cri_loss: -0.0179595947265625|unsuper_loss: 0.0 +average reward score: 4.7734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.73%) |Training time=0.47s (21.67%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7857|ppo_ep: 1|act_loss: -0.0096435546875|cri_loss: -0.0046844482421875|unsuper_loss: 0.0 +average reward score: 5.5859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.92%) |Training time=0.47s (21.55%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7858|ppo_ep: 1|act_loss: -0.0118255615234375|cri_loss: -0.005863189697265625|unsuper_loss: 0.0 +average reward score: 5.328125 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.86%) |Training time=0.47s (21.57%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.57 |AvgSamplesPerSec=14.56 +[2023-04-14 13:35:18,956] [INFO] [logging.py:96:log_dist] [Rank 0] step=7860, skipped=100, lr=[8.912206485681388e-08, 8.912206485681388e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:35:18,975] [INFO] [timer.py:199:stop] epoch=0/micro_step=7860/global_step=7860, RunningAvgSamplesPerSec=105.72896164280641, CurrSamplesPerSec=104.51759121109141, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:35:19,067] [INFO] [logging.py:96:log_dist] [Rank 0] step=7860, skipped=136, lr=[5.304168323180425e-08, 5.304168323180425e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7859|ppo_ep: 1|act_loss: 0.0003643035888671875|cri_loss: 0.00044035911560058594|unsuper_loss: 0.0 +average reward score: 5.453125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.89%) |Training time=0.47s (21.60%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7860|ppo_ep: 1|act_loss: -0.00658416748046875|cri_loss: -0.0031833648681640625|unsuper_loss: 0.0 +average reward score: 5.69140625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.90%) |Training time=0.47s (21.48%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7861|ppo_ep: 1|act_loss: 0.002376556396484375|cri_loss: 0.001617431640625|unsuper_loss: 0.0 +average reward score: 5.74609375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.01%) |Training time=0.47s (21.51%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7862|ppo_ep: 1|act_loss: 0.0129241943359375|cri_loss: 0.006740570068359375|unsuper_loss: 0.0 +average reward score: 5.5859375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.42%) |Training time=0.47s (21.65%) |Others=0.11 (4.93%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7863|ppo_ep: 1|act_loss: 0.0234375|cri_loss: 0.01204681396484375|unsuper_loss: 0.0 +average reward score: 5.44140625 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.61%) |Training time=0.48s (20.60%) |Others=0.11 (4.79%)|CurSamplesPerSec=13.66 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7864|ppo_ep: 1|act_loss: -0.00978851318359375|cri_loss: -0.0033740997314453125|unsuper_loss: 0.0 +average reward score: 5.36328125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.05%) |Training time=0.46s (21.33%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7865|ppo_ep: 1|act_loss: 0.0022487640380859375|cri_loss: 0.0019283294677734375|unsuper_loss: 0.0 +average reward score: 5.625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.18%) |Training time=0.46s (21.24%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7866|ppo_ep: 1|act_loss: 0.0089569091796875|cri_loss: 0.00506591796875|unsuper_loss: 0.0 +average reward score: 5.484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.21%) |Training time=0.46s (21.16%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7867|ppo_ep: 1|act_loss: 0.0088348388671875|cri_loss: 0.004520416259765625|unsuper_loss: 0.0 +average reward score: 5.51171875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.49%) |Training time=0.46s (20.98%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7868|ppo_ep: 1|act_loss: -0.03363037109375|cri_loss: -0.0160980224609375|unsuper_loss: 0.0 +average reward score: 5.90234375 +------------------------------------------------------------------------------------- +|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.71s (74.73%) |Training time=0.48s (20.89%) |Others=0.10 (4.39%)|CurSamplesPerSec=14.00 |AvgSamplesPerSec=14.56 +[2023-04-14 13:35:40,958] [INFO] [logging.py:96:log_dist] [Rank 0] step=7870, skipped=100, lr=[8.560330479489633e-08, 8.560330479489633e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:35:40,977] [INFO] [timer.py:199:stop] epoch=0/micro_step=7870/global_step=7870, RunningAvgSamplesPerSec=105.72843790630478, CurrSamplesPerSec=109.30619119053087, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:35:41,069] [INFO] [logging.py:96:log_dist] [Rank 0] step=7870, skipped=136, lr=[5.108768416776266e-08, 5.108768416776266e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7869|ppo_ep: 1|act_loss: 0.004215240478515625|cri_loss: 0.002651214599609375|unsuper_loss: 0.0 +average reward score: 5.265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.33%) |Training time=0.46s (21.13%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7870|ppo_ep: 1|act_loss: 0.0010814666748046875|cri_loss: 0.0006008148193359375|unsuper_loss: 0.0 +average reward score: 5.28125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.24%) |Training time=0.46s (21.13%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7871|ppo_ep: 1|act_loss: 0.0111083984375|cri_loss: 0.00591278076171875|unsuper_loss: 0.0 +average reward score: 5.2578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.36%) |Training time=0.45s (21.08%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7872|ppo_ep: 1|act_loss: -0.00525665283203125|cri_loss: -0.0023651123046875|unsuper_loss: 0.0 +average reward score: 5.3984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.22%) |Training time=0.46s (21.14%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7873|ppo_ep: 1|act_loss: 0.024139404296875|cri_loss: 0.0124359130859375|unsuper_loss: 0.0 +average reward score: 4.65234375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.24%) |Training time=0.46s (21.22%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7874|ppo_ep: 1|act_loss: 0.008209228515625|cri_loss: 0.00452423095703125|unsuper_loss: 0.0 +average reward score: 5.24609375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.61%) |Training time=0.47s (21.79%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7875|ppo_ep: 1|act_loss: -0.016693115234375|cri_loss: -0.00795745849609375|unsuper_loss: 0.0 +average reward score: 5.4453125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.11%) |Training time=0.46s (21.36%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7876|ppo_ep: 1|act_loss: 0.00591278076171875|cri_loss: 0.003322601318359375|unsuper_loss: 0.0 +average reward score: 6.03515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.89%) |Training time=0.47s (21.49%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7877|ppo_ep: 1|act_loss: -0.0061492919921875|cri_loss: -0.00229644775390625|unsuper_loss: 0.0 +average reward score: 5.84375 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.78s (75.92%) |Training time=0.47s (19.87%) |Others=0.10 (4.21%)|CurSamplesPerSec=13.67 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7878|ppo_ep: 1|act_loss: -0.009002685546875|cri_loss: -0.0041046142578125|unsuper_loss: 0.0 +average reward score: 5.7734375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.19%) |Training time=0.46s (21.26%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.56 +[2023-04-14 13:36:02,778] [INFO] [logging.py:96:log_dist] [Rank 0] step=7880, skipped=100, lr=[8.215479402517389e-08, 8.215479402517389e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:36:02,797] [INFO] [timer.py:199:stop] epoch=0/micro_step=7880/global_step=7880, RunningAvgSamplesPerSec=105.72969054760189, CurrSamplesPerSec=104.98801479963392, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:36:02,889] [INFO] [logging.py:96:log_dist] [Rank 0] step=7880, skipped=136, lr=[4.9169983891921294e-08, 4.9169983891921294e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7879|ppo_ep: 1|act_loss: -0.016082763671875|cri_loss: -0.00780487060546875|unsuper_loss: 0.0 +average reward score: 5.21875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.89%) |Training time=0.47s (21.59%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7880|ppo_ep: 1|act_loss: 0.02557373046875|cri_loss: 0.0131072998046875|unsuper_loss: 0.0 +average reward score: 5.421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.83%) |Training time=0.47s (21.54%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7881|ppo_ep: 1|act_loss: 0.0496826171875|cri_loss: 0.025543212890625|unsuper_loss: 0.0 +average reward score: 5.15234375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.47%) |Training time=0.48s (22.03%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7882|ppo_ep: 1|act_loss: 0.00325775146484375|cri_loss: 0.001895904541015625|unsuper_loss: 0.0 +average reward score: 5.42578125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.26%) |Training time=0.48s (22.16%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7883|ppo_ep: 1|act_loss: 0.013031005859375|cri_loss: 0.006710052490234375|unsuper_loss: 0.0 +average reward score: 5.0859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.68%) |Training time=0.47s (21.80%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7884|ppo_ep: 1|act_loss: 0.00453948974609375|cri_loss: 0.0025043487548828125|unsuper_loss: 0.0 +average reward score: 5.1875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.36%) |Training time=0.48s (22.04%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7885|ppo_ep: 1|act_loss: -0.0011043548583984375|cri_loss: -0.0003352165222167969|unsuper_loss: 0.0 +average reward score: 5.703125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.70%) |Training time=0.47s (21.75%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7886|ppo_ep: 1|act_loss: 0.020843505859375|cri_loss: 0.01062774658203125|unsuper_loss: 0.0 +average reward score: 5.5234375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.52%) |Training time=0.48s (21.89%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7887|ppo_ep: 1|act_loss: 0.002925872802734375|cri_loss: 0.0016040802001953125|unsuper_loss: 0.0 +average reward score: 5.76953125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.83%) |Training time=0.47s (21.66%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7888|ppo_ep: 1|act_loss: 0.0274200439453125|cri_loss: 0.01441192626953125|unsuper_loss: 0.0 +average reward score: 5.78515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.77%) |Training time=0.47s (21.63%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.56 +[2023-04-14 13:36:24,543] [INFO] [logging.py:96:log_dist] [Rank 0] step=7890, skipped=100, lr=[7.877658366289522e-08, 7.877658366289522e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:36:24,562] [INFO] [timer.py:199:stop] epoch=0/micro_step=7890/global_step=7890, RunningAvgSamplesPerSec=105.72515201238446, CurrSamplesPerSec=102.51301521834603, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:36:24,654] [INFO] [logging.py:96:log_dist] [Rank 0] step=7890, skipped=136, lr=[4.7288610829223645e-08, 4.7288610829223645e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7889|ppo_ep: 1|act_loss: -0.01506805419921875|cri_loss: -0.004730224609375|unsuper_loss: 0.0 +average reward score: 5.01953125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.69%) |Training time=0.47s (21.81%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7890|ppo_ep: 1|act_loss: 0.00989532470703125|cri_loss: 0.00505828857421875|unsuper_loss: 0.0 +average reward score: 5.7890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.67%) |Training time=0.47s (21.73%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7891|ppo_ep: 1|act_loss: 0.031524658203125|cri_loss: 0.016021728515625|unsuper_loss: 0.0 +average reward score: 5.61328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.73%) |Training time=0.47s (21.76%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7892|ppo_ep: 1|act_loss: 0.004886627197265625|cri_loss: 0.0025768280029296875|unsuper_loss: 0.0 +average reward score: 5.5546875 +------------------------------------------------------------------------------------- +|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.60s (71.64%) |Training time=0.49s (21.75%) |Others=0.15 (6.61%)|CurSamplesPerSec=14.32 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7893|ppo_ep: 1|act_loss: 0.0214385986328125|cri_loss: 0.0110321044921875|unsuper_loss: 0.0 +average reward score: 6.4765625 +------------------------------------------------------------------------------------- +|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.66s (74.48%) |Training time=0.47s (21.09%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.36 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7894|ppo_ep: 1|act_loss: -0.004444122314453125|cri_loss: -0.0020580291748046875|unsuper_loss: 0.0 +average reward score: 5.390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.65%) |Training time=0.47s (21.75%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7895|ppo_ep: 1|act_loss: 0.03582763671875|cri_loss: 0.018310546875|unsuper_loss: 0.0 +average reward score: 5.14453125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.60%) |Training time=0.48s (21.85%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7896|ppo_ep: 1|act_loss: 0.0079345703125|cri_loss: 0.005886077880859375|unsuper_loss: 0.0 +average reward score: 5.27734375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.59%) |Training time=0.48s (21.83%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7897|ppo_ep: 1|act_loss: 0.0352783203125|cri_loss: 0.0183258056640625|unsuper_loss: 0.0 +average reward score: 6.36328125 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.64s (72.27%) |Training time=0.51s (22.65%) |Others=0.12 (5.09%)|CurSamplesPerSec=14.12 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7898|ppo_ep: 1|act_loss: 0.043914794921875|cri_loss: 0.023712158203125|unsuper_loss: 0.0 +average reward score: 5.28515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.99%) |Training time=0.46s (21.38%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.56 +[2023-04-14 13:36:46,507] [INFO] [logging.py:96:log_dist] [Rank 0] step=7900, skipped=100, lr=[7.54687237812874e-08, 7.54687237812874e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:36:46,526] [INFO] [timer.py:199:stop] epoch=0/micro_step=7900/global_step=7900, RunningAvgSamplesPerSec=105.7195812062558, CurrSamplesPerSec=103.41567842742634, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:36:46,618] [INFO] [logging.py:96:log_dist] [Rank 0] step=7900, skipped=136, lr=[4.544359286615785e-08, 4.544359286615785e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7899|ppo_ep: 1|act_loss: -0.022247314453125|cri_loss: -0.0109405517578125|unsuper_loss: 0.0 +average reward score: 5.23046875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.47s (21.71%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7900|ppo_ep: 1|act_loss: 0.0120697021484375|cri_loss: 0.00836944580078125|unsuper_loss: 0.0 +average reward score: 5.29296875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.86%) |Training time=0.47s (21.53%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7901|ppo_ep: 1|act_loss: -0.007129669189453125|cri_loss: -0.003482818603515625|unsuper_loss: 0.0 +average reward score: 5.30078125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.90%) |Training time=0.47s (21.52%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7902|ppo_ep: 1|act_loss: 0.0109100341796875|cri_loss: 0.005550384521484375|unsuper_loss: 0.0 +average reward score: 5.29296875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.05%) |Training time=0.46s (21.34%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7903|ppo_ep: 1|act_loss: 0.01129913330078125|cri_loss: 0.005908966064453125|unsuper_loss: 0.0 +average reward score: 5.2421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.73%) |Training time=0.47s (21.67%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7904|ppo_ep: 1|act_loss: 0.0010099411010742188|cri_loss: 0.0005588531494140625|unsuper_loss: 0.0 +average reward score: 5.76171875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.73%) |Training time=0.47s (21.66%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7905|ppo_ep: 1|act_loss: 0.04443359375|cri_loss: 0.0255889892578125|unsuper_loss: 0.0 +average reward score: 4.86328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.87%) |Training time=0.47s (21.60%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7906|ppo_ep: 1|act_loss: 0.009063720703125|cri_loss: 0.004764556884765625|unsuper_loss: 0.0 +average reward score: 5.765625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.71%) |Training time=0.47s (21.68%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7907|ppo_ep: 1|act_loss: -0.0076751708984375|cri_loss: -0.0036468505859375|unsuper_loss: 0.0 +average reward score: 5.1328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.98%) |Training time=0.46s (21.32%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7908|ppo_ep: 1|act_loss: -0.018890380859375|cri_loss: -0.00931549072265625|unsuper_loss: 0.0 +average reward score: 5.33984375 +------------------------------------------------------------------------------------- +|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.18%) |Training time=0.50s (21.11%) |Others=0.11 (4.71%)|CurSamplesPerSec=13.54 |AvgSamplesPerSec=14.56 +[2023-04-14 13:37:08,435] [INFO] [logging.py:96:log_dist] [Rank 0] step=7910, skipped=100, lr=[7.223126341081292e-08, 7.223126341081292e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:37:08,454] [INFO] [timer.py:199:stop] epoch=0/micro_step=7910/global_step=7910, RunningAvgSamplesPerSec=105.7167886290013, CurrSamplesPerSec=103.1705201890031, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:37:08,547] [INFO] [logging.py:96:log_dist] [Rank 0] step=7910, skipped=136, lr=[4.3634957350340844e-08, 4.3634957350340844e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7909|ppo_ep: 1|act_loss: -0.00423431396484375|cri_loss: -0.0020046234130859375|unsuper_loss: 0.0 +average reward score: 5.0703125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.74%) |Training time=0.47s (21.72%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7910|ppo_ep: 1|act_loss: 0.030242919921875|cri_loss: 0.015960693359375|unsuper_loss: 0.0 +average reward score: 5.3046875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.73%) |Training time=0.47s (21.67%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7911|ppo_ep: 1|act_loss: 0.00960540771484375|cri_loss: 0.0049896240234375|unsuper_loss: 0.0 +average reward score: 5.6015625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.01%) |Training time=0.47s (21.47%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7912|ppo_ep: 1|act_loss: 0.034393310546875|cri_loss: 0.017669677734375|unsuper_loss: 0.0 +average reward score: 5.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.01%) |Training time=0.46s (21.34%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7913|ppo_ep: 1|act_loss: -0.016571044921875|cri_loss: -0.0081634521484375|unsuper_loss: 0.0 +average reward score: 5.73828125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.05%) |Training time=0.47s (21.38%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7914|ppo_ep: 1|act_loss: -0.0011844635009765625|cri_loss: -0.00016069412231445312|unsuper_loss: 0.0 +average reward score: 4.765625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.97%) |Training time=0.46s (21.40%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7915|ppo_ep: 1|act_loss: -0.01568603515625|cri_loss: -0.007717132568359375|unsuper_loss: 0.0 +average reward score: 5.6015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.94%) |Training time=0.46s (21.44%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7916|ppo_ep: 1|act_loss: 0.031219482421875|cri_loss: 0.0164794921875|unsuper_loss: 0.0 +average reward score: 5.03125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.75%) |Training time=0.47s (21.65%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7917|ppo_ep: 1|act_loss: 0.0123443603515625|cri_loss: 0.006290435791015625|unsuper_loss: 0.0 +average reward score: 5.88671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.73%) |Training time=0.47s (21.63%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7918|ppo_ep: 1|act_loss: 0.0018014907836914062|cri_loss: 0.001071929931640625|unsuper_loss: 0.0 +average reward score: 5.1015625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.87%) |Training time=0.47s (21.51%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.56 +[2023-04-14 13:37:30,149] [INFO] [logging.py:96:log_dist] [Rank 0] step=7920, skipped=100, lr=[6.906425053844542e-08, 6.906425053844542e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:37:30,168] [INFO] [timer.py:199:stop] epoch=0/micro_step=7920/global_step=7920, RunningAvgSamplesPerSec=105.7158984428223, CurrSamplesPerSec=105.03155851879676, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:37:30,260] [INFO] [logging.py:96:log_dist] [Rank 0] step=7920, skipped=136, lr=[4.186273109011374e-08, 4.186273109011374e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7919|ppo_ep: 1|act_loss: 0.0194854736328125|cri_loss: 0.0098876953125|unsuper_loss: 0.0 +average reward score: 4.953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.88%) |Training time=0.47s (21.59%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7920|ppo_ep: 1|act_loss: -0.015533447265625|cri_loss: -0.007617950439453125|unsuper_loss: 0.0 +average reward score: 5.390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.85%) |Training time=0.47s (21.52%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7921|ppo_ep: 1|act_loss: 0.0017957687377929688|cri_loss: 0.0020465850830078125|unsuper_loss: 0.0 +average reward score: 5.64453125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.81%) |Training time=0.47s (21.57%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7922|ppo_ep: 1|act_loss: -0.0041046142578125|cri_loss: -0.0018215179443359375|unsuper_loss: 0.0 +average reward score: 5.73828125 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.60s (70.66%) |Training time=0.47s (20.87%) |Others=0.19 (8.47%)|CurSamplesPerSec=14.14 |AvgSamplesPerSec=14.56 +[2023-04-14 13:37:39,018] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 7923|ppo_ep: 1|act_loss: 0.0170135498046875|cri_loss: 0.00887298583984375|unsuper_loss: 0.0 +average reward score: 4.66796875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.27%) |Training time=0.47s (21.58%) |Others=0.09 (4.16%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.56 +[2023-04-14 13:37:41,177] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 7924|ppo_ep: 1|act_loss: -0.002185821533203125|cri_loss: -0.001056671142578125|unsuper_loss: 0.0 +average reward score: 5.3359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.23%) |Training time=0.46s (21.51%) |Others=0.09 (4.26%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7925|ppo_ep: 1|act_loss: 0.0026702880859375|cri_loss: 0.0013885498046875|unsuper_loss: 0.0 +average reward score: 5.00390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.34%) |Training time=0.48s (22.10%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7926|ppo_ep: 1|act_loss: -0.0080108642578125|cri_loss: -0.0037822723388671875|unsuper_loss: 0.0 +average reward score: 5.0390625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.63s (74.17%) |Training time=0.47s (21.22%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7927|ppo_ep: 1|act_loss: -0.0149993896484375|cri_loss: -0.007312774658203125|unsuper_loss: 0.0 +average reward score: 4.66796875 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.70s (75.38%) |Training time=0.46s (20.27%) |Others=0.10 (4.35%)|CurSamplesPerSec=14.17 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7928|ppo_ep: 1|act_loss: -0.014862060546875|cri_loss: -0.006984710693359375|unsuper_loss: 0.0 +average reward score: 4.99609375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.38%) |Training time=0.45s (20.95%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.56 +[2023-04-14 13:37:51,969] [INFO] [logging.py:96:log_dist] [Rank 0] step=7930, skipped=100, lr=[6.596773210695512e-08, 6.596773210695512e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:37:51,987] [INFO] [timer.py:199:stop] epoch=0/micro_step=7930/global_step=7930, RunningAvgSamplesPerSec=105.71659977392, CurrSamplesPerSec=109.8774545198531, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:37:52,080] [INFO] [logging.py:96:log_dist] [Rank 0] step=7930, skipped=138, lr=[4.047118241664511e-08, 4.047118241664511e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7929|ppo_ep: 1|act_loss: 0.017242431640625|cri_loss: 0.009002685546875|unsuper_loss: 0.0 +average reward score: 5.3515625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.17%) |Training time=0.45s (21.17%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7930|ppo_ep: 1|act_loss: -0.021514892578125|cri_loss: -0.010650634765625|unsuper_loss: 0.0 +average reward score: 5.29296875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.36%) |Training time=0.45s (20.96%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7931|ppo_ep: 1|act_loss: 0.025634765625|cri_loss: 0.0132598876953125|unsuper_loss: 0.0 +average reward score: 6.0390625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.51%) |Training time=0.45s (20.91%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7932|ppo_ep: 1|act_loss: 0.01055145263671875|cri_loss: 0.005443572998046875|unsuper_loss: 0.0 +average reward score: 5.9140625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.24%) |Training time=0.45s (21.09%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7933|ppo_ep: 1|act_loss: -0.01123046875|cri_loss: -0.005527496337890625|unsuper_loss: 0.0 +average reward score: 6.484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.25%) |Training time=0.45s (21.09%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7934|ppo_ep: 1|act_loss: 0.0125274658203125|cri_loss: 0.00664520263671875|unsuper_loss: 0.0 +average reward score: 5.55859375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.53%) |Training time=0.47s (21.84%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7935|ppo_ep: 1|act_loss: 0.00568389892578125|cri_loss: 0.003032684326171875|unsuper_loss: 0.0 +average reward score: 6.0234375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.03%) |Training time=0.49s (22.20%) |Others=0.10 (4.77%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7936|ppo_ep: 1|act_loss: -0.01641845703125|cri_loss: -0.00799560546875|unsuper_loss: 0.0 +average reward score: 4.42578125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (73.99%) |Training time=0.46s (21.27%) |Others=0.10 (4.74%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7937|ppo_ep: 1|act_loss: 0.0003528594970703125|cri_loss: 0.00023508071899414062|unsuper_loss: 0.0 +average reward score: 5.4375 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.61s (69.68%) |Training time=0.60s (26.04%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.81 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7938|ppo_ep: 1|act_loss: 0.043304443359375|cri_loss: 0.026031494140625|unsuper_loss: 0.0 +average reward score: 5.140625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.02%) |Training time=0.46s (21.32%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.56 +[2023-04-14 13:38:13,739] [INFO] [logging.py:96:log_dist] [Rank 0] step=7940, skipped=100, lr=[6.294175401421511e-08, 6.294175401421511e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:38:13,757] [INFO] [timer.py:199:stop] epoch=0/micro_step=7940/global_step=7940, RunningAvgSamplesPerSec=105.71327649972606, CurrSamplesPerSec=107.65157898873818, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:38:13,849] [INFO] [logging.py:96:log_dist] [Rank 0] step=7940, skipped=138, lr=[3.876455865061551e-08, 3.876455865061551e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7939|ppo_ep: 1|act_loss: -0.003467559814453125|cri_loss: -0.0015869140625|unsuper_loss: 0.0 +average reward score: 5.92578125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.23%) |Training time=0.46s (21.24%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7940|ppo_ep: 1|act_loss: -0.00472259521484375|cri_loss: -0.0023040771484375|unsuper_loss: 0.0 +average reward score: 5.80859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.28%) |Training time=0.43s (20.00%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7941|ppo_ep: 1|act_loss: 0.0321044921875|cri_loss: 0.01654052734375|unsuper_loss: 0.0 +average reward score: 5.54296875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.64s (75.09%) |Training time=0.44s (20.34%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7942|ppo_ep: 1|act_loss: 0.016448974609375|cri_loss: 0.008331298828125|unsuper_loss: 0.0 +average reward score: 5.58984375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.47s (21.78%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7943|ppo_ep: 1|act_loss: -0.0034580230712890625|cri_loss: -0.00157928466796875|unsuper_loss: 0.0 +average reward score: 5.515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.61%) |Training time=0.47s (21.87%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7944|ppo_ep: 1|act_loss: -0.0009264945983886719|cri_loss: -2.09808349609375e-05|unsuper_loss: 0.0 +average reward score: 4.9375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.53%) |Training time=0.47s (21.83%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7945|ppo_ep: 1|act_loss: 0.024078369140625|cri_loss: 0.01306915283203125|unsuper_loss: 0.0 +average reward score: 5.13671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.47%) |Training time=0.48s (22.01%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7946|ppo_ep: 1|act_loss: 0.002613067626953125|cri_loss: 0.00144195556640625|unsuper_loss: 0.0 +average reward score: 5.34375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.77%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7947|ppo_ep: 1|act_loss: 0.00836944580078125|cri_loss: 0.004421234130859375|unsuper_loss: 0.0 +average reward score: 4.83203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.46%) |Training time=0.47s (21.88%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7948|ppo_ep: 1|act_loss: -0.0103759765625|cri_loss: -0.005062103271484375|unsuper_loss: 0.0 +average reward score: 5.44140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.67%) |Training time=0.47s (21.68%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.56 +[2023-04-14 13:38:35,426] [INFO] [logging.py:96:log_dist] [Rank 0] step=7950, skipped=100, lr=[5.998636111252047e-08, 5.998636111252047e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:38:35,444] [INFO] [timer.py:199:stop] epoch=0/micro_step=7950/global_step=7950, RunningAvgSamplesPerSec=105.71354108782916, CurrSamplesPerSec=107.05438952760315, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:38:35,537] [INFO] [logging.py:96:log_dist] [Rank 0] step=7950, skipped=138, lr=[3.709441633123367e-08, 3.709441633123367e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7949|ppo_ep: 1|act_loss: -0.00852203369140625|cri_loss: -0.004192352294921875|unsuper_loss: 0.0 +average reward score: 5.1484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.99%) |Training time=0.47s (21.49%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7950|ppo_ep: 1|act_loss: -0.00669097900390625|cri_loss: -0.0032520294189453125|unsuper_loss: 0.0 +average reward score: 5.88671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.76%) |Training time=0.47s (21.61%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7951|ppo_ep: 1|act_loss: -0.005435943603515625|cri_loss: -0.0026531219482421875|unsuper_loss: 0.0 +average reward score: 5.87890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.03%) |Training time=0.46s (21.42%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7952|ppo_ep: 1|act_loss: 0.0321044921875|cri_loss: 0.016448974609375|unsuper_loss: 0.0 +average reward score: 6.01953125 +------------------------------------------------------------------------------------- +|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.76s (74.66%) |Training time=0.50s (21.06%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.58 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7953|ppo_ep: 1|act_loss: 0.006015777587890625|cri_loss: 0.0031280517578125|unsuper_loss: 0.0 +average reward score: 4.9921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.42%) |Training time=0.48s (22.05%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7954|ppo_ep: 1|act_loss: -0.0130157470703125|cri_loss: -0.00617218017578125|unsuper_loss: 0.0 +average reward score: 5.6484375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.17%) |Training time=0.48s (22.21%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7955|ppo_ep: 1|act_loss: -0.00555419921875|cri_loss: -0.0027217864990234375|unsuper_loss: 0.0 +average reward score: 5.0703125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.23%) |Training time=0.48s (22.00%) |Others=0.10 (4.77%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7956|ppo_ep: 1|act_loss: -0.01708984375|cri_loss: -0.00836944580078125|unsuper_loss: 0.0 +average reward score: 5.19140625 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.63s (73.67%) |Training time=0.48s (21.74%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.49 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7957|ppo_ep: 1|act_loss: -0.010009765625|cri_loss: -0.004848480224609375|unsuper_loss: 0.0 +average reward score: 5.65625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.54%) |Training time=0.45s (20.79%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7958|ppo_ep: 1|act_loss: 0.0113372802734375|cri_loss: 0.006534576416015625|unsuper_loss: 0.0 +average reward score: 5.64453125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.64%) |Training time=0.44s (20.67%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.56 +[2023-04-14 13:38:57,265] [INFO] [logging.py:96:log_dist] [Rank 0] step=7960, skipped=100, lr=[5.710159720792351e-08, 5.710159720792351e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:38:57,284] [INFO] [timer.py:199:stop] epoch=0/micro_step=7960/global_step=7960, RunningAvgSamplesPerSec=105.71254016421052, CurrSamplesPerSec=113.03297482527259, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:38:57,376] [INFO] [logging.py:96:log_dist] [Rank 0] step=7960, skipped=138, lr=[3.5460780214038593e-08, 3.5460780214038593e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7959|ppo_ep: 1|act_loss: 0.0247802734375|cri_loss: 0.01277923583984375|unsuper_loss: 0.0 +average reward score: 5.609375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.63%) |Training time=0.45s (20.81%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.94 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7960|ppo_ep: 1|act_loss: -0.0133819580078125|cri_loss: -0.00598907470703125|unsuper_loss: 0.0 +average reward score: 5.796875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.47%) |Training time=0.45s (20.87%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7961|ppo_ep: 1|act_loss: -0.01129913330078125|cri_loss: -0.0054931640625|unsuper_loss: 0.0 +average reward score: 5.17578125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.56%) |Training time=0.44s (20.75%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.56 +epoch: 0|step: 7962|ppo_ep: 1|act_loss: -0.004425048828125|cri_loss: -0.0021190643310546875|unsuper_loss: 0.0 +average reward score: 5.2109375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.61%) |Training time=0.44s (20.73%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.57 +epoch: 0|step: 7963|ppo_ep: 1|act_loss: -0.0086669921875|cri_loss: -0.003204345703125|unsuper_loss: 0.0 +average reward score: 5.953125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.60%) |Training time=0.44s (20.80%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.57 +epoch: 0|step: 7964|ppo_ep: 1|act_loss: 0.00540924072265625|cri_loss: 0.002834320068359375|unsuper_loss: 0.0 +average reward score: 5.34375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.44%) |Training time=0.45s (20.88%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.57 +epoch: 0|step: 7965|ppo_ep: 1|act_loss: 0.0267333984375|cri_loss: 0.01537322998046875|unsuper_loss: 0.0 +average reward score: 5.1953125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.68%) |Training time=0.44s (20.73%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.57 +epoch: 0|step: 7966|ppo_ep: 1|act_loss: 0.00862884521484375|cri_loss: 0.005084991455078125|unsuper_loss: 0.0 +average reward score: 6.19921875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.93%) |Training time=0.44s (20.38%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.57 +epoch: 0|step: 7967|ppo_ep: 1|act_loss: -0.0021800994873046875|cri_loss: -0.0008406639099121094|unsuper_loss: 0.0 +average reward score: 5.03515625 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.62%) |Training time=0.44s (19.02%) |Others=0.10 (4.35%)|CurSamplesPerSec=13.85 |AvgSamplesPerSec=14.57 +epoch: 0|step: 7968|ppo_ep: 1|act_loss: -0.0058746337890625|cri_loss: -0.0027408599853515625|unsuper_loss: 0.0 +average reward score: 6.0234375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.97%) |Training time=0.44s (20.41%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.57 +[2023-04-14 13:39:18,820] [INFO] [logging.py:96:log_dist] [Rank 0] step=7970, skipped=100, lr=[5.4287505059585056e-08, 5.4287505059585056e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:39:18,838] [INFO] [timer.py:199:stop] epoch=0/micro_step=7970/global_step=7970, RunningAvgSamplesPerSec=105.72253965619953, CurrSamplesPerSec=114.09618176810422, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:39:18,931] [INFO] [logging.py:96:log_dist] [Rank 0] step=7970, skipped=138, lr=[3.38636745134574e-08, 3.38636745134574e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7969|ppo_ep: 1|act_loss: -0.0057830810546875|cri_loss: -0.0027828216552734375|unsuper_loss: 0.0 +average reward score: 5.28125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.68%) |Training time=0.44s (20.74%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.57 +epoch: 0|step: 7970|ppo_ep: 1|act_loss: 0.0011510848999023438|cri_loss: 0.0006318092346191406|unsuper_loss: 0.0 +average reward score: 4.9609375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.70%) |Training time=0.44s (20.60%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.57 +epoch: 0|step: 7971|ppo_ep: 1|act_loss: 0.027252197265625|cri_loss: 0.01389312744140625|unsuper_loss: 0.0 +average reward score: 4.7421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.55%) |Training time=0.45s (20.86%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.57 +epoch: 0|step: 7972|ppo_ep: 1|act_loss: -0.0053253173828125|cri_loss: -0.00258636474609375|unsuper_loss: 0.0 +average reward score: 5.6875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.86%) |Training time=0.44s (20.41%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.57 +epoch: 0|step: 7973|ppo_ep: 1|act_loss: -0.009918212890625|cri_loss: -0.004913330078125|unsuper_loss: 0.0 +average reward score: 5.375 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.61s (75.27%) |Training time=0.43s (20.12%) |Others=0.10 (4.61%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.57 +epoch: 0|step: 7974|ppo_ep: 1|act_loss: 0.059112548828125|cri_loss: 0.0310211181640625|unsuper_loss: 0.0 +average reward score: 5.05078125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.58%) |Training time=0.44s (20.75%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.57 +epoch: 0|step: 7975|ppo_ep: 1|act_loss: 0.00844573974609375|cri_loss: 0.004795074462890625|unsuper_loss: 0.0 +average reward score: 5.50390625 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.86%) |Training time=0.44s (20.55%) |Others=0.10 (4.59%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.57 +epoch: 0|step: 7976|ppo_ep: 1|act_loss: -0.0298004150390625|cri_loss: -0.01467132568359375|unsuper_loss: 0.0 +average reward score: 5.1953125 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.82%) |Training time=0.44s (20.47%) |Others=0.10 (4.71%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.57 +epoch: 0|step: 7977|ppo_ep: 1|act_loss: 0.008209228515625|cri_loss: 0.00433349609375|unsuper_loss: 0.0 +average reward score: 5.1953125 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.86%) |Training time=0.44s (20.53%) |Others=0.10 (4.61%)|CurSamplesPerSec=15.00 |AvgSamplesPerSec=14.57 +epoch: 0|step: 7978|ppo_ep: 1|act_loss: -0.0465087890625|cri_loss: -0.0218505859375|unsuper_loss: 0.0 +average reward score: 5.5 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.80%) |Training time=0.44s (20.52%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.57 +[2023-04-14 13:39:40,188] [INFO] [logging.py:96:log_dist] [Rank 0] step=7980, skipped=100, lr=[5.154412637913806e-08, 5.154412637913806e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:39:40,206] [INFO] [timer.py:199:stop] epoch=0/micro_step=7980/global_step=7980, RunningAvgSamplesPerSec=105.7339108712191, CurrSamplesPerSec=115.24871199527388, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:39:40,299] [INFO] [logging.py:96:log_dist] [Rank 0] step=7980, skipped=138, lr=[3.230312290245008e-08, 3.230312290245008e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7979|ppo_ep: 1|act_loss: 0.0009937286376953125|cri_loss: 0.0007677078247070312|unsuper_loss: 0.0 +average reward score: 5.5 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.76%) |Training time=0.44s (20.65%) |Others=0.10 (4.59%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.57 +epoch: 0|step: 7980|ppo_ep: 1|act_loss: -0.01158905029296875|cri_loss: -0.005451202392578125|unsuper_loss: 0.0 +average reward score: 5.140625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.73%) |Training time=0.44s (20.58%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.57 +epoch: 0|step: 7981|ppo_ep: 1|act_loss: -0.03515625|cri_loss: -0.01605224609375|unsuper_loss: 0.0 +average reward score: 5.3671875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.45%) |Training time=0.45s (20.98%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.57 +epoch: 0|step: 7982|ppo_ep: 1|act_loss: 0.022247314453125|cri_loss: 0.01145172119140625|unsuper_loss: 0.0 +average reward score: 5.3359375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.63%) |Training time=0.44s (20.68%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.99 |AvgSamplesPerSec=14.57 +epoch: 0|step: 7983|ppo_ep: 1|act_loss: -0.014556884765625|cri_loss: -0.007183074951171875|unsuper_loss: 0.0 +average reward score: 5.1328125 +------------------------------------------------------------------------------------- +|E2E latency=2.30s |Gather latency=0.00s (0.00%) |Generate time=1.69s (73.36%) |Training time=0.51s (22.29%) |Others=0.10 (4.35%)|CurSamplesPerSec=13.90 |AvgSamplesPerSec=14.57 +epoch: 0|step: 7984|ppo_ep: 1|act_loss: -0.015045166015625|cri_loss: -0.0073394775390625|unsuper_loss: 0.0 +average reward score: 5.13671875 +------------------------------------------------------------------------------------- +|E2E latency=2.13s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.76%) |Training time=0.44s (20.57%) |Others=0.10 (4.67%)|CurSamplesPerSec=15.01 |AvgSamplesPerSec=14.57 +epoch: 0|step: 7985|ppo_ep: 1|act_loss: 0.075439453125|cri_loss: 0.042724609375|unsuper_loss: 0.0 +average reward score: 5.328125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.75%) |Training time=0.44s (20.47%) |Others=0.10 (4.77%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.57 +epoch: 0|step: 7986|ppo_ep: 1|act_loss: 0.00634002685546875|cri_loss: 0.003490447998046875|unsuper_loss: 0.0 +average reward score: 5.75390625 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.64s (74.31%) |Training time=0.47s (21.12%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.57 +epoch: 0|step: 7987|ppo_ep: 1|act_loss: -0.03533935546875|cri_loss: -0.0172882080078125|unsuper_loss: 0.0 +average reward score: 4.9921875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.97%) |Training time=0.44s (20.34%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.57 +epoch: 0|step: 7988|ppo_ep: 1|act_loss: -0.023040771484375|cri_loss: -0.01068115234375|unsuper_loss: 0.0 +average reward score: 5.91015625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.34%) |Training time=0.45s (20.95%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.57 +[2023-04-14 13:40:01,805] [INFO] [logging.py:96:log_dist] [Rank 0] step=7990, skipped=100, lr=[4.887150183007368e-08, 4.887150183007368e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:40:01,823] [INFO] [timer.py:199:stop] epoch=0/micro_step=7990/global_step=7990, RunningAvgSamplesPerSec=105.74197867872607, CurrSamplesPerSec=112.59064195032909, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:40:01,916] [INFO] [logging.py:96:log_dist] [Rank 0] step=7990, skipped=138, lr=[3.077914851215585e-08, 3.077914851215585e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7989|ppo_ep: 1|act_loss: -0.03289794921875|cri_loss: -0.016021728515625|unsuper_loss: 0.0 +average reward score: 4.76171875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.52%) |Training time=0.45s (20.90%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.57 +epoch: 0|step: 7990|ppo_ep: 1|act_loss: -0.03704833984375|cri_loss: -0.016448974609375|unsuper_loss: 0.0 +average reward score: 5.578125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.88%) |Training time=0.44s (20.43%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.57 +epoch: 0|step: 7991|ppo_ep: 1|act_loss: 0.00550079345703125|cri_loss: 0.003116607666015625|unsuper_loss: 0.0 +average reward score: 5.703125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.54%) |Training time=0.45s (20.87%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.57 +epoch: 0|step: 7992|ppo_ep: 1|act_loss: -0.0137481689453125|cri_loss: -0.006755828857421875|unsuper_loss: 0.0 +average reward score: 5.78125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.64%) |Training time=0.44s (20.66%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.57 +epoch: 0|step: 7993|ppo_ep: 1|act_loss: -0.005077362060546875|cri_loss: -0.00238800048828125|unsuper_loss: 0.0 +average reward score: 5.66015625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.78%) |Training time=0.44s (20.63%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.97 |AvgSamplesPerSec=14.57 +epoch: 0|step: 7994|ppo_ep: 1|act_loss: -0.0119781494140625|cri_loss: -0.005706787109375|unsuper_loss: 0.0 +average reward score: 6.09765625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.62%) |Training time=0.44s (20.70%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.57 +epoch: 0|step: 7995|ppo_ep: 1|act_loss: 0.0263214111328125|cri_loss: 0.01381683349609375|unsuper_loss: 0.0 +average reward score: 6.078125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.54%) |Training time=0.45s (20.87%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.57 +epoch: 0|step: 7996|ppo_ep: 1|act_loss: -0.0144805908203125|cri_loss: -0.007122039794921875|unsuper_loss: 0.0 +average reward score: 4.9921875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.81%) |Training time=0.44s (20.48%) |Others=0.10 (4.72%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.57 +epoch: 0|step: 7997|ppo_ep: 1|act_loss: -0.007595062255859375|cri_loss: -0.003307342529296875|unsuper_loss: 0.0 +average reward score: 5.2265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.64%) |Training time=0.44s (20.69%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.57 +epoch: 0|step: 7998|ppo_ep: 1|act_loss: 0.001560211181640625|cri_loss: 0.0010843276977539062|unsuper_loss: 0.0 +average reward score: 5.5625 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.77s (76.38%) |Training time=0.45s (19.30%) |Others=0.10 (4.32%)|CurSamplesPerSec=13.80 |AvgSamplesPerSec=14.57 +[2023-04-14 13:40:23,385] [INFO] [logging.py:96:log_dist] [Rank 0] step=8000, skipped=100, lr=[4.6269671027135476e-08, 4.6269671027135476e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:40:23,403] [INFO] [timer.py:199:stop] epoch=0/micro_step=8000/global_step=8000, RunningAvgSamplesPerSec=105.75146589136898, CurrSamplesPerSec=110.35337026640816, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:40:23,496] [INFO] [logging.py:96:log_dist] [Rank 0] step=8000, skipped=138, lr=[2.9291773931551525e-08, 2.9291773931551525e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 7999|ppo_ep: 1|act_loss: 0.01483154296875|cri_loss: 0.007595062255859375|unsuper_loss: 0.0 +average reward score: 6.6484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.47%) |Training time=0.45s (21.00%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8000|ppo_ep: 1|act_loss: -0.00762176513671875|cri_loss: -0.00371551513671875|unsuper_loss: 0.0 +average reward score: 5.51953125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.78%) |Training time=0.44s (20.52%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.98 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8001|ppo_ep: 1|act_loss: -0.013580322265625|cri_loss: -0.006587982177734375|unsuper_loss: 0.0 +average reward score: 5.49609375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.74%) |Training time=0.44s (20.66%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8002|ppo_ep: 1|act_loss: -0.0023956298828125|cri_loss: -0.001026153564453125|unsuper_loss: 0.0 +average reward score: 5.421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.99%) |Training time=0.46s (21.35%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8003|ppo_ep: 1|act_loss: -0.00656890869140625|cri_loss: -0.0019073486328125|unsuper_loss: 0.0 +average reward score: 5.62109375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.36%) |Training time=0.48s (22.09%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8004|ppo_ep: 1|act_loss: -0.00583648681640625|cri_loss: -0.002777099609375|unsuper_loss: 0.0 +average reward score: 5.42578125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.40%) |Training time=0.48s (21.97%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8005|ppo_ep: 1|act_loss: -0.00774383544921875|cri_loss: -0.00351715087890625|unsuper_loss: 0.0 +average reward score: 5.71875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.28%) |Training time=0.48s (22.19%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8006|ppo_ep: 1|act_loss: 0.00832366943359375|cri_loss: 0.00452423095703125|unsuper_loss: 0.0 +average reward score: 4.890625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.34%) |Training time=0.48s (22.02%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8007|ppo_ep: 1|act_loss: 0.0088043212890625|cri_loss: 0.0045166015625|unsuper_loss: 0.0 +average reward score: 5.5390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.24%) |Training time=0.48s (22.23%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8008|ppo_ep: 1|act_loss: -0.0028076171875|cri_loss: -0.00128936767578125|unsuper_loss: 0.0 +average reward score: 6.09375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.40%) |Training time=0.48s (21.95%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.57 +[2023-04-14 13:40:45,038] [INFO] [logging.py:96:log_dist] [Rank 0] step=8010, skipped=100, lr=[4.373867253573118e-08, 4.373867253573118e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:40:45,056] [INFO] [timer.py:199:stop] epoch=0/micro_step=8010/global_step=8010, RunningAvgSamplesPerSec=105.74922781707347, CurrSamplesPerSec=100.61894819858148, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:40:45,149] [INFO] [logging.py:96:log_dist] [Rank 0] step=8010, skipped=138, lr=[2.7841021207116736e-08, 2.7841021207116736e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 8009|ppo_ep: 1|act_loss: -0.0247039794921875|cri_loss: -0.01165771484375|unsuper_loss: 0.0 +average reward score: 5.078125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.40%) |Training time=0.48s (22.11%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8010|ppo_ep: 1|act_loss: -0.025115966796875|cri_loss: -0.01239776611328125|unsuper_loss: 0.0 +average reward score: 5.34765625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.95%) |Training time=0.49s (22.45%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8011|ppo_ep: 1|act_loss: 0.02227783203125|cri_loss: 0.01142120361328125|unsuper_loss: 0.0 +average reward score: 6.1171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.42%) |Training time=0.48s (22.05%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8012|ppo_ep: 1|act_loss: -0.01776123046875|cri_loss: -0.00848388671875|unsuper_loss: 0.0 +average reward score: 5.2734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.26%) |Training time=0.48s (22.13%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8013|ppo_ep: 1|act_loss: -0.00836944580078125|cri_loss: -0.00402069091796875|unsuper_loss: 0.0 +average reward score: 5.55859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.36%) |Training time=0.48s (21.99%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8014|ppo_ep: 1|act_loss: -0.02996826171875|cri_loss: -0.01462554931640625|unsuper_loss: 0.0 +average reward score: 4.5703125 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.12%) |Training time=0.48s (20.53%) |Others=0.10 (4.35%)|CurSamplesPerSec=13.65 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8015|ppo_ep: 1|act_loss: 0.020477294921875|cri_loss: 0.0107574462890625|unsuper_loss: 0.0 +average reward score: 5.6328125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.93%) |Training time=0.47s (21.54%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8016|ppo_ep: 1|act_loss: -0.00269317626953125|cri_loss: -0.0012426376342773438|unsuper_loss: 0.0 +average reward score: 5.9140625 +------------------------------------------------------------------------------------- +|E2E latency=2.29s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.01%) |Training time=0.49s (21.61%) |Others=0.10 (4.38%)|CurSamplesPerSec=14.00 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8017|ppo_ep: 1|act_loss: 0.0018358230590820312|cri_loss: 0.0013628005981445312|unsuper_loss: 0.0 +average reward score: 5.3984375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.06%) |Training time=0.49s (22.44%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.60 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8018|ppo_ep: 1|act_loss: -0.00238037109375|cri_loss: -0.0007963180541992188|unsuper_loss: 0.0 +average reward score: 5.1796875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.33%) |Training time=0.48s (22.07%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.57 +[2023-04-14 13:41:07,086] [INFO] [logging.py:96:log_dist] [Rank 0] step=8020, skipped=100, lr=[4.127854387136491e-08, 4.127854387136491e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:41:07,104] [INFO] [timer.py:199:stop] epoch=0/micro_step=8020/global_step=8020, RunningAvgSamplesPerSec=105.74197847665799, CurrSamplesPerSec=100.35315457578353, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:41:07,197] [INFO] [logging.py:96:log_dist] [Rank 0] step=8020, skipped=138, lr=[2.6426911842506175e-08, 2.6426911842506175e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 8019|ppo_ep: 1|act_loss: -0.011383056640625|cri_loss: -0.00542449951171875|unsuper_loss: 0.0 +average reward score: 5.5859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.37%) |Training time=0.48s (22.14%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8020|ppo_ep: 1|act_loss: 0.0060577392578125|cri_loss: 0.003360748291015625|unsuper_loss: 0.0 +average reward score: 5.27734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.39%) |Training time=0.48s (22.00%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8021|ppo_ep: 1|act_loss: -0.01690673828125|cri_loss: -0.0083465576171875|unsuper_loss: 0.0 +average reward score: 5.0859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.48s (21.95%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8022|ppo_ep: 1|act_loss: -0.04150390625|cri_loss: -0.02032470703125|unsuper_loss: 0.0 +average reward score: 6.12109375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.46%) |Training time=0.48s (21.92%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8023|ppo_ep: 1|act_loss: -0.00159454345703125|cri_loss: -0.000701904296875|unsuper_loss: 0.0 +average reward score: 5.609375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.94%) |Training time=0.49s (22.41%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8024|ppo_ep: 1|act_loss: -0.0018596649169921875|cri_loss: -0.0006575584411621094|unsuper_loss: 0.0 +average reward score: 5.93359375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.03%) |Training time=0.49s (22.40%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.57 +[2023-04-14 13:41:20,232] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 8025|ppo_ep: 1|act_loss: 0.010040283203125|cri_loss: 0.00547027587890625|unsuper_loss: 0.0 +average reward score: 5.359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.49%) |Training time=0.48s (22.45%) |Others=0.09 (4.06%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.57 +[2023-04-14 13:41:22,396] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 8026|ppo_ep: 1|act_loss: -0.029541015625|cri_loss: -0.0144195556640625|unsuper_loss: 0.0 +average reward score: 5.2109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.08%) |Training time=0.49s (22.80%) |Others=0.09 (4.12%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8027|ppo_ep: 1|act_loss: 0.00446319580078125|cri_loss: 0.0026836395263671875|unsuper_loss: 0.0 +average reward score: 5.82421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.80%) |Training time=0.49s (22.60%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8028|ppo_ep: 1|act_loss: -0.0071868896484375|cri_loss: -0.0032958984375|unsuper_loss: 0.0 +average reward score: 5.45703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.11%) |Training time=0.48s (22.27%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.57 +[2023-04-14 13:41:28,805] [INFO] [logging.py:96:log_dist] [Rank 0] step=8030, skipped=100, lr=[3.888932149907788e-08, 3.888932149907788e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:41:28,824] [INFO] [timer.py:199:stop] epoch=0/micro_step=8030/global_step=8030, RunningAvgSamplesPerSec=105.73302555462291, CurrSamplesPerSec=96.34630702129675, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:41:28,916] [INFO] [logging.py:96:log_dist] [Rank 0] step=8030, skipped=140, lr=[2.5322021673613308e-08, 2.5322021673613308e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 8029|ppo_ep: 1|act_loss: 0.0107421875|cri_loss: 0.00566864013671875|unsuper_loss: 0.0 +average reward score: 5.796875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.78%) |Training time=0.49s (22.73%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8030|ppo_ep: 1|act_loss: 0.00165557861328125|cri_loss: 0.0010852813720703125|unsuper_loss: 0.0 +average reward score: 5.5234375 +------------------------------------------------------------------------------------- +|E2E latency=2.34s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.68%) |Training time=0.49s (21.04%) |Others=0.10 (4.28%)|CurSamplesPerSec=13.69 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8031|ppo_ep: 1|act_loss: -0.0274658203125|cri_loss: -0.01277923583984375|unsuper_loss: 0.0 +average reward score: 5.328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.05%) |Training time=0.49s (22.32%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8032|ppo_ep: 1|act_loss: -0.019622802734375|cri_loss: -0.009552001953125|unsuper_loss: 0.0 +average reward score: 5.890625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.72%) |Training time=0.50s (22.70%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8033|ppo_ep: 1|act_loss: 0.005889892578125|cri_loss: 0.0031986236572265625|unsuper_loss: 0.0 +average reward score: 5.37109375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.81%) |Training time=0.49s (22.57%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8034|ppo_ep: 1|act_loss: 0.02691650390625|cri_loss: 0.0142822265625|unsuper_loss: 0.0 +average reward score: 5.5703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.94%) |Training time=0.49s (22.44%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8035|ppo_ep: 1|act_loss: -0.005855560302734375|cri_loss: -0.0028533935546875|unsuper_loss: 0.0 +average reward score: 5.58203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.98%) |Training time=0.49s (22.49%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8036|ppo_ep: 1|act_loss: -0.0105743408203125|cri_loss: -0.0033416748046875|unsuper_loss: 0.0 +average reward score: 5.62109375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.61%) |Training time=0.50s (22.79%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8037|ppo_ep: 1|act_loss: -0.0026702880859375|cri_loss: -0.001216888427734375|unsuper_loss: 0.0 +average reward score: 5.34765625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.96%) |Training time=0.49s (22.41%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8038|ppo_ep: 1|act_loss: -0.0136566162109375|cri_loss: -0.006710052490234375|unsuper_loss: 0.0 +average reward score: 5.640625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.75%) |Training time=0.49s (22.63%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.57 +[2023-04-14 13:41:50,718] [INFO] [logging.py:96:log_dist] [Rank 0] step=8040, skipped=100, lr=[3.657104083291008e-08, 3.657104083291008e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:41:50,736] [INFO] [timer.py:199:stop] epoch=0/micro_step=8040/global_step=8040, RunningAvgSamplesPerSec=105.72194107019268, CurrSamplesPerSec=98.03218566099805, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:41:50,828] [INFO] [logging.py:96:log_dist] [Rank 0] step=8040, skipped=140, lr=[2.3973922811987295e-08, 2.3973922811987295e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 8039|ppo_ep: 1|act_loss: 0.041595458984375|cri_loss: 0.0220184326171875|unsuper_loss: 0.0 +average reward score: 4.953125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.84%) |Training time=0.49s (22.56%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8040|ppo_ep: 1|act_loss: -0.011566162109375|cri_loss: -0.0054473876953125|unsuper_loss: 0.0 +average reward score: 4.96875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.97%) |Training time=0.48s (22.39%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8041|ppo_ep: 1|act_loss: -0.003047943115234375|cri_loss: -0.0013294219970703125|unsuper_loss: 0.0 +average reward score: 5.26953125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.72%) |Training time=0.49s (22.68%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8042|ppo_ep: 1|act_loss: -0.0085906982421875|cri_loss: -0.0038299560546875|unsuper_loss: 0.0 +average reward score: 5.80859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.76%) |Training time=0.49s (22.61%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8043|ppo_ep: 1|act_loss: 0.00794219970703125|cri_loss: 0.004283905029296875|unsuper_loss: 0.0 +average reward score: 5.86328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.88%) |Training time=0.49s (22.58%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8044|ppo_ep: 1|act_loss: -0.002685546875|cri_loss: -0.000995635986328125|unsuper_loss: 0.0 +average reward score: 6.1015625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.66%) |Training time=0.50s (22.70%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8045|ppo_ep: 1|act_loss: -0.0005998611450195312|cri_loss: -0.00021529197692871094|unsuper_loss: 0.0 +average reward score: 5.3828125 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.28%) |Training time=0.51s (23.17%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.44 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8046|ppo_ep: 1|act_loss: 0.00278472900390625|cri_loss: 0.001537322998046875|unsuper_loss: 0.0 +average reward score: 5.6328125 +------------------------------------------------------------------------------------- +|E2E latency=2.36s |Gather latency=0.00s (0.00%) |Generate time=1.75s (73.86%) |Training time=0.52s (21.89%) |Others=0.10 (4.25%)|CurSamplesPerSec=13.54 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8047|ppo_ep: 1|act_loss: 0.0053863525390625|cri_loss: 0.0028438568115234375|unsuper_loss: 0.0 +average reward score: 5.63671875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.77%) |Training time=0.49s (22.68%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8048|ppo_ep: 1|act_loss: 0.0013275146484375|cri_loss: 0.0008149147033691406|unsuper_loss: 0.0 +average reward score: 5.4375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.68%) |Training time=0.49s (22.74%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.57 +[2023-04-14 13:42:12,673] [INFO] [logging.py:96:log_dist] [Rank 0] step=8050, skipped=100, lr=[3.4323736235372594e-08, 3.4323736235372594e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:42:12,691] [INFO] [timer.py:199:stop] epoch=0/micro_step=8050/global_step=8050, RunningAvgSamplesPerSec=105.708434479618, CurrSamplesPerSec=97.58464677133455, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:42:12,784] [INFO] [logging.py:96:log_dist] [Rank 0] step=8050, skipped=140, lr=[2.266252462991253e-08, 2.266252462991253e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 8049|ppo_ep: 1|act_loss: 0.017578125|cri_loss: 0.0091705322265625|unsuper_loss: 0.0 +average reward score: 5.484375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.73%) |Training time=0.49s (22.59%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8050|ppo_ep: 1|act_loss: -0.001239776611328125|cri_loss: -0.0003724098205566406|unsuper_loss: 0.0 +average reward score: 5.4375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.04%) |Training time=0.48s (22.33%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8051|ppo_ep: 1|act_loss: 0.0269927978515625|cri_loss: 0.0146484375|unsuper_loss: 0.0 +average reward score: 4.96484375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.20%) |Training time=0.50s (22.84%) |Others=0.11 (4.96%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8052|ppo_ep: 1|act_loss: -0.0083770751953125|cri_loss: -0.004146575927734375|unsuper_loss: 0.0 +average reward score: 5.4921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.95%) |Training time=0.49s (22.41%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8053|ppo_ep: 1|act_loss: 0.001995086669921875|cri_loss: 0.0011777877807617188|unsuper_loss: 0.0 +average reward score: 5.80078125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.95%) |Training time=0.49s (22.43%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8054|ppo_ep: 1|act_loss: -0.0205535888671875|cri_loss: -0.00970458984375|unsuper_loss: 0.0 +average reward score: 5.7421875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.84%) |Training time=0.49s (22.56%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8055|ppo_ep: 1|act_loss: 0.00510406494140625|cri_loss: 0.0027561187744140625|unsuper_loss: 0.0 +average reward score: 5.0234375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.99%) |Training time=0.49s (22.45%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8056|ppo_ep: 1|act_loss: -0.01055908203125|cri_loss: -0.004421234130859375|unsuper_loss: 0.0 +average reward score: 5.359375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.79%) |Training time=0.49s (22.61%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.70 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8057|ppo_ep: 1|act_loss: 0.002025604248046875|cri_loss: 0.0011615753173828125|unsuper_loss: 0.0 +average reward score: 6.08984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.05%) |Training time=0.49s (22.42%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8058|ppo_ep: 1|act_loss: 0.00799560546875|cri_loss: 0.00431060791015625|unsuper_loss: 0.0 +average reward score: 5.31640625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.71%) |Training time=0.49s (22.67%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.57 +[2023-04-14 13:42:34,383] [INFO] [logging.py:96:log_dist] [Rank 0] step=8060, skipped=100, lr=[3.21474410169414e-08, 3.21474410169414e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:42:34,401] [INFO] [timer.py:199:stop] epoch=0/micro_step=8060/global_step=8060, RunningAvgSamplesPerSec=105.69826892905085, CurrSamplesPerSec=98.67971046991659, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:42:34,494] [INFO] [logging.py:96:log_dist] [Rank 0] step=8060, skipped=140, lr=[2.1387846565474047e-08, 2.1387846565474047e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 8059|ppo_ep: 1|act_loss: 0.028778076171875|cri_loss: 0.014984130859375|unsuper_loss: 0.0 +average reward score: 5.05078125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.89%) |Training time=0.49s (22.49%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8060|ppo_ep: 1|act_loss: -0.0341796875|cri_loss: -0.0166473388671875|unsuper_loss: 0.0 +average reward score: 4.65625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.64%) |Training time=0.50s (22.76%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8061|ppo_ep: 1|act_loss: 0.0007905960083007812|cri_loss: 0.0005269050598144531|unsuper_loss: 0.0 +average reward score: 4.9375 +------------------------------------------------------------------------------------- +|E2E latency=2.35s |Gather latency=0.00s (0.00%) |Generate time=1.75s (74.25%) |Training time=0.51s (21.57%) |Others=0.10 (4.18%)|CurSamplesPerSec=13.60 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8062|ppo_ep: 1|act_loss: 0.0484619140625|cri_loss: 0.028228759765625|unsuper_loss: 0.0 +average reward score: 5.69140625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.78%) |Training time=0.49s (22.61%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8063|ppo_ep: 1|act_loss: 0.00508880615234375|cri_loss: 0.0027179718017578125|unsuper_loss: 0.0 +average reward score: 5.53125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.90%) |Training time=0.49s (22.59%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8064|ppo_ep: 1|act_loss: -0.04681396484375|cri_loss: -0.0221099853515625|unsuper_loss: 0.0 +average reward score: 5.44921875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.33%) |Training time=0.50s (23.00%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8065|ppo_ep: 1|act_loss: 0.001220703125|cri_loss: 0.001300811767578125|unsuper_loss: 0.0 +average reward score: 5.08203125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.44%) |Training time=0.50s (23.06%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.66 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8066|ppo_ep: 1|act_loss: -0.002658843994140625|cri_loss: -0.0011577606201171875|unsuper_loss: 0.0 +average reward score: 5.1796875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.52%) |Training time=0.50s (22.90%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8067|ppo_ep: 1|act_loss: 0.01605224609375|cri_loss: 0.0081939697265625|unsuper_loss: 0.0 +average reward score: 4.80859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.76%) |Training time=0.49s (22.70%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8068|ppo_ep: 1|act_loss: -0.0056304931640625|cri_loss: -0.0023651123046875|unsuper_loss: 0.0 +average reward score: 5.29296875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.47%) |Training time=0.50s (22.94%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.57 +[2023-04-14 13:42:56,307] [INFO] [logging.py:96:log_dist] [Rank 0] step=8070, skipped=100, lr=[3.004218743556186e-08, 3.004218743556186e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:42:56,325] [INFO] [timer.py:199:stop] epoch=0/micro_step=8070/global_step=8070, RunningAvgSamplesPerSec=105.68438726030831, CurrSamplesPerSec=97.16916567905542, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:42:56,418] [INFO] [logging.py:96:log_dist] [Rank 0] step=8070, skipped=140, lr=[2.0149907512475585e-08, 2.0149907512475585e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 8069|ppo_ep: 1|act_loss: -0.00200653076171875|cri_loss: -0.0005998611450195312|unsuper_loss: 0.0 +average reward score: 5.7265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.76%) |Training time=0.49s (22.72%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8070|ppo_ep: 1|act_loss: -0.019775390625|cri_loss: -0.00946807861328125|unsuper_loss: 0.0 +average reward score: 4.7109375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.28%) |Training time=0.50s (23.13%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8071|ppo_ep: 1|act_loss: 0.01132965087890625|cri_loss: 0.00701141357421875|unsuper_loss: 0.0 +average reward score: 5.6640625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.89%) |Training time=0.49s (22.59%) |Others=0.10 (4.52%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8072|ppo_ep: 1|act_loss: -0.01421356201171875|cri_loss: -0.00688934326171875|unsuper_loss: 0.0 +average reward score: 5.578125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.46%) |Training time=0.50s (22.98%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8073|ppo_ep: 1|act_loss: -0.00011110305786132812|cri_loss: 0.00040340423583984375|unsuper_loss: 0.0 +average reward score: 4.9453125 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.14%) |Training time=0.51s (23.36%) |Others=0.10 (4.50%)|CurSamplesPerSec=14.63 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8074|ppo_ep: 1|act_loss: -0.022796630859375|cri_loss: -0.0111541748046875|unsuper_loss: 0.0 +average reward score: 5.24609375 +------------------------------------------------------------------------------------- +|E2E latency=2.22s |Gather latency=0.00s (0.00%) |Generate time=1.61s (72.73%) |Training time=0.50s (22.76%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.44 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8075|ppo_ep: 1|act_loss: -0.002719879150390625|cri_loss: -0.0008921623229980469|unsuper_loss: 0.0 +average reward score: 4.9921875 +------------------------------------------------------------------------------------- +|E2E latency=2.50s |Gather latency=0.00s (0.00%) |Generate time=1.85s (73.98%) |Training time=0.55s (22.10%) |Others=0.10 (3.92%)|CurSamplesPerSec=12.79 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8076|ppo_ep: 1|act_loss: -0.0154876708984375|cri_loss: -0.0076904296875|unsuper_loss: 0.0 +average reward score: 5.69921875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.80%) |Training time=0.49s (22.59%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8077|ppo_ep: 1|act_loss: 0.019439697265625|cri_loss: 0.00988006591796875|unsuper_loss: 0.0 +average reward score: 5.3046875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.21%) |Training time=0.51s (23.16%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.59 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8078|ppo_ep: 1|act_loss: 0.006072998046875|cri_loss: 0.003635406494140625|unsuper_loss: 0.0 +average reward score: 5.46875 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.59%) |Training time=0.50s (22.81%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.57 +[2023-04-14 13:43:18,464] [INFO] [logging.py:96:log_dist] [Rank 0] step=8080, skipped=100, lr=[2.8008006696170348e-08, 2.8008006696170348e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:43:18,483] [INFO] [timer.py:199:stop] epoch=0/micro_step=8080/global_step=8080, RunningAvgSamplesPerSec=105.6675462528966, CurrSamplesPerSec=98.04500416380557, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:43:18,575] [INFO] [logging.py:96:log_dist] [Rank 0] step=8080, skipped=140, lr=[1.8948725820160663e-08, 1.8948725820160663e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 8079|ppo_ep: 1|act_loss: -0.007293701171875|cri_loss: -0.003353118896484375|unsuper_loss: 0.0 +average reward score: 5.703125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.15%) |Training time=0.49s (22.37%) |Others=0.10 (4.49%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8080|ppo_ep: 1|act_loss: -0.01055908203125|cri_loss: -0.0049285888671875|unsuper_loss: 0.0 +average reward score: 5.83984375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.69%) |Training time=0.49s (22.70%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8081|ppo_ep: 1|act_loss: -0.005428314208984375|cri_loss: -0.00260162353515625|unsuper_loss: 0.0 +average reward score: 5.2734375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.13%) |Training time=0.48s (22.21%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8082|ppo_ep: 1|act_loss: 0.005420684814453125|cri_loss: 0.003185272216796875|unsuper_loss: 0.0 +average reward score: 5.0625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.62%) |Training time=0.50s (22.77%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8083|ppo_ep: 1|act_loss: 0.006107330322265625|cri_loss: 0.003162384033203125|unsuper_loss: 0.0 +average reward score: 5.3359375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.89%) |Training time=0.49s (22.46%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8084|ppo_ep: 1|act_loss: -0.0063629150390625|cri_loss: -0.003002166748046875|unsuper_loss: 0.0 +average reward score: 5.45703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.14%) |Training time=0.48s (22.22%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8085|ppo_ep: 1|act_loss: -0.0124359130859375|cri_loss: -0.005931854248046875|unsuper_loss: 0.0 +average reward score: 5.265625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.02%) |Training time=0.48s (22.37%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8086|ppo_ep: 1|act_loss: -0.008056640625|cri_loss: -0.003505706787109375|unsuper_loss: 0.0 +average reward score: 5.55078125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.02%) |Training time=0.48s (22.34%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8087|ppo_ep: 1|act_loss: 0.002338409423828125|cri_loss: 0.0013360977172851562|unsuper_loss: 0.0 +average reward score: 4.47265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.10%) |Training time=0.48s (22.26%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8088|ppo_ep: 1|act_loss: 0.05010986328125|cri_loss: 0.0265655517578125|unsuper_loss: 0.0 +average reward score: 5.2109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.98%) |Training time=0.48s (22.37%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.57 +[2023-04-14 13:43:40,120] [INFO] [logging.py:96:log_dist] [Rank 0] step=8090, skipped=100, lr=[2.6044928950231964e-08, 2.6044928950231964e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:43:40,139] [INFO] [timer.py:199:stop] epoch=0/micro_step=8090/global_step=8090, RunningAvgSamplesPerSec=105.65891652113257, CurrSamplesPerSec=100.38415276719263, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:43:40,231] [INFO] [logging.py:96:log_dist] [Rank 0] step=8090, skipped=140, lr=[1.7784319292939166e-08, 1.7784319292939166e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 8089|ppo_ep: 1|act_loss: -0.00270843505859375|cri_loss: -0.0010089874267578125|unsuper_loss: 0.0 +average reward score: 5.5 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.16%) |Training time=0.48s (22.31%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8090|ppo_ep: 1|act_loss: 0.007781982421875|cri_loss: 0.0042724609375|unsuper_loss: 0.0 +average reward score: 6.19140625 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.13%) |Training time=0.48s (20.54%) |Others=0.10 (4.33%)|CurSamplesPerSec=13.77 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8091|ppo_ep: 1|act_loss: -0.00762939453125|cri_loss: -0.0021915435791015625|unsuper_loss: 0.0 +average reward score: 5.71484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.94%) |Training time=0.48s (22.42%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8092|ppo_ep: 1|act_loss: 0.00388336181640625|cri_loss: 0.001983642578125|unsuper_loss: 0.0 +average reward score: 5.1875 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.66%) |Training time=0.49s (22.72%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8093|ppo_ep: 1|act_loss: 0.05047607421875|cri_loss: 0.0269012451171875|unsuper_loss: 0.0 +average reward score: 5.1171875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.06%) |Training time=0.48s (22.31%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8094|ppo_ep: 1|act_loss: 0.019500732421875|cri_loss: 0.0110321044921875|unsuper_loss: 0.0 +average reward score: 4.95703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.86%) |Training time=0.49s (22.55%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8095|ppo_ep: 1|act_loss: -0.018035888671875|cri_loss: -0.0085296630859375|unsuper_loss: 0.0 +average reward score: 5.91015625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.58s (73.12%) |Training time=0.48s (22.33%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8096|ppo_ep: 1|act_loss: 0.0235137939453125|cri_loss: 0.01200103759765625|unsuper_loss: 0.0 +average reward score: 5.66796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.96%) |Training time=0.49s (22.41%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8097|ppo_ep: 1|act_loss: -0.021636962890625|cri_loss: -0.010711669921875|unsuper_loss: 0.0 +average reward score: 5.75390625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.81%) |Training time=0.49s (22.59%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8098|ppo_ep: 1|act_loss: -0.033905029296875|cri_loss: -0.015106201171875|unsuper_loss: 0.0 +average reward score: 5.734375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.64%) |Training time=0.49s (22.75%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.57 +[2023-04-14 13:44:01,948] [INFO] [logging.py:96:log_dist] [Rank 0] step=8100, skipped=100, lr=[2.4152983295295917e-08, 2.4152983295295917e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:44:01,966] [INFO] [timer.py:199:stop] epoch=0/micro_step=8100/global_step=8100, RunningAvgSamplesPerSec=105.64966874876379, CurrSamplesPerSec=98.67942026513467, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:44:02,060] [INFO] [logging.py:96:log_dist] [Rank 0] step=8100, skipped=140, lr=[1.6656705190125078e-08, 1.6656705190125078e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 8099|ppo_ep: 1|act_loss: -0.01568603515625|cri_loss: -0.007781982421875|unsuper_loss: 0.0 +average reward score: 5.01953125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.98%) |Training time=0.49s (22.46%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8100|ppo_ep: 1|act_loss: 0.00882720947265625|cri_loss: 0.004547119140625|unsuper_loss: 0.0 +average reward score: 5.125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.57s (72.87%) |Training time=0.49s (22.53%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8101|ppo_ep: 1|act_loss: -0.002315521240234375|cri_loss: -0.0010395050048828125|unsuper_loss: 0.0 +average reward score: 5.515625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.99%) |Training time=0.49s (22.47%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8102|ppo_ep: 1|act_loss: -0.025299072265625|cri_loss: -0.0122222900390625|unsuper_loss: 0.0 +average reward score: 5.66015625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.90%) |Training time=0.49s (22.47%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8103|ppo_ep: 1|act_loss: -0.0008783340454101562|cri_loss: -0.00017642974853515625|unsuper_loss: 0.0 +average reward score: 5.28125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.15%) |Training time=0.48s (22.24%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8104|ppo_ep: 1|act_loss: -0.03424072265625|cri_loss: -0.015899658203125|unsuper_loss: 0.0 +average reward score: 5.86328125 +------------------------------------------------------------------------------------- +|E2E latency=2.28s |Gather latency=0.00s (0.00%) |Generate time=1.69s (73.86%) |Training time=0.50s (21.75%) |Others=0.10 (4.39%)|CurSamplesPerSec=14.01 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8105|ppo_ep: 1|act_loss: 0.029693603515625|cri_loss: 0.0153961181640625|unsuper_loss: 0.0 +average reward score: 5.34375 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.01%) |Training time=0.48s (20.76%) |Others=0.10 (4.23%)|CurSamplesPerSec=13.74 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8106|ppo_ep: 1|act_loss: 0.016998291015625|cri_loss: 0.00864410400390625|unsuper_loss: 0.0 +average reward score: 5.56640625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.04%) |Training time=0.50s (22.85%) |Others=0.11 (5.11%)|CurSamplesPerSec=14.61 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8107|ppo_ep: 1|act_loss: 0.0084686279296875|cri_loss: 0.00434112548828125|unsuper_loss: 0.0 +average reward score: 4.7890625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.58s (72.73%) |Training time=0.50s (22.74%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8108|ppo_ep: 1|act_loss: -0.0159454345703125|cri_loss: -0.00785064697265625|unsuper_loss: 0.0 +average reward score: 5.76953125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.08%) |Training time=0.49s (22.31%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.57 +[2023-04-14 13:44:23,944] [INFO] [logging.py:96:log_dist] [Rank 0] step=8110, skipped=100, lr=[2.233219777456001e-08, 2.233219777456001e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:44:23,962] [INFO] [timer.py:199:stop] epoch=0/micro_step=8110/global_step=8110, RunningAvgSamplesPerSec=105.63997826562213, CurrSamplesPerSec=102.96742378386755, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:44:24,059] [INFO] [logging.py:96:log_dist] [Rank 0] step=8110, skipped=140, lr=[1.556590022567972e-08, 1.556590022567972e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 8109|ppo_ep: 1|act_loss: -0.01812744140625|cri_loss: -0.008941650390625|unsuper_loss: 0.0 +average reward score: 4.953125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.41%) |Training time=0.48s (21.83%) |Others=0.10 (4.76%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8110|ppo_ep: 1|act_loss: 0.00665283203125|cri_loss: 0.0033893585205078125|unsuper_loss: 0.0 +average reward score: 4.83984375 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.67s (76.23%) |Training time=0.42s (19.02%) |Others=0.10 (4.75%)|CurSamplesPerSec=14.62 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8111|ppo_ep: 1|act_loss: -0.00710296630859375|cri_loss: -0.0033931732177734375|unsuper_loss: 0.0 +average reward score: 5.75 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.18%) |Training time=0.46s (21.15%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.69 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8112|ppo_ep: 1|act_loss: -0.0011281967163085938|cri_loss: -6.67572021484375e-05|unsuper_loss: 0.0 +average reward score: 5.9609375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.47%) |Training time=0.45s (20.86%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8113|ppo_ep: 1|act_loss: 0.0004973411560058594|cri_loss: 0.00041937828063964844|unsuper_loss: 0.0 +average reward score: 5.05859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.39%) |Training time=0.45s (20.92%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8114|ppo_ep: 1|act_loss: 0.004322052001953125|cri_loss: 0.0022449493408203125|unsuper_loss: 0.0 +average reward score: 5.9609375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.50%) |Training time=0.45s (20.83%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8115|ppo_ep: 1|act_loss: -0.01165008544921875|cri_loss: -0.005748748779296875|unsuper_loss: 0.0 +average reward score: 5.3203125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.63s (75.30%) |Training time=0.44s (20.13%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8116|ppo_ep: 1|act_loss: 0.04052734375|cri_loss: 0.0211029052734375|unsuper_loss: 0.0 +average reward score: 6.1953125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.07%) |Training time=0.46s (21.28%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8117|ppo_ep: 1|act_loss: 0.00750732421875|cri_loss: 0.00391387939453125|unsuper_loss: 0.0 +average reward score: 4.9765625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.12%) |Training time=0.46s (21.21%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8118|ppo_ep: 1|act_loss: 0.0135040283203125|cri_loss: 0.006977081298828125|unsuper_loss: 0.0 +average reward score: 4.80859375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.13%) |Training time=0.46s (21.22%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.57 +[2023-04-14 13:44:45,867] [INFO] [logging.py:96:log_dist] [Rank 0] step=8120, skipped=100, lr=[2.0582599376458696e-08, 2.0582599376458696e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:44:45,885] [INFO] [timer.py:199:stop] epoch=0/micro_step=8120/global_step=8120, RunningAvgSamplesPerSec=105.64460919016575, CurrSamplesPerSec=96.58647737641874, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:44:45,978] [INFO] [logging.py:96:log_dist] [Rank 0] step=8120, skipped=140, lr=[1.4511920567963911e-08, 1.4511920567963911e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 8119|ppo_ep: 1|act_loss: -0.017852783203125|cri_loss: -0.00876617431640625|unsuper_loss: 0.0 +average reward score: 6.0859375 +------------------------------------------------------------------------------------- +|E2E latency=2.37s |Gather latency=0.00s (0.00%) |Generate time=1.77s (74.94%) |Training time=0.49s (20.90%) |Others=0.10 (4.16%)|CurSamplesPerSec=13.52 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8120|ppo_ep: 1|act_loss: 0.01031494140625|cri_loss: 0.00534820556640625|unsuper_loss: 0.0 +average reward score: 5.6796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.02%) |Training time=0.46s (21.33%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8121|ppo_ep: 1|act_loss: -0.0057525634765625|cri_loss: -0.002651214599609375|unsuper_loss: 0.0 +average reward score: 4.7109375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.90%) |Training time=0.47s (21.57%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8122|ppo_ep: 1|act_loss: 0.0091400146484375|cri_loss: 0.0060882568359375|unsuper_loss: 0.0 +average reward score: 5.5703125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.90%) |Training time=0.47s (21.43%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8123|ppo_ep: 1|act_loss: -0.00714874267578125|cri_loss: -0.0032329559326171875|unsuper_loss: 0.0 +average reward score: 5.53125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.29%) |Training time=0.46s (21.15%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8124|ppo_ep: 1|act_loss: -0.0182342529296875|cri_loss: -0.00887298583984375|unsuper_loss: 0.0 +average reward score: 5.13671875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.60%) |Training time=0.47s (21.75%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8125|ppo_ep: 1|act_loss: -0.00010943412780761719|cri_loss: 7.2479248046875e-05|unsuper_loss: 0.0 +average reward score: 5.25 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.01%) |Training time=0.46s (21.43%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8126|ppo_ep: 1|act_loss: -0.00679779052734375|cri_loss: -0.0029354095458984375|unsuper_loss: 0.0 +average reward score: 5.609375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.79%) |Training time=0.47s (21.56%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.57 +[2023-04-14 13:45:03,323] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 8127|ppo_ep: 1|act_loss: -0.011077880859375|cri_loss: -0.005207061767578125|unsuper_loss: 0.0 +average reward score: 5.70703125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.86%) |Training time=0.48s (22.07%) |Others=0.09 (4.07%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.57 +[2023-04-14 13:45:05,485] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 8128|ppo_ep: 1|act_loss: -0.0008001327514648438|cri_loss: -0.0003199577331542969|unsuper_loss: 0.0 +average reward score: 5.44921875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.56%) |Training time=0.48s (22.28%) |Others=0.09 (4.15%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.57 +[2023-04-14 13:45:07,544] [INFO] [logging.py:96:log_dist] [Rank 0] step=8130, skipped=100, lr=[1.8904214034259727e-08, 1.8904214034259727e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:45:07,562] [INFO] [timer.py:199:stop] epoch=0/micro_step=8130/global_step=8130, RunningAvgSamplesPerSec=105.6427885351493, CurrSamplesPerSec=100.87728193430314, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:45:07,655] [INFO] [logging.py:96:log_dist] [Rank 0] step=8130, skipped=142, lr=[1.3695261579316776e-08, 1.3695261579316776e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 8129|ppo_ep: 1|act_loss: 0.004642486572265625|cri_loss: 0.002925872802734375|unsuper_loss: 0.0 +average reward score: 5.61328125 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.30%) |Training time=0.48s (22.10%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8130|ppo_ep: 1|act_loss: 0.00099945068359375|cri_loss: 0.0006532669067382812|unsuper_loss: 0.0 +average reward score: 6.16015625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.53%) |Training time=0.47s (21.84%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8131|ppo_ep: 1|act_loss: -0.02508544921875|cri_loss: -0.01206207275390625|unsuper_loss: 0.0 +average reward score: 5.6640625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.33%) |Training time=0.48s (22.14%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8132|ppo_ep: 1|act_loss: -0.0108642578125|cri_loss: -0.0050048828125|unsuper_loss: 0.0 +average reward score: 5.92578125 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.32%) |Training time=0.48s (22.07%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8133|ppo_ep: 1|act_loss: 0.007167816162109375|cri_loss: 0.003833770751953125|unsuper_loss: 0.0 +average reward score: 6.10546875 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.46%) |Training time=0.49s (22.09%) |Others=0.10 (4.45%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8134|ppo_ep: 1|act_loss: -0.0309295654296875|cri_loss: -0.015045166015625|unsuper_loss: 0.0 +average reward score: 5.8046875 +------------------------------------------------------------------------------------- +|E2E latency=2.51s |Gather latency=0.00s (0.00%) |Generate time=1.87s (74.40%) |Training time=0.54s (21.60%) |Others=0.10 (4.00%)|CurSamplesPerSec=12.74 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8135|ppo_ep: 1|act_loss: 0.0028095245361328125|cri_loss: 0.0014448165893554688|unsuper_loss: 0.0 +average reward score: 5.09765625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.90%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8136|ppo_ep: 1|act_loss: 0.01763916015625|cri_loss: 0.00968170166015625|unsuper_loss: 0.0 +average reward score: 5.53515625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.43%) |Training time=0.48s (21.94%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8137|ppo_ep: 1|act_loss: -0.00473785400390625|cri_loss: -0.0020809173583984375|unsuper_loss: 0.0 +average reward score: 5.53125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.57%) |Training time=0.47s (21.85%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8138|ppo_ep: 1|act_loss: -0.018798828125|cri_loss: -0.00905609130859375|unsuper_loss: 0.0 +average reward score: 5.4765625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.34%) |Training time=0.48s (22.02%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.74 |AvgSamplesPerSec=14.57 +[2023-04-14 13:45:29,625] [INFO] [logging.py:96:log_dist] [Rank 0] step=8140, skipped=100, lr=[1.729706662568434e-08, 1.729706662568434e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:45:29,643] [INFO] [timer.py:199:stop] epoch=0/micro_step=8140/global_step=8140, RunningAvgSamplesPerSec=105.63444801584032, CurrSamplesPerSec=99.91842909073584, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:45:29,738] [INFO] [logging.py:96:log_dist] [Rank 0] step=8140, skipped=142, lr=[1.2707606475514667e-08, 1.2707606475514667e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 8139|ppo_ep: 1|act_loss: -0.0002751350402832031|cri_loss: 0.00021839141845703125|unsuper_loss: 0.0 +average reward score: 5.34765625 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.16%) |Training time=0.48s (22.23%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.67 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8140|ppo_ep: 1|act_loss: 0.0008211135864257812|cri_loss: 0.0006456375122070312|unsuper_loss: 0.0 +average reward score: 6.015625 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.59s (72.93%) |Training time=0.49s (22.47%) |Others=0.10 (4.60%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8141|ppo_ep: 1|act_loss: -0.0084075927734375|cri_loss: -0.003559112548828125|unsuper_loss: 0.0 +average reward score: 5.5234375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.13%) |Training time=0.49s (22.37%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.65 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8142|ppo_ep: 1|act_loss: -0.009246826171875|cri_loss: -0.00395965576171875|unsuper_loss: 0.0 +average reward score: 5.5390625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.60%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8143|ppo_ep: 1|act_loss: -0.019439697265625|cri_loss: -0.00914764404296875|unsuper_loss: 0.0 +average reward score: 5.92578125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.73%) |Training time=0.47s (21.72%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8144|ppo_ep: 1|act_loss: -0.02069091796875|cri_loss: -0.01020050048828125|unsuper_loss: 0.0 +average reward score: 6.4765625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.68%) |Training time=0.47s (21.69%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8145|ppo_ep: 1|act_loss: -0.01068115234375|cri_loss: -0.005199432373046875|unsuper_loss: 0.0 +average reward score: 5.25390625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.51%) |Training time=0.47s (21.94%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8146|ppo_ep: 1|act_loss: -0.0067138671875|cri_loss: -0.0033111572265625|unsuper_loss: 0.0 +average reward score: 5.0546875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.35%) |Training time=0.48s (21.87%) |Others=0.10 (4.77%)|CurSamplesPerSec=14.71 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8147|ppo_ep: 1|act_loss: 0.0004782676696777344|cri_loss: 0.0006284713745117188|unsuper_loss: 0.0 +average reward score: 5.4765625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.86%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8148|ppo_ep: 1|act_loss: -0.023040771484375|cri_loss: -0.01136016845703125|unsuper_loss: 0.0 +average reward score: 5.5234375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.47s (21.60%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.57 +[2023-04-14 13:45:51,466] [INFO] [logging.py:96:log_dist] [Rank 0] step=8150, skipped=100, lr=[1.5761180972532812e-08, 1.5761180972532812e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:45:51,484] [INFO] [timer.py:199:stop] epoch=0/micro_step=8150/global_step=8150, RunningAvgSamplesPerSec=105.63041590312197, CurrSamplesPerSec=103.20169652866737, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:45:51,577] [INFO] [logging.py:96:log_dist] [Rank 0] step=8150, skipped=142, lr=[1.1756819045251055e-08, 1.1756819045251055e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 8149|ppo_ep: 1|act_loss: -0.011749267578125|cri_loss: -0.005733489990234375|unsuper_loss: 0.0 +average reward score: 5.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.48%) |Training time=0.47s (20.33%) |Others=0.10 (4.19%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8150|ppo_ep: 1|act_loss: -0.01499176025390625|cri_loss: -0.007411956787109375|unsuper_loss: 0.0 +average reward score: 5.0546875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.83%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8151|ppo_ep: 1|act_loss: 0.0272369384765625|cri_loss: 0.0148773193359375|unsuper_loss: 0.0 +average reward score: 4.75 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.80%) |Training time=0.47s (21.65%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8152|ppo_ep: 1|act_loss: -0.00673675537109375|cri_loss: -0.0031871795654296875|unsuper_loss: 0.0 +average reward score: 5.1484375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.60%) |Training time=0.47s (21.77%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8153|ppo_ep: 1|act_loss: -0.01482391357421875|cri_loss: -0.00731658935546875|unsuper_loss: 0.0 +average reward score: 5.5078125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.47s (21.66%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8154|ppo_ep: 1|act_loss: -0.015289306640625|cri_loss: -0.00749969482421875|unsuper_loss: 0.0 +average reward score: 4.94921875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.63%) |Training time=0.47s (21.73%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57 +[2023-04-14 13:46:04,438] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, but hysteresis is 2. Reducing hysteresis to 1 +epoch: 0|step: 8155|ppo_ep: 1|act_loss: -0.00965118408203125|cri_loss: -0.004589080810546875|unsuper_loss: 0.0 +average reward score: 5.765625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.54%) |Training time=0.45s (20.90%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.96 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8156|ppo_ep: 1|act_loss: -0.015777587890625|cri_loss: -0.0076141357421875|unsuper_loss: 0.0 +average reward score: 5.296875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.58%) |Training time=0.47s (21.79%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8157|ppo_ep: 1|act_loss: 0.0056304931640625|cri_loss: 0.00444793701171875|unsuper_loss: 0.0 +average reward score: 5.64453125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.79%) |Training time=0.47s (21.67%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8158|ppo_ep: 1|act_loss: -0.0048980712890625|cri_loss: -0.0023345947265625|unsuper_loss: 0.0 +average reward score: 6.34375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.66%) |Training time=0.47s (21.72%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.57 +[2023-04-14 13:46:13,063] [INFO] [logging.py:96:log_dist] [Rank 0] step=8160, skipped=101, lr=[1.4439831523126524e-08, 1.4439831523126524e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:46:13,082] [INFO] [timer.py:199:stop] epoch=0/micro_step=8160/global_step=8160, RunningAvgSamplesPerSec=105.62953923713883, CurrSamplesPerSec=105.34076847791707, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:46:13,174] [INFO] [logging.py:96:log_dist] [Rank 0] step=8160, skipped=142, lr=[1.0842913381488851e-08, 1.0842913381488851e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 8159|ppo_ep: 1|act_loss: 0.0166015625|cri_loss: 0.00849151611328125|unsuper_loss: 0.0 +average reward score: 5.328125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.85%) |Training time=0.47s (21.62%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8160|ppo_ep: 1|act_loss: -0.001918792724609375|cri_loss: -0.000820159912109375|unsuper_loss: 0.0 +average reward score: 6.1953125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.74%) |Training time=0.47s (21.61%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8161|ppo_ep: 1|act_loss: -0.012054443359375|cri_loss: -0.005950927734375|unsuper_loss: 0.0 +average reward score: 5.27734375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.83%) |Training time=0.47s (21.59%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.57 +[2023-04-14 13:46:19,563] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384, reducing to 8192 +epoch: 0|step: 8162|ppo_ep: 1|act_loss: -0.01099395751953125|cri_loss: -0.00539398193359375|unsuper_loss: 0.0 +average reward score: 5.16015625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.78%) |Training time=0.45s (20.61%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.75 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8163|ppo_ep: 1|act_loss: 0.00798797607421875|cri_loss: 0.00482177734375|unsuper_loss: 0.0 +average reward score: 5.25 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.96%) |Training time=0.47s (20.67%) |Others=0.10 (4.36%)|CurSamplesPerSec=14.18 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8164|ppo_ep: 1|act_loss: 0.0006723403930664062|cri_loss: 0.0004429817199707031|unsuper_loss: 0.0 +average reward score: 5.8203125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.72%) |Training time=0.47s (21.62%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8165|ppo_ep: 1|act_loss: 0.01507568359375|cri_loss: 0.007755279541015625|unsuper_loss: 0.0 +average reward score: 6.05078125 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.57%) |Training time=0.47s (20.21%) |Others=0.10 (4.22%)|CurSamplesPerSec=13.73 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8166|ppo_ep: 1|act_loss: -0.0082244873046875|cri_loss: -0.003971099853515625|unsuper_loss: 0.0 +average reward score: 5.6328125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.77%) |Training time=0.47s (21.58%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8167|ppo_ep: 1|act_loss: -0.002979278564453125|cri_loss: -0.0011272430419921875|unsuper_loss: 0.0 +average reward score: 5.25390625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.15%) |Training time=0.46s (21.28%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8168|ppo_ep: 1|act_loss: -0.0156402587890625|cri_loss: -0.007701873779296875|unsuper_loss: 0.0 +average reward score: 4.68359375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.21%) |Training time=0.45s (21.11%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.57 +[2023-04-14 13:46:34,899] [INFO] [logging.py:96:log_dist] [Rank 0] step=8170, skipped=102, lr=[1.3176238413572798e-08, 1.3176238413572798e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:46:34,917] [INFO] [timer.py:199:stop] epoch=0/micro_step=8170/global_step=8170, RunningAvgSamplesPerSec=105.63095062148561, CurrSamplesPerSec=107.79319611192628, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:46:35,010] [INFO] [logging.py:96:log_dist] [Rank 0] step=8170, skipped=142, lr=[9.965903030514923e-09, 9.965903030514923e-09], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 8169|ppo_ep: 1|act_loss: -0.03314208984375|cri_loss: -0.0162353515625|unsuper_loss: 0.0 +average reward score: 5.2265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.10%) |Training time=0.46s (21.35%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8170|ppo_ep: 1|act_loss: -0.05389404296875|cri_loss: -0.0263214111328125|unsuper_loss: 0.0 +average reward score: 5.55078125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.46s (21.31%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8171|ppo_ep: 1|act_loss: -0.01548004150390625|cri_loss: -0.0076446533203125|unsuper_loss: 0.0 +average reward score: 5.86328125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.12%) |Training time=0.46s (21.19%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8172|ppo_ep: 1|act_loss: -0.0423583984375|cri_loss: -0.02008056640625|unsuper_loss: 0.0 +average reward score: 5.37109375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.60%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8173|ppo_ep: 1|act_loss: -0.0012054443359375|cri_loss: 0.003978729248046875|unsuper_loss: 0.0 +average reward score: 5.578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.08%) |Training time=0.46s (21.35%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8174|ppo_ep: 1|act_loss: -0.005931854248046875|cri_loss: -0.0025997161865234375|unsuper_loss: 0.0 +average reward score: 5.34375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.88%) |Training time=0.46s (21.46%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8175|ppo_ep: 1|act_loss: -0.0185089111328125|cri_loss: -0.00872039794921875|unsuper_loss: 0.0 +average reward score: 5.13671875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.72%) |Training time=0.47s (21.66%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8176|ppo_ep: 1|act_loss: -0.0109405517578125|cri_loss: -0.005306243896484375|unsuper_loss: 0.0 +average reward score: 5.3203125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.12%) |Training time=0.46s (21.18%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8177|ppo_ep: 1|act_loss: -0.01474761962890625|cri_loss: -0.0072021484375|unsuper_loss: 0.0 +average reward score: 5.04296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.46s (21.31%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8178|ppo_ep: 1|act_loss: 0.027130126953125|cri_loss: 0.0140533447265625|unsuper_loss: 0.0 +average reward score: 5.859375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.87%) |Training time=0.46s (21.49%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.57 +[2023-04-14 13:46:56,452] [INFO] [logging.py:96:log_dist] [Rank 0] step=8180, skipped=102, lr=[1.1840003415331683e-08, 1.1840003415331683e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:46:56,470] [INFO] [timer.py:199:stop] epoch=0/micro_step=8180/global_step=8180, RunningAvgSamplesPerSec=105.63260714351426, CurrSamplesPerSec=106.2880881995418, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:46:56,607] [INFO] [logging.py:96:log_dist] [Rank 0] step=8180, skipped=142, lr=[9.12580099173832e-09, 9.12580099173832e-09], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 8179|ppo_ep: 1|act_loss: -0.0120086669921875|cri_loss: -0.0056304931640625|unsuper_loss: 0.0 +average reward score: 5.91015625 +------------------------------------------------------------------------------------- +|E2E latency=2.20s |Gather latency=0.00s (0.00%) |Generate time=1.60s (72.47%) |Training time=0.47s (21.36%) |Others=0.14 (6.17%)|CurSamplesPerSec=14.53 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8180|ppo_ep: 1|act_loss: -0.042755126953125|cri_loss: -0.01361846923828125|unsuper_loss: 0.0 +average reward score: 5.2578125 +------------------------------------------------------------------------------------- +|E2E latency=2.25s |Gather latency=0.00s (0.00%) |Generate time=1.68s (74.74%) |Training time=0.46s (20.48%) |Others=0.11 (4.79%)|CurSamplesPerSec=14.19 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8181|ppo_ep: 1|act_loss: -0.00039386749267578125|cri_loss: 0.00017213821411132812|unsuper_loss: 0.0 +average reward score: 5.00390625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.02%) |Training time=0.46s (21.35%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8182|ppo_ep: 1|act_loss: -0.012725830078125|cri_loss: -0.00620269775390625|unsuper_loss: 0.0 +average reward score: 5.265625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.70%) |Training time=0.47s (21.63%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8183|ppo_ep: 1|act_loss: 0.03289794921875|cri_loss: 0.0170745849609375|unsuper_loss: 0.0 +average reward score: 5.02734375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.76%) |Training time=0.47s (21.54%) |Others=0.10 (4.69%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.57 +[2023-04-14 13:47:07,378] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192, reducing to 4096 +epoch: 0|step: 8184|ppo_ep: 1|act_loss: 0.01544189453125|cri_loss: 0.00815582275390625|unsuper_loss: 0.0 +average reward score: 4.6796875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.25%) |Training time=0.45s (21.10%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.95 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8185|ppo_ep: 1|act_loss: -0.02349853515625|cri_loss: -0.0108642578125|unsuper_loss: 0.0 +average reward score: 5.234375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.70%) |Training time=0.46s (21.51%) |Others=0.10 (4.79%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8186|ppo_ep: 1|act_loss: -0.00730133056640625|cri_loss: -0.0034046173095703125|unsuper_loss: 0.0 +average reward score: 4.81640625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.09%) |Training time=0.46s (21.23%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8187|ppo_ep: 1|act_loss: -0.01087188720703125|cri_loss: -0.00518035888671875|unsuper_loss: 0.0 +average reward score: 5.4453125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.24%) |Training time=0.45s (21.19%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8188|ppo_ep: 1|act_loss: 0.011016845703125|cri_loss: 0.005649566650390625|unsuper_loss: 0.0 +average reward score: 5.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.19%) |Training time=0.45s (21.15%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.57 +[2023-04-14 13:47:18,096] [INFO] [logging.py:96:log_dist] [Rank 0] step=8190, skipped=103, lr=[1.0698389333422932e-08, 1.0698389333422932e-08], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:47:18,114] [INFO] [timer.py:199:stop] epoch=0/micro_step=8190/global_step=8190, RunningAvgSamplesPerSec=105.63530852231541, CurrSamplesPerSec=109.11442656642257, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:47:18,206] [INFO] [logging.py:96:log_dist] [Rank 0] step=8190, skipped=142, lr=[8.322619717497648e-09, 8.322619717497648e-09], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 8189|ppo_ep: 1|act_loss: -0.01428985595703125|cri_loss: -0.00637054443359375|unsuper_loss: 0.0 +average reward score: 5.51953125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.21%) |Training time=0.46s (21.24%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8190|ppo_ep: 1|act_loss: -0.00807952880859375|cri_loss: -0.0038166046142578125|unsuper_loss: 0.0 +average reward score: 5.9140625 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.00%) |Training time=0.47s (21.42%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.72 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8191|ppo_ep: 1|act_loss: -0.0005846023559570312|cri_loss: -0.00010395050048828125|unsuper_loss: 0.0 +average reward score: 4.96484375 +------------------------------------------------------------------------------------- +|E2E latency=2.18s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.32%) |Training time=0.48s (22.09%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.68 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8192|ppo_ep: 1|act_loss: -0.023468017578125|cri_loss: -0.01143646240234375|unsuper_loss: 0.0 +average reward score: 5.546875 +------------------------------------------------------------------------------------- +|E2E latency=2.21s |Gather latency=0.00s (0.00%) |Generate time=1.62s (73.45%) |Training time=0.49s (22.02%) |Others=0.10 (4.53%)|CurSamplesPerSec=14.47 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8193|ppo_ep: 1|act_loss: 0.004619598388671875|cri_loss: 0.00238800048828125|unsuper_loss: 0.0 +average reward score: 6.140625 +------------------------------------------------------------------------------------- +|E2E latency=2.27s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.65%) |Training time=0.48s (20.93%) |Others=0.10 (4.41%)|CurSamplesPerSec=14.10 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8194|ppo_ep: 1|act_loss: -0.001743316650390625|cri_loss: -0.0005774497985839844|unsuper_loss: 0.0 +average reward score: 5.59765625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.18%) |Training time=0.45s (21.15%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8195|ppo_ep: 1|act_loss: -0.0200042724609375|cri_loss: -0.008514404296875|unsuper_loss: 0.0 +average reward score: 5.22265625 +------------------------------------------------------------------------------------- +|E2E latency=2.33s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.42%) |Training time=0.47s (20.11%) |Others=0.10 (4.47%)|CurSamplesPerSec=13.74 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8196|ppo_ep: 1|act_loss: -0.0178985595703125|cri_loss: -0.0088043212890625|unsuper_loss: 0.0 +average reward score: 5.734375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.06%) |Training time=0.46s (21.29%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8197|ppo_ep: 1|act_loss: 0.02801513671875|cri_loss: 0.0142059326171875|unsuper_loss: 0.0 +average reward score: 6.1875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.20%) |Training time=0.45s (21.14%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8198|ppo_ep: 1|act_loss: -0.0113067626953125|cri_loss: -0.004970550537109375|unsuper_loss: 0.0 +average reward score: 5.703125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.06%) |Training time=0.46s (21.28%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.57 +[2023-04-14 13:47:40,025] [INFO] [logging.py:96:log_dist] [Rank 0] step=8200, skipped=103, lr=[9.497721415816521e-09, 9.497721415816521e-09], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:47:40,044] [INFO] [timer.py:199:stop] epoch=0/micro_step=8200/global_step=8200, RunningAvgSamplesPerSec=105.63448505052008, CurrSamplesPerSec=103.44995325327169, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:47:40,136] [INFO] [logging.py:96:log_dist] [Rank 0] step=8200, skipped=142, lr=[7.556371112877325e-09, 7.556371112877325e-09], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 8199|ppo_ep: 1|act_loss: 0.011444091796875|cri_loss: 0.006195068359375|unsuper_loss: 0.0 +average reward score: 6.37109375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.72%) |Training time=0.47s (21.76%) |Others=0.10 (4.51%)|CurSamplesPerSec=14.76 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8200|ppo_ep: 1|act_loss: -0.0039825439453125|cri_loss: -0.0019474029541015625|unsuper_loss: 0.0 +average reward score: 5.55078125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.45s (21.09%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8201|ppo_ep: 1|act_loss: 0.004802703857421875|cri_loss: 0.002658843994140625|unsuper_loss: 0.0 +average reward score: 5.48046875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.35%) |Training time=0.45s (20.99%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8202|ppo_ep: 1|act_loss: -0.0181884765625|cri_loss: -0.00818634033203125|unsuper_loss: 0.0 +average reward score: 5.43359375 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.46%) |Training time=0.45s (20.86%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.93 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8203|ppo_ep: 1|act_loss: 0.010589599609375|cri_loss: 0.0053863525390625|unsuper_loss: 0.0 +average reward score: 5.34765625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.35%) |Training time=0.45s (21.09%) |Others=0.10 (4.57%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8204|ppo_ep: 1|act_loss: 0.033782958984375|cri_loss: 0.0172882080078125|unsuper_loss: 0.0 +average reward score: 5.96484375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.17%) |Training time=0.45s (21.15%) |Others=0.10 (4.68%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8205|ppo_ep: 1|act_loss: 0.019134521484375|cri_loss: 0.01016998291015625|unsuper_loss: 0.0 +average reward score: 5.22265625 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.56%) |Training time=0.44s (20.74%) |Others=0.10 (4.70%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8206|ppo_ep: 1|act_loss: -0.020904541015625|cri_loss: -0.00949859619140625|unsuper_loss: 0.0 +average reward score: 5.4921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.96%) |Training time=0.46s (21.40%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8207|ppo_ep: 1|act_loss: -0.008209228515625|cri_loss: -0.00391387939453125|unsuper_loss: 0.0 +average reward score: 5.87890625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.02%) |Training time=0.46s (21.37%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8208|ppo_ep: 1|act_loss: -0.0041656494140625|cri_loss: -0.001949310302734375|unsuper_loss: 0.0 +average reward score: 5.11328125 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.32%) |Training time=0.45s (21.01%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.57 +[2023-04-14 13:48:01,494] [INFO] [logging.py:96:log_dist] [Rank 0] step=8210, skipped=103, lr=[8.368430858746151e-09, 8.368430858746151e-09], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:48:01,512] [INFO] [timer.py:199:stop] epoch=0/micro_step=8210/global_step=8210, RunningAvgSamplesPerSec=105.63972264176873, CurrSamplesPerSec=108.21060710870746, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:48:01,604] [INFO] [logging.py:96:log_dist] [Rank 0] step=8210, skipped=142, lr=[6.827066535529947e-09, 6.827066535529947e-09], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 8209|ppo_ep: 1|act_loss: 0.01201629638671875|cri_loss: 0.006168365478515625|unsuper_loss: 0.0 +average reward score: 5.56640625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.04%) |Training time=0.46s (21.34%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8210|ppo_ep: 1|act_loss: -0.012237548828125|cri_loss: -0.00591278076171875|unsuper_loss: 0.0 +average reward score: 5.8671875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.94%) |Training time=0.46s (21.41%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8211|ppo_ep: 1|act_loss: -0.002490997314453125|cri_loss: -0.0008959770202636719|unsuper_loss: 0.0 +average reward score: 5.78125 +------------------------------------------------------------------------------------- +|E2E latency=2.31s |Gather latency=0.00s (0.00%) |Generate time=1.75s (75.81%) |Training time=0.46s (19.86%) |Others=0.10 (4.33%)|CurSamplesPerSec=13.83 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8212|ppo_ep: 1|act_loss: -0.001079559326171875|cri_loss: -0.0004200935363769531|unsuper_loss: 0.0 +average reward score: 6.21875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.46s (21.30%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8213|ppo_ep: 1|act_loss: -0.004608154296875|cri_loss: -0.002239227294921875|unsuper_loss: 0.0 +average reward score: 4.78515625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.03%) |Training time=0.46s (21.38%) |Others=0.10 (4.59%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8214|ppo_ep: 1|act_loss: -0.0010585784912109375|cri_loss: -0.0004050731658935547|unsuper_loss: 0.0 +average reward score: 5.22265625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.98%) |Training time=0.46s (21.30%) |Others=0.10 (4.71%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8215|ppo_ep: 1|act_loss: 0.00254058837890625|cri_loss: 0.0013818740844726562|unsuper_loss: 0.0 +average reward score: 5.56640625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.15%) |Training time=0.45s (21.18%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8216|ppo_ep: 1|act_loss: 0.033416748046875|cri_loss: 0.018310546875|unsuper_loss: 0.0 +average reward score: 5.42578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.05%) |Training time=0.46s (21.32%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8217|ppo_ep: 1|act_loss: 0.0007562637329101562|cri_loss: 0.0010824203491210938|unsuper_loss: 0.0 +average reward score: 5.421875 +------------------------------------------------------------------------------------- +|E2E latency=2.14s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.22%) |Training time=0.45s (21.11%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8218|ppo_ep: 1|act_loss: -0.0105743408203125|cri_loss: -0.004993438720703125|unsuper_loss: 0.0 +average reward score: 5.1796875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.33%) |Training time=0.46s (21.05%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.57 +[2023-04-14 13:48:23,164] [INFO] [logging.py:96:log_dist] [Rank 0] step=8220, skipped=103, lr=[7.3105344010205834e-09, 7.3105344010205834e-09], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:48:23,182] [INFO] [timer.py:199:stop] epoch=0/micro_step=8220/global_step=8220, RunningAvgSamplesPerSec=105.64292040215666, CurrSamplesPerSec=107.84100454045472, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:48:23,275] [INFO] [logging.py:96:log_dist] [Rank 0] step=8220, skipped=142, lr=[6.134716795508644e-09, 6.134716795508644e-09], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 8219|ppo_ep: 1|act_loss: 0.07666015625|cri_loss: 0.0426025390625|unsuper_loss: 0.0 +average reward score: 5.8671875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.95%) |Training time=0.46s (21.43%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8220|ppo_ep: 1|act_loss: -0.00444793701171875|cri_loss: -0.002170562744140625|unsuper_loss: 0.0 +average reward score: 5.12890625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.00%) |Training time=0.46s (21.36%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8221|ppo_ep: 1|act_loss: 0.0042572021484375|cri_loss: 0.002227783203125|unsuper_loss: 0.0 +average reward score: 5.77734375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.44%) |Training time=0.45s (21.00%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8222|ppo_ep: 1|act_loss: 0.030242919921875|cri_loss: 0.0159759521484375|unsuper_loss: 0.0 +average reward score: 5.5 +------------------------------------------------------------------------------------- +|E2E latency=2.19s |Gather latency=0.00s (0.00%) |Generate time=1.62s (74.21%) |Training time=0.46s (21.23%) |Others=0.10 (4.56%)|CurSamplesPerSec=14.64 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8223|ppo_ep: 1|act_loss: -0.0179443359375|cri_loss: -0.0082550048828125|unsuper_loss: 0.0 +average reward score: 5.62109375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.15%) |Training time=0.46s (21.27%) |Others=0.10 (4.58%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8224|ppo_ep: 1|act_loss: -0.004680633544921875|cri_loss: -0.0015468597412109375|unsuper_loss: 0.0 +average reward score: 5.625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.26%) |Training time=0.45s (21.07%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.89 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8225|ppo_ep: 1|act_loss: 0.0092010498046875|cri_loss: 0.004795074462890625|unsuper_loss: 0.0 +average reward score: 5.84765625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.11%) |Training time=0.46s (21.23%) |Others=0.10 (4.66%)|CurSamplesPerSec=14.90 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8226|ppo_ep: 1|act_loss: -0.004886627197265625|cri_loss: -0.002384185791015625|unsuper_loss: 0.0 +average reward score: 5.0234375 +------------------------------------------------------------------------------------- +|E2E latency=2.32s |Gather latency=0.00s (0.00%) |Generate time=1.76s (75.97%) |Training time=0.46s (19.70%) |Others=0.10 (4.33%)|CurSamplesPerSec=13.79 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8227|ppo_ep: 1|act_loss: -0.0252532958984375|cri_loss: -0.01244354248046875|unsuper_loss: 0.0 +average reward score: 4.96875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.26%) |Training time=0.45s (21.07%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.91 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8228|ppo_ep: 1|act_loss: 0.0002658367156982422|cri_loss: 0.0002161264419555664|unsuper_loss: 0.0 +average reward score: 6.0234375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.23%) |Training time=0.45s (21.12%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.88 |AvgSamplesPerSec=14.57 +[2023-04-14 13:48:44,872] [INFO] [logging.py:96:log_dist] [Rank 0] step=8230, skipped=103, lr=[6.324047723218296e-09, 6.324047723218296e-09], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:48:44,890] [INFO] [timer.py:199:stop] epoch=0/micro_step=8230/global_step=8230, RunningAvgSamplesPerSec=105.64653553431586, CurrSamplesPerSec=106.69110328035784, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:48:44,974] [INFO] [loss_scaler.py:188:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, but hysteresis is 2. Reducing hysteresis to 1 +[2023-04-14 13:48:44,975] [INFO] [logging.py:96:log_dist] [Rank 0] step=8230, skipped=143, lr=[5.543206908509291e-09, 5.543206908509291e-09], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 8229|ppo_ep: 1|act_loss: -0.0010890960693359375|cri_loss: -0.00036597251892089844|unsuper_loss: 0.0 +average reward score: 5.44921875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.33%) |Training time=0.46s (21.58%) |Others=0.09 (4.09%)|CurSamplesPerSec=14.92 |AvgSamplesPerSec=14.57 +[2023-04-14 13:48:47,137] [INFO] [loss_scaler.py:181:update_scale] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768 +epoch: 0|step: 8230|ppo_ep: 1|act_loss: 0.00010842084884643555|cri_loss: 0.00010132789611816406|unsuper_loss: 0.0 +average reward score: 4.859375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.75%) |Training time=0.48s (22.10%) |Others=0.09 (4.15%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8231|ppo_ep: 1|act_loss: 0.01229095458984375|cri_loss: 0.006389617919921875|unsuper_loss: 0.0 +average reward score: 5.546875 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.76%) |Training time=0.47s (21.62%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.77 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8232|ppo_ep: 1|act_loss: -0.05596923828125|cri_loss: -0.0206756591796875|unsuper_loss: 0.0 +average reward score: 5.4140625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.80%) |Training time=0.46s (21.53%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.84 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8233|ppo_ep: 1|act_loss: 0.0100555419921875|cri_loss: 0.005420684814453125|unsuper_loss: 0.0 +average reward score: 5.0234375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.65%) |Training time=0.47s (21.73%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.80 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8234|ppo_ep: 1|act_loss: 0.00278472900390625|cri_loss: 0.001552581787109375|unsuper_loss: 0.0 +average reward score: 5.42578125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.37%) |Training time=0.45s (20.96%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8235|ppo_ep: 1|act_loss: 0.005443572998046875|cri_loss: 0.00420379638671875|unsuper_loss: 0.0 +average reward score: 5.2890625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.68%) |Training time=0.47s (21.77%) |Others=0.10 (4.55%)|CurSamplesPerSec=14.79 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8236|ppo_ep: 1|act_loss: 0.0144195556640625|cri_loss: 0.0084686279296875|unsuper_loss: 0.0 +average reward score: 5.51171875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.46s (21.54%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8237|ppo_ep: 1|act_loss: 0.01497650146484375|cri_loss: 0.00762939453125|unsuper_loss: 0.0 +average reward score: 5.93359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.81%) |Training time=0.46s (21.54%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8238|ppo_ep: 1|act_loss: 0.02374267578125|cri_loss: 0.0127105712890625|unsuper_loss: 0.0 +average reward score: 6.1640625 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.66%) |Training time=0.47s (21.71%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.57 +[2023-04-14 13:49:06,455] [INFO] [logging.py:96:log_dist] [Rank 0] step=8240, skipped=103, lr=[5.408985447451789e-09, 5.408985447451789e-09], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:49:06,473] [INFO] [timer.py:199:stop] epoch=0/micro_step=8240/global_step=8240, RunningAvgSamplesPerSec=105.64600782971256, CurrSamplesPerSec=105.71246698890796, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:49:06,566] [INFO] [logging.py:96:log_dist] [Rank 0] step=8240, skipped=144, lr=[4.981645860989148e-09, 4.981645860989148e-09], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 8239|ppo_ep: 1|act_loss: -0.0228118896484375|cri_loss: -0.01100921630859375|unsuper_loss: 0.0 +average reward score: 4.86328125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.81%) |Training time=0.47s (21.56%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8240|ppo_ep: 1|act_loss: 0.00240325927734375|cri_loss: 0.0013647079467773438|unsuper_loss: 0.0 +average reward score: 5.609375 +------------------------------------------------------------------------------------- +|E2E latency=2.24s |Gather latency=0.00s (0.00%) |Generate time=1.59s (71.19%) |Training time=0.47s (21.06%) |Others=0.17 (7.75%)|CurSamplesPerSec=14.30 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8241|ppo_ep: 1|act_loss: -0.0002484321594238281|cri_loss: 6.747245788574219e-05|unsuper_loss: 0.0 +average reward score: 5.80859375 +------------------------------------------------------------------------------------- +|E2E latency=2.23s |Gather latency=0.00s (0.00%) |Generate time=1.67s (74.66%) |Training time=0.47s (20.86%) |Others=0.10 (4.48%)|CurSamplesPerSec=14.33 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8242|ppo_ep: 1|act_loss: 0.01103973388671875|cri_loss: 0.006000518798828125|unsuper_loss: 0.0 +average reward score: 4.703125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.79%) |Training time=0.47s (21.57%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8243|ppo_ep: 1|act_loss: 0.030548095703125|cri_loss: 0.0157928466796875|unsuper_loss: 0.0 +average reward score: 6.09375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.79%) |Training time=0.47s (21.59%) |Others=0.10 (4.63%)|CurSamplesPerSec=14.82 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8244|ppo_ep: 1|act_loss: -0.0150299072265625|cri_loss: -0.00738525390625|unsuper_loss: 0.0 +average reward score: 5.46875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.71%) |Training time=0.47s (21.64%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8245|ppo_ep: 1|act_loss: -0.021026611328125|cri_loss: -0.01030731201171875|unsuper_loss: 0.0 +average reward score: 5.9375 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.54%) |Training time=0.47s (21.84%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8246|ppo_ep: 1|act_loss: 0.00293731689453125|cri_loss: 0.0015716552734375|unsuper_loss: 0.0 +average reward score: 4.8984375 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.98%) |Training time=0.46s (21.38%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8247|ppo_ep: 1|act_loss: 0.0004968643188476562|cri_loss: 0.0003986358642578125|unsuper_loss: 0.0 +average reward score: 5.76171875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (73.84%) |Training time=0.47s (21.62%) |Others=0.10 (4.54%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8248|ppo_ep: 1|act_loss: 0.027130126953125|cri_loss: 0.01389312744140625|unsuper_loss: 0.0 +average reward score: 4.73828125 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.65%) |Training time=0.47s (21.74%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.81 |AvgSamplesPerSec=14.57 +[2023-04-14 13:49:28,193] [INFO] [logging.py:96:log_dist] [Rank 0] step=8250, skipped=103, lr=[4.5653611371511645e-09, 4.5653611371511645e-09], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:49:28,212] [INFO] [timer.py:199:stop] epoch=0/micro_step=8250/global_step=8250, RunningAvgSamplesPerSec=105.64539594557534, CurrSamplesPerSec=105.90983071658027, MemAllocated=9.61GB, MaxMemAllocated=19.38GB +[2023-04-14 13:49:28,304] [INFO] [logging.py:96:log_dist] [Rank 0] step=8250, skipped=144, lr=[4.3928225118547444e-09, 4.3928225118547444e-09], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 8249|ppo_ep: 1|act_loss: 0.00682830810546875|cri_loss: 0.0035343170166015625|unsuper_loss: 0.0 +average reward score: 5.765625 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.82%) |Training time=0.46s (21.56%) |Others=0.10 (4.62%)|CurSamplesPerSec=14.85 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8250|ppo_ep: 1|act_loss: 0.022125244140625|cri_loss: 0.01197052001953125|unsuper_loss: 0.0 +average reward score: 4.421875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.55%) |Training time=0.47s (21.54%) |Others=0.11 (4.91%)|CurSamplesPerSec=14.78 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8251|ppo_ep: 1|act_loss: 0.021697998046875|cri_loss: 0.01129150390625|unsuper_loss: 0.0 +average reward score: 5.25 +------------------------------------------------------------------------------------- +|E2E latency=2.17s |Gather latency=0.00s (0.00%) |Generate time=1.61s (74.07%) |Training time=0.46s (21.32%) |Others=0.10 (4.61%)|CurSamplesPerSec=14.73 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8252|ppo_ep: 1|act_loss: 0.01220703125|cri_loss: 0.00616455078125|unsuper_loss: 0.0 +average reward score: 5.37890625 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.69s (74.86%) |Training time=0.47s (20.71%) |Others=0.10 (4.43%)|CurSamplesPerSec=14.15 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8253|ppo_ep: 1|act_loss: -0.006011962890625|cri_loss: -0.00284576416015625|unsuper_loss: 0.0 +average reward score: 5.3046875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.16%) |Training time=0.46s (21.17%) |Others=0.10 (4.67%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8254|ppo_ep: 1|act_loss: 0.0171051025390625|cri_loss: 0.0088653564453125|unsuper_loss: 0.0 +average reward score: 5.54296875 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.99%) |Training time=0.46s (21.37%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.87 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8255|ppo_ep: 1|act_loss: -0.02935791015625|cri_loss: -0.01371002197265625|unsuper_loss: 0.0 +average reward score: 5.34375 +------------------------------------------------------------------------------------- +|E2E latency=2.26s |Gather latency=0.00s (0.00%) |Generate time=1.64s (72.87%) |Training time=0.51s (22.66%) |Others=0.10 (4.47%)|CurSamplesPerSec=14.18 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8256|ppo_ep: 1|act_loss: -0.0007495880126953125|cri_loss: 0.0006227493286132812|unsuper_loss: 0.0 +average reward score: 5.8125 +------------------------------------------------------------------------------------- +|E2E latency=2.15s |Gather latency=0.00s (0.00%) |Generate time=1.59s (74.05%) |Training time=0.46s (21.30%) |Others=0.10 (4.65%)|CurSamplesPerSec=14.86 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8257|ppo_ep: 1|act_loss: 0.00316619873046875|cri_loss: 0.0016927719116210938|unsuper_loss: 0.0 +average reward score: 5.66796875 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.60s (74.01%) |Training time=0.46s (21.35%) |Others=0.10 (4.64%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.57 +epoch: 0|step: 8258|ppo_ep: 1|act_loss: 0.00679779052734375|cri_loss: 0.0038509368896484375|unsuper_loss: 0.0 +average reward score: 5.43359375 +------------------------------------------------------------------------------------- +|E2E latency=2.16s |Gather latency=0.00s (0.00%) |Generate time=1.59s (73.78%) |Training time=0.46s (21.29%) |Others=0.11 (4.93%)|CurSamplesPerSec=14.83 |AvgSamplesPerSec=14.57 +[2023-04-14 13:49:49,728] [INFO] [logging.py:96:log_dist] [Rank 0] step=8260, skipped=103, lr=[3.793187296863779e-09, 3.793187296863779e-09], mom=[(0.9, 0.95), (0.9, 0.95)] +[2023-04-14 13:49:49,747] [INFO] [timer.py:199:stop] epoch=0/micro_step=8260/global_step=8260, RunningAvgSamplesPerSec=105.65211622011834, CurrSamplesPerSec=163.9855291500147, MemAllocated=9.46GB, MaxMemAllocated=19.38GB +[2023-04-14 13:49:50,315] [INFO] [logging.py:96:log_dist] [Rank 0] step=8260, skipped=144, lr=[3.840990081415141e-09, 3.840990081415141e-09], mom=[(0.9, 0.95), (0.9, 0.95)] +epoch: 0|step: 8259|ppo_ep: 1|act_loss: 0.0166015625|cri_loss: 0.00836181640625|unsuper_loss: 0.0 +average reward score: 6.16015625 +------------------------------------------------------------------------------------- +saving model ... +saving model ...saving model ...saving model ... + + +saving model ...saving model ... +saving model ...saving model ... + + +[2023-04-14 13:49:56,880] [INFO] [launch.py:460:main] Process 412204 exits successfully. +[2023-04-14 13:49:57,881] [INFO] [launch.py:460:main] Process 412199 exits successfully. +[2023-04-14 13:49:57,881] [INFO] [launch.py:460:main] Process 412200 exits successfully. +[2023-04-14 13:49:57,881] [INFO] [launch.py:460:main] Process 412205 exits successfully. +[2023-04-14 13:49:57,881] [INFO] [launch.py:460:main] Process 412201 exits successfully. +[2023-04-14 13:49:58,883] [INFO] [launch.py:460:main] Process 412202 exits successfully. +[2023-04-14 13:49:58,883] [INFO] [launch.py:460:main] Process 412203 exits successfully. +[2023-04-14 13:50:01,886] [INFO] [launch.py:460:main] Process 412198 exits successfully.