diff --git "a/training.log" "b/training.log" --- "a/training.log" +++ "b/training.log" @@ -1,126 +1,126 @@ -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version! +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version! warnings.warn( [2023-04-14 08:32:38,890] [WARNING] [runner.py:190:fetch_hostfile] Unable to find hostfile, will proceed with training with local resources only. -[2023-04-14 08:32:39,797] [INFO] [runner.py:540:main] cmd = /home/minutiae/.conda/envs/py39/bin/python -u -m deepspeed.launcher.launch --world_info=eyJsb2NhbGhvc3QiOiBbMCwgMSwgMiwgMywgNCwgNSwgNiwgN119 --master_addr=127.0.0.1 --master_port=12346 --enable_each_rank_log=None main.py --data_path Dahoas/rm-static Dahoas/full-hh-rlhf Dahoas/synthetic-instruct-gptj-pairwise yitingxie/rlhf-reward-datasets openai/webgpt_comparisons stanfordnlp/SHP --data_split 2,4,4 --actor_model_name_or_path /lus/grand/projects/BNN-Scale/chatgpt/hf_runs/DeepSpeedExamples/applications/DeepSpeed-Chat/output/actor-models/1.3b --critic_model_name_or_path /lus/grand/projects/BNN-Scale/chatgpt/hf_runs/DeepSpeedExamples/applications/DeepSpeed-Chat/output/reward-models/350m --num_padding_at_beginning 1 --per_device_train_batch_size 4 --per_device_mini_train_batch_size 4 --generation_batch_numbers 1 --ppo_epochs 1 --max_answer_seq_len 256 --max_prompt_seq_len 256 --actor_learning_rate 9.65e-6 --critic_learning_rate 5e-6 --actor_weight_decay 0.1 --critic_weight_decay 0.1 --num_train_epochs 1 --lr_scheduler_type cosine --gradient_accumulation_steps 1 --num_warmup_steps 100 --deepspeed --seed 1234 --enable_hybrid_engine --actor_zero_stage 2 --critic_zero_stage 2 --output_dir /lus/grand/projects/BNN-Scale/chatgpt/hf_runs/DeepSpeedExamples/applications/DeepSpeed-Chat/output/step3-models/1.3b -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version! +[2023-04-14 08:32:39,797] [INFO] [runner.py:540:main] cmd = /home/AdamG012/.conda/envs/py39/bin/python -u -m deepspeed.launcher.launch --world_info=eyJsb2NhbGhvc3QiOiBbMCwgMSwgMiwgMywgNCwgNSwgNiwgN119 --master_addr=127.0.0.1 --master_port=12346 --enable_each_rank_log=None main.py --data_path Dahoas/rm-static Dahoas/full-hh-rlhf Dahoas/synthetic-instruct-gptj-pairwise yitingxie/rlhf-reward-datasets openai/webgpt_comparisons stanfordnlp/SHP --data_split 2,4,4 --actor_model_name_or_path /chatgpt/hf_runs/DeepSpeedExamples/applications/DeepSpeed-Chat/output/actor-models/1.3b --critic_model_name_or_path /chatgpt/hf_runs/DeepSpeedExamples/applications/DeepSpeed-Chat/output/reward-models/350m --num_padding_at_beginning 1 --per_device_train_batch_size 4 --per_device_mini_train_batch_size 4 --generation_batch_numbers 1 --ppo_epochs 1 --max_answer_seq_len 256 --max_prompt_seq_len 256 --actor_learning_rate 9.65e-6 --critic_learning_rate 5e-6 --actor_weight_decay 0.1 --critic_weight_decay 0.1 --num_train_epochs 1 --lr_scheduler_type cosine --gradient_accumulation_steps 1 --num_warmup_steps 100 --deepspeed --seed 1234 --enable_hybrid_engine --actor_zero_stage 2 --critic_zero_stage 2 --output_dir /chatgpt/hf_runs/DeepSpeedExamples/applications/DeepSpeed-Chat/output/step3-models/1.3b +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version! warnings.warn( [2023-04-14 08:32:50,431] [INFO] [launch.py:229:main] WORLD INFO DICT: {'localhost': [0, 1, 2, 3, 4, 5, 6, 7]} [2023-04-14 08:32:50,884] [INFO] [launch.py:235:main] nnodes=1, num_local_procs=8, node_rank=0 [2023-04-14 08:32:50,884] [INFO] [launch.py:246:main] global_rank_mapping=defaultdict(, {'localhost': [0, 1, 2, 3, 4, 5, 6, 7]}) [2023-04-14 08:32:50,884] [INFO] [launch.py:247:main] dist_world_size=8 [2023-04-14 08:32:50,884] [INFO] [launch.py:249:main] Setting CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version! +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version! warnings.warn( -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version! +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version! warnings.warn( -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version! +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version! warnings.warn( -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version! +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version! warnings.warn( -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version! +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version! warnings.warn( -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version! +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version! warnings.warn( -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version! +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version! warnings.warn( -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version! +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version! warnings.warn( [2023-04-14 08:35:14,863] [INFO] [comm.py:586:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl -Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec) - 0%| | 0/2 [00:00 [2023-04-14 08:44:13,193] [INFO] [logging.py:96:log_dist] [Rank 0] Creating torch.float16 ZeRO stage 2 optimizer @@ -283,7 +283,7 @@ To disable this warning, you can either: [2023-04-14 08:44:13,704] [INFO] [stage_1_and_2.py:134:__init__] Allgather bucket size 500,000,000 [2023-04-14 08:44:13,704] [INFO] [stage_1_and_2.py:135:__init__] CPU Offload: False [2023-04-14 08:44:13,704] [INFO] [stage_1_and_2.py:136:__init__] Round robin gradient partitioning: False -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible @@ -292,7 +292,7 @@ huggingface/tokenizers: The current process just got forked, after parallelism h To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) -Emitting ninja build file /home/minutiae/.cache/torch_extensions/py39_cu113/utils/build.ninja... +Emitting ninja build file /home/AdamG012/.cache/torch_extensions/py39_cu113/utils/build.ninja... Building extension module utils... Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... @@ -324,30 +324,30 @@ Rank: 3 partition count [8, 8] and sizes[(164401920, False), (67840, False)] Rank: 2 partition count [8, 8] and sizes[(164401920, False), (67840, False)] Rank: 7 partition count [8, 8] and sizes[(164401920, False), (67840, False)] Rank: 5 partition count [8, 8] and sizes[(164401920, False), (67840, False)] -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module utils, skipping build step... Loading extension module utils... -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... Time to load utils op: 0.0018334388732910156 seconds No modifications detected for re-loaded extension module utils, skipping build step... Loading extension module utils... -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... Time to load utils op: 0.0009965896606445312 seconds No modifications detected for re-loaded extension module utils, skipping build step... Loading extension module utils... Time to load utils op: 0.001102447509765625 seconds -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module utils, skipping build step... Loading extension module utils... Time to load utils op: 0.0010309219360351562 seconds -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module utils, skipping build step... Loading extension module utils... Time to load utils op: 0.0010993480682373047 seconds No modifications detected for re-loaded extension module utils, skipping build step... Loading extension module utils... -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... Time to load utils op: 0.0011150836944580078 seconds No modifications detected for re-loaded extension module utils, skipping build step... Loading extension module utils... @@ -511,7 +511,7 @@ Time to load utils op: 0.0014064311981201172 seconds "tp_gather_partition_size": 8 } } -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module utils, skipping build step... Loading extension module utils... Time to load utils op: 0.0014083385467529297 seconds @@ -584,7 +584,7 @@ huggingface/tokenizers: The current process just got forked, after parallelism h To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible @@ -610,21 +610,21 @@ huggingface/tokenizers: The current process just got forked, after parallelism h To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) -Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combinationUsing /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combinationUsing /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... To disable this warning, you can either: @@ -638,288 +638,288 @@ huggingface/tokenizers: The current process just got forked, after parallelism h To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... Detected CUDA files, patching ldflags -Emitting ninja build file /home/minutiae/.cache/torch_extensions/py39_cu113/transformer_inference/build.ninja... +Emitting ninja build file /home/AdamG012/.cache/torch_extensions/py39_cu113/transformer_inference/build.ninja... Building extension module transformer_inference... Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) -[1/9] /usr/local/cuda-11.4/bin/nvcc -ccbin /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/minutiae/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/dequantize.cu -o dequantize.cuda.o -[2/9] /usr/local/cuda-11.4/bin/nvcc -ccbin /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/minutiae/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/gelu.cu -o gelu.cuda.o -[3/9] /usr/local/cuda-11.4/bin/nvcc -ccbin /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/minutiae/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/relu.cu -o relu.cuda.o -[4/9] /usr/local/cuda-11.4/bin/nvcc -ccbin /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/minutiae/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/apply_rotary_pos_emb.cu -o apply_rotary_pos_emb.cuda.o -[5/9] /usr/local/cuda-11.4/bin/nvcc -ccbin /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/minutiae/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/transform.cu -o transform.cuda.o -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/transform.cu(57): warning: variable "lane" was declared but never referenced +[1/9] /usr/local/cuda-11.4/bin/nvcc -ccbin /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/AdamG012/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/dequantize.cu -o dequantize.cuda.o +[2/9] /usr/local/cuda-11.4/bin/nvcc -ccbin /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/AdamG012/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/gelu.cu -o gelu.cuda.o +[3/9] /usr/local/cuda-11.4/bin/nvcc -ccbin /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/AdamG012/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/relu.cu -o relu.cuda.o +[4/9] /usr/local/cuda-11.4/bin/nvcc -ccbin /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/AdamG012/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/apply_rotary_pos_emb.cu -o apply_rotary_pos_emb.cuda.o +[5/9] /usr/local/cuda-11.4/bin/nvcc -ccbin /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/AdamG012/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/transform.cu -o transform.cuda.o +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/transform.cu(57): warning: variable "lane" was declared but never referenced -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/transform.cu(94): warning: variable "half_dim" was declared but never referenced +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/transform.cu(94): warning: variable "half_dim" was declared but never referenced -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/transform.cu(111): warning: variable "vals_half" was declared but never referenced +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/transform.cu(111): warning: variable "vals_half" was declared but never referenced -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/transform.cu(112): warning: variable "output_half" was declared but never referenced +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/transform.cu(112): warning: variable "output_half" was declared but never referenced -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/transform.cu(129): warning: variable "lane" was declared but never referenced +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/transform.cu(129): warning: variable "lane" was declared but never referenced -[6/9] /usr/local/cuda-11.4/bin/nvcc -ccbin /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/minutiae/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/softmax.cu -o softmax.cuda.o -[7/9] /usr/local/cuda-11.4/bin/nvcc -ccbin /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/minutiae/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu -o layer_norm.cuda.o -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced +[6/9] /usr/local/cuda-11.4/bin/nvcc -ccbin /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/AdamG012/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/softmax.cu -o softmax.cuda.o +[7/9] /usr/local/cuda-11.4/bin/nvcc -ccbin /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/AdamG012/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu -o layer_norm.cuda.o +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced detected during: instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=1, maxThreads=256]" (166): here instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" (192): here -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced detected during: instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=1, maxThreads=256]" (166): here instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" (192): here -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced detected during: instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=2, maxThreads=256]" (168): here instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" (192): here -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced detected during: instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=2, maxThreads=256]" (168): here instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" (192): here -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced detected during: instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=4, maxThreads=256]" (170): here instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" (192): here -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced detected during: instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=4, maxThreads=256]" (170): here instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" (192): here -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced detected during: instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=8, maxThreads=256]" (172): here instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" (192): here -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced detected during: instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=8, maxThreads=256]" (172): here instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" (192): here -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced detected during: instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=16, maxThreads=256]" (174): here instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" (192): here -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced detected during: instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=16, maxThreads=256]" (174): here instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" (192): here -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced detected during: instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=2, threadsPerGroup=256, maxThreads=256]" (179): here instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" (192): here -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced detected during: instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=2, threadsPerGroup=256, maxThreads=256]" (179): here instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" (192): here -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced detected during: instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=4, threadsPerGroup=256, maxThreads=256]" (182): here instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" (192): here -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced detected during: instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=4, threadsPerGroup=256, maxThreads=256]" (182): here instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" (192): here -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced detected during: instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=6, threadsPerGroup=256, maxThreads=256]" (185): here instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" (192): here -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced detected during: instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=6, threadsPerGroup=256, maxThreads=256]" (185): here instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" (192): here -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced detected during: instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=8, threadsPerGroup=256, maxThreads=256]" (188): here instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" (192): here -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced detected during: instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=8, threadsPerGroup=256, maxThreads=256]" (188): here instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" (192): here -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced detected during: instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=1, maxThreads=256]" (166): here instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" (200): here -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced detected during: instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=1, maxThreads=256]" (166): here instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" (200): here -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced detected during: instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=2, maxThreads=256]" (168): here instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" (200): here -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced detected during: instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=2, maxThreads=256]" (168): here instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" (200): here -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced detected during: instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=4, maxThreads=256]" (170): here instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" (200): here -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced detected during: instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=4, maxThreads=256]" (170): here instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" (200): here -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced detected during: instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=8, maxThreads=256]" (172): here instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" (200): here -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced detected during: instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=8, maxThreads=256]" (172): here instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" (200): here -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced detected during: instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=16, maxThreads=256]" (174): here instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" (200): here -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced detected during: instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=16, maxThreads=256]" (174): here instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" (200): here -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced detected during: instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=4, threadsPerGroup=256, maxThreads=256]" (179): here instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" (200): here -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced detected during: instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=4, threadsPerGroup=256, maxThreads=256]" (179): here instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" (200): here -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced detected during: instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=8, threadsPerGroup=256, maxThreads=256]" (182): here instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" (200): here -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced detected during: instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=8, threadsPerGroup=256, maxThreads=256]" (182): here instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" (200): here -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced detected during: instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=12, threadsPerGroup=256, maxThreads=256]" (185): here instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" (200): here -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced detected during: instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=12, threadsPerGroup=256, maxThreads=256]" (185): here instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" (200): here -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced detected during: instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=16, threadsPerGroup=256, maxThreads=256]" (188): here instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" (200): here -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced detected during: instantiation of "void fused_ln(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=16, threadsPerGroup=256, maxThreads=256]" (188): here instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" (200): here -[8/9] /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicxx -MMD -MF pt_binding.o.d -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/minutiae/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -fPIC -std=c++14 -O3 -std=c++14 -g -Wno-reorder -c /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp -o pt_binding.o -In file included from /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:10: -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes/inference_context.h: In member function ‘void InferenceContext::GenWorkSpace(const unsigned int&, const unsigned int&, const size_t&, const size_t&, const size_t&, const unsigned int&, const bool&, const size_t&, const unsigned int&, unsigned int, unsigned int)’: -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes/inference_context.h:139:52: warning: format ‘%d’ expects argument of type ‘int’, but argument 2 has type ‘size_t’ {aka ‘long unsigned int’} [-Wformat=] +[8/9] /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicxx -MMD -MF pt_binding.o.d -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/AdamG012/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -fPIC -std=c++14 -O3 -std=c++14 -g -Wno-reorder -c /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp -o pt_binding.o +In file included from /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:10: +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes/inference_context.h: In member function ‘void InferenceContext::GenWorkSpace(const unsigned int&, const unsigned int&, const size_t&, const size_t&, const size_t&, const unsigned int&, const bool&, const size_t&, const unsigned int&, unsigned int, unsigned int)’: +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes/inference_context.h:139:52: warning: format ‘%d’ expects argument of type ‘int’, but argument 2 has type ‘size_t’ {aka ‘long unsigned int’} [-Wformat=] 139 | "Allocatable workspace available (%d tokens) is less than minimum requested " | ~^ | | @@ -930,43 +930,43 @@ In file included from /home/minutiae/.conda/envs/py39/lib/python3.9/site-package | ~~~~~~~~~~~~ | | | size_t {aka long unsigned int} -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp: In instantiation of ‘std::vector ds_softmax_context(at::Tensor&, at::Tensor&, int, bool, bool, int, float, bool, bool, int, bool, unsigned int, unsigned int, at::Tensor&) [with T = float]’: -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:1750:99: required from here -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:536:50: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing] +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp: In instantiation of ‘std::vector ds_softmax_context(at::Tensor&, at::Tensor&, int, bool, bool, int, float, bool, bool, int, bool, unsigned int, unsigned int, at::Tensor&) [with T = float]’: +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:1750:99: required from here +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:536:50: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing] 536 | {hidden_dim * InferenceContext::Instance().GetMaxTokenLenght(), | ~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:536:50: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing] -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:537:41: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing] +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:536:50: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing] +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:537:41: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing] 537 | k * InferenceContext::Instance().GetMaxTokenLenght(), | ~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:537:41: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing] -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:545:38: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing] +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:537:41: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing] +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:545:38: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing] 545 | {hidden_dim * InferenceContext::Instance().GetMaxTokenLenght(), | ~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:545:38: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing] -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:546:29: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing] +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:545:38: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing] +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:546:29: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing] 546 | k * InferenceContext::Instance().GetMaxTokenLenght(), | ~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:546:29: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing] -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp: In instantiation of ‘std::vector ds_softmax_context(at::Tensor&, at::Tensor&, int, bool, bool, int, float, bool, bool, int, bool, unsigned int, unsigned int, at::Tensor&) [with T = __half]’: -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:1753:49: required from here -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:536:50: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing] +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:546:29: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing] +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp: In instantiation of ‘std::vector ds_softmax_context(at::Tensor&, at::Tensor&, int, bool, bool, int, float, bool, bool, int, bool, unsigned int, unsigned int, at::Tensor&) [with T = __half]’: +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:1753:49: required from here +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:536:50: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing] 536 | {hidden_dim * InferenceContext::Instance().GetMaxTokenLenght(), | ~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:536:50: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing] -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:537:41: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing] +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:536:50: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing] +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:537:41: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing] 537 | k * InferenceContext::Instance().GetMaxTokenLenght(), | ~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:537:41: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing] -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:545:38: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing] +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:537:41: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing] +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:545:38: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing] 545 | {hidden_dim * InferenceContext::Instance().GetMaxTokenLenght(), | ~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:545:38: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing] -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:546:29: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing] +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:545:38: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing] +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:546:29: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing] 546 | k * InferenceContext::Instance().GetMaxTokenLenght(), | ~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:546:29: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing] -[9/9] /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicxx pt_binding.o gelu.cuda.o relu.cuda.o layer_norm.cuda.o softmax.cuda.o dequantize.cuda.o apply_rotary_pos_emb.cuda.o transform.cuda.o -shared -lcurand -L/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/lib -lc10 -lc10_cuda -ltorch_cpu -ltorch_cuda_cu -ltorch_cuda_cpp -ltorch -ltorch_python -L/usr/local/cuda-11.4/lib64 -lcudart -o transformer_inference.so +/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:546:29: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing] +[9/9] /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicxx pt_binding.o gelu.cuda.o relu.cuda.o layer_norm.cuda.o softmax.cuda.o dequantize.cuda.o apply_rotary_pos_emb.cuda.o transform.cuda.o -shared -lcurand -L/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/lib -lc10 -lc10_cuda -ltorch_cpu -ltorch_cuda_cu -ltorch_cuda_cpp -ltorch -ltorch_python -L/usr/local/cuda-11.4/lib64 -lcudart -o transformer_inference.so Loading extension module transformer_inference... Loading extension module transformer_inference... Time to load transformer_inference op: 33.57252836227417 seconds @@ -1052,7 +1052,7 @@ huggingface/tokenizers: The current process just got forked, after parallelism h To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module transformer_inference, skipping build step... Loading extension module transformer_inference... Time to load transformer_inference op: 0.17304539680480957 seconds @@ -1078,7 +1078,7 @@ huggingface/tokenizers: The current process just got forked, after parallelism h To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module transformer_inference, skipping build step... Loading extension module transformer_inference... Time to load transformer_inference op: 0.2198350429534912 seconds @@ -1091,11 +1091,11 @@ To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module transformer_inference, skipping build step... Loading extension module transformer_inference... Time to load transformer_inference op: 0.23125028610229492 seconds -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module transformer_inference, skipping build step... Loading extension module transformer_inference... Time to load transformer_inference op: 0.23965048789978027 seconds @@ -1108,7 +1108,7 @@ huggingface/tokenizers: The current process just got forked, after parallelism h To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module transformer_inference, skipping build step... Loading extension module transformer_inference... Time to load transformer_inference op: 0.24375414848327637 seconds @@ -1116,7 +1116,7 @@ huggingface/tokenizers: The current process just got forked, after parallelism h To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module transformer_inference, skipping build step... Loading extension module transformer_inference... Time to load transformer_inference op: 0.25126194953918457 seconds @@ -1145,7 +1145,7 @@ huggingface/tokenizers: The current process just got forked, after parallelism h To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module transformer_inference, skipping build step... Loading extension module transformer_inference... Time to load transformer_inference op: 0.31780099868774414 seconds @@ -1153,7 +1153,7 @@ huggingface/tokenizers: The current process just got forked, after parallelism h To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module transformer_inference, skipping build step... Loading extension module transformer_inference... Time to load transformer_inference op: 0.1689906120300293 seconds @@ -1161,7 +1161,7 @@ huggingface/tokenizers: The current process just got forked, after parallelism h To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible @@ -1201,7 +1201,7 @@ huggingface/tokenizers: The current process just got forked, after parallelism h To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module transformer_inference, skipping build step... Loading extension module transformer_inference... Time to load transformer_inference op: 0.2203667163848877 seconds @@ -1210,7 +1210,7 @@ huggingface/tokenizers: The current process just got forked, after parallelism h To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module transformer_inference, skipping build step... Loading extension module transformer_inference... Time to load transformer_inference op: 0.2368457317352295 seconds @@ -1218,7 +1218,7 @@ huggingface/tokenizers: The current process just got forked, after parallelism h To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module transformer_inference, skipping build step... Loading extension module transformer_inference... Time to load transformer_inference op: 0.2534804344177246 seconds @@ -1226,7 +1226,7 @@ huggingface/tokenizers: The current process just got forked, after parallelism h To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module transformer_inference, skipping build step... Loading extension module transformer_inference... Time to load transformer_inference op: 0.2447066307067871 seconds @@ -1234,7 +1234,7 @@ huggingface/tokenizers: The current process just got forked, after parallelism h To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module transformer_inference, skipping build step... Loading extension module transformer_inference... Time to load transformer_inference op: 0.27658915519714355 seconds @@ -1248,17 +1248,17 @@ huggingface/tokenizers: The current process just got forked, after parallelism h To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module transformer_inference, skipping build step... Loading extension module transformer_inference... Time to load transformer_inference op: 0.3282308578491211 seconds -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module transformer_inference, skipping build step... Loading extension module transformer_inference... Time to load transformer_inference op: 0.8675589561462402 seconds *****************[end] Initialized Actor Model [end] (duration: 119.75s)****************** *************************[start] Initializing Ref Model [start] ************************** -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module utils, skipping build step... Loading extension module utils... @@ -1266,23 +1266,23 @@ Time to load utils op: 1.891409158706665 seconds No modifications detected for re-loaded extension module utils, skipping build step... Loading extension module utils... Time to load utils op: 0.8250091075897217 seconds -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module utils, skipping build step... Loading extension module utils... Time to load utils op: 0.0019402503967285156 seconds -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module utils, skipping build step... Loading extension module utils... Time to load utils op: 0.0012993812561035156 seconds -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module utils, skipping build step... Loading extension module utils... Time to load utils op: 0.0013997554779052734 seconds -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module utils, skipping build step... Loading extension module utils... Time to load utils op: 0.0013012886047363281 seconds -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module utils, skipping build step... Loading extension module utils... Time to load utils op: 0.0018343925476074219 seconds @@ -1420,7 +1420,7 @@ Time to load utils op: 0.0018343925476074219 seconds "prescale_gradients": false, "wall_clock_breakdown": false } -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module utils, skipping build step... Loading extension module utils... Time to load utils op: 0.0015451908111572266 seconds @@ -1431,7 +1431,7 @@ To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module fused_adam, skipping build step... Loading extension module fused_adam... Time to load fused_adam op: 0.0030541419982910156 seconds @@ -1440,7 +1440,7 @@ To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module fused_adam, skipping build step... Loading extension module fused_adam... Time to load fused_adam op: 0.0025348663330078125 seconds @@ -1449,7 +1449,7 @@ To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module fused_adam, skipping build step... Loading extension module fused_adam... Time to load fused_adam op: 0.0019736289978027344 seconds @@ -1462,7 +1462,7 @@ To disable this warning, you can either: - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module fused_adam, skipping build step... Loading extension module fused_adam... Time to load fused_adam op: 0.0029854774475097656 seconds @@ -1471,12 +1471,12 @@ To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module fused_adam, skipping build step... Loading extension module fused_adam... Time to load fused_adam op: 0.0022940635681152344 seconds Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module fused_adam, skipping build step... Loading extension module fused_adam... Time to load fused_adam op: 0.0029518604278564453 seconds @@ -1485,15 +1485,15 @@ To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module fused_adam, skipping build step... Loading extension module fused_adam... Time to load fused_adam op: 0.002866029739379883 seconds -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module utils, skipping build step... Loading extension module utils... Time to load utils op: 0.0012390613555908203 seconds -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module utils, skipping build step... Loading extension module utils... Time to load utils op: 0.003458738327026367 seconds @@ -1502,12 +1502,12 @@ To disable this warning, you can either: - Avoid using `tokenizers` before the fork if possible - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module fused_adam, skipping build step... Loading extension module fused_adam... Time to load fused_adam op: 0.002884387969970703 seconds [2023-04-14 08:46:30,739] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed info: version=0.9.0, git-hash=unknown, git-branch=unknown -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module utils, skipping build step... Loading extension module utils... Time to load utils op: 0.5726070404052734 seconds @@ -1515,13 +1515,13 @@ Time to load utils op: 0.5726070404052734 seconds Rank: 6 partition count [8, 8] and sizes[(41365824, False), (33792, False)] Rank: 7 partition count [8, 8] and sizes[(41365824, False), (33792, False)] Rank: 1 partition count [8, 8] and sizes[(41365824, False), (33792, False)] -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module utils, skipping build step... Loading extension module utils... Time to load utils op: 0.0016791820526123047 seconds [2023-04-14 08:46:36,743] [INFO] [logging.py:96:log_dist] [Rank 0] Removing param_group that has no 'params' in the client Optimizer [2023-04-14 08:46:36,743] [INFO] [logging.py:96:log_dist] [Rank 0] Using client Optimizer as basic optimizer -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module utils, skipping build step... Loading extension module utils... Time to load utils op: 0.002676725387573242 seconds @@ -1532,15 +1532,15 @@ Time to load utils op: 0.002676725387573242 seconds [2023-04-14 08:46:36,763] [INFO] [stage_1_and_2.py:134:__init__] Allgather bucket size 500,000,000 [2023-04-14 08:46:36,763] [INFO] [stage_1_and_2.py:135:__init__] CPU Offload: False [2023-04-14 08:46:36,763] [INFO] [stage_1_and_2.py:136:__init__] Round robin gradient partitioning: False -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module utils, skipping build step... Loading extension module utils... Time to load utils op: 0.0006117820739746094 seconds No modifications detected for re-loaded extension module utils, skipping build step... Loading extension module utils... Time to load utils op: 0.002028226852416992 seconds -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module utils, skipping build step... Loading extension module utils... Time to load utils op: 0.0013179779052734375 seconds @@ -1549,27 +1549,27 @@ Rank: 3 partition count [8, 8] and sizes[(41365824, False), (33792, False)] Rank: 4 partition count [8, 8] and sizes[(41365824, False), (33792, False)] Rank: 5 partition count [8, 8] and sizes[(41365824, False), (33792, False)] Rank: 2 partition count [8, 8] and sizes[(41365824, False), (33792, False)] -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module utils, skipping build step... Loading extension module utils... -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...Time to load utils op: 0.0013089179992675781 seconds +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...Time to load utils op: 0.0013089179992675781 seconds No modifications detected for re-loaded extension module utils, skipping build step... Loading extension module utils... Time to load utils op: 0.0011861324310302734 seconds -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module utils, skipping build step... Loading extension module utils... -Time to load utils op: 0.0008509159088134766 secondsUsing /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Time to load utils op: 0.0008509159088134766 secondsUsing /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module utils, skipping build step... Loading extension module utils... Time to load utils op: 0.0007336139678955078 seconds -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module utils, skipping build step... Loading extension module utils... -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...Time to load utils op: 0.0008528232574462891 seconds +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...Time to load utils op: 0.0008528232574462891 seconds No modifications detected for re-loaded extension module utils, skipping build step... Loading extension module utils... @@ -1736,26 +1736,26 @@ Time to load utils op: 0.0012271404266357422 seconds "tp_gather_partition_size": 8 } } -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module utils, skipping build step... Loading extension module utils... Time to load utils op: 0.0017056465148925781 seconds *****************[end] Initialized Critic Model [end] (duration: 31.02s)****************** ************************[start] Initializing Reward Model [start] ************************ [2023-04-14 08:47:02,078] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed info: version=0.9.0, git-hash=unknown, git-branch=unknown -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module utils, skipping build step... Loading extension module utils... Time to load utils op: 1.1240429878234863 seconds No modifications detected for re-loaded extension module utils, skipping build step... Loading extension module utils... Time to load utils op: 0.42214012145996094 seconds -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module utils, skipping build step... Loading extension module utils... Time to load utils op: 0.002139568328857422 seconds -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module utils, skipping build step... Loading extension module utils... Time to load utils op: 0.0012676715850830078 seconds @@ -1892,22 +1892,22 @@ Time to load utils op: 0.0012676715850830078 seconds "prescale_gradients": false, "wall_clock_breakdown": false } -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module utils, skipping build step... Loading extension module utils... Time to load utils op: 0.0011258125305175781 seconds *****************[end] Initialized Reward Model [end] (duration: 37.09s)****************** ***** Running training ***** Beginning of Epoch 1/1, Total Generation Batches 8260 -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module utils, skipping build step... Loading extension module utils... Time to load utils op: 0.009740114212036133 seconds -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module utils, skipping build step... Loading extension module utils... Time to load utils op: 0.001196146011352539 seconds -Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... +Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root... No modifications detected for re-loaded extension module utils, skipping build step... Loading extension module utils... Time to load utils op: 0.001905679702758789 seconds