diff --git "a/training.log" "b/training.log"
--- "a/training.log"
+++ "b/training.log"
@@ -1,126 +1,126 @@
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
   warnings.warn(
 [2023-04-14 08:32:38,890] [WARNING] [runner.py:190:fetch_hostfile] Unable to find hostfile, will proceed with training with local resources only.
-[2023-04-14 08:32:39,797] [INFO] [runner.py:540:main] cmd = /home/minutiae/.conda/envs/py39/bin/python -u -m deepspeed.launcher.launch --world_info=eyJsb2NhbGhvc3QiOiBbMCwgMSwgMiwgMywgNCwgNSwgNiwgN119 --master_addr=127.0.0.1 --master_port=12346 --enable_each_rank_log=None main.py --data_path Dahoas/rm-static Dahoas/full-hh-rlhf Dahoas/synthetic-instruct-gptj-pairwise yitingxie/rlhf-reward-datasets openai/webgpt_comparisons stanfordnlp/SHP --data_split 2,4,4 --actor_model_name_or_path /lus/grand/projects/BNN-Scale/chatgpt/hf_runs/DeepSpeedExamples/applications/DeepSpeed-Chat/output/actor-models/1.3b --critic_model_name_or_path /lus/grand/projects/BNN-Scale/chatgpt/hf_runs/DeepSpeedExamples/applications/DeepSpeed-Chat/output/reward-models/350m --num_padding_at_beginning 1 --per_device_train_batch_size 4 --per_device_mini_train_batch_size 4 --generation_batch_numbers 1 --ppo_epochs 1 --max_answer_seq_len 256 --max_prompt_seq_len 256 --actor_learning_rate 9.65e-6 --critic_learning_rate 5e-6 --actor_weight_decay 0.1 --critic_weight_decay 0.1 --num_train_epochs 1 --lr_scheduler_type cosine --gradient_accumulation_steps 1 --num_warmup_steps 100 --deepspeed --seed 1234 --enable_hybrid_engine --actor_zero_stage 2 --critic_zero_stage 2 --output_dir /lus/grand/projects/BNN-Scale/chatgpt/hf_runs/DeepSpeedExamples/applications/DeepSpeed-Chat/output/step3-models/1.3b
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
+[2023-04-14 08:32:39,797] [INFO] [runner.py:540:main] cmd = /home/AdamG012/.conda/envs/py39/bin/python -u -m deepspeed.launcher.launch --world_info=eyJsb2NhbGhvc3QiOiBbMCwgMSwgMiwgMywgNCwgNSwgNiwgN119 --master_addr=127.0.0.1 --master_port=12346 --enable_each_rank_log=None main.py --data_path Dahoas/rm-static Dahoas/full-hh-rlhf Dahoas/synthetic-instruct-gptj-pairwise yitingxie/rlhf-reward-datasets openai/webgpt_comparisons stanfordnlp/SHP --data_split 2,4,4 --actor_model_name_or_path /chatgpt/hf_runs/DeepSpeedExamples/applications/DeepSpeed-Chat/output/actor-models/1.3b --critic_model_name_or_path /chatgpt/hf_runs/DeepSpeedExamples/applications/DeepSpeed-Chat/output/reward-models/350m --num_padding_at_beginning 1 --per_device_train_batch_size 4 --per_device_mini_train_batch_size 4 --generation_batch_numbers 1 --ppo_epochs 1 --max_answer_seq_len 256 --max_prompt_seq_len 256 --actor_learning_rate 9.65e-6 --critic_learning_rate 5e-6 --actor_weight_decay 0.1 --critic_weight_decay 0.1 --num_train_epochs 1 --lr_scheduler_type cosine --gradient_accumulation_steps 1 --num_warmup_steps 100 --deepspeed --seed 1234 --enable_hybrid_engine --actor_zero_stage 2 --critic_zero_stage 2 --output_dir /chatgpt/hf_runs/DeepSpeedExamples/applications/DeepSpeed-Chat/output/step3-models/1.3b
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
   warnings.warn(
 [2023-04-14 08:32:50,431] [INFO] [launch.py:229:main] WORLD INFO DICT: {'localhost': [0, 1, 2, 3, 4, 5, 6, 7]}
 [2023-04-14 08:32:50,884] [INFO] [launch.py:235:main] nnodes=1, num_local_procs=8, node_rank=0
 [2023-04-14 08:32:50,884] [INFO] [launch.py:246:main] global_rank_mapping=defaultdict(<class 'list'>, {'localhost': [0, 1, 2, 3, 4, 5, 6, 7]})
 [2023-04-14 08:32:50,884] [INFO] [launch.py:247:main] dist_world_size=8
 [2023-04-14 08:32:50,884] [INFO] [launch.py:249:main] Setting CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
   warnings.warn(
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
   warnings.warn(
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
   warnings.warn(
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
   warnings.warn(
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
   warnings.warn(
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
   warnings.warn(
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
   warnings.warn(
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
   warnings.warn(
 [2023-04-14 08:35:14,863] [INFO] [comm.py:586:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl
-Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
-  0%|          | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+Found cached dataset parquet (/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+  0%|          | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
   0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 64.08it/s]
 100%|██████████| 2/2 [00:00<00:00, 690.82it/s]
-Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+Found cached dataset parquet (/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
   0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 571.39it/s]
-Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+Found cached dataset parquet (/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
   0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 716.85it/s]
-Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+Found cached dataset parquet (/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
   0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 662.45it/s]
-Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+Found cached dataset parquet (/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
   0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 746.78it/s]
-Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+Found cached dataset parquet (/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
   0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 679.02it/s]
-Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+Found cached dataset parquet (/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
   0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 639.18it/s]
-Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
-  0%|          | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
-  0%|          | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+Found cached dataset parquet (/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+  0%|          | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+  0%|          | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
 100%|██████████| 2/2 [00:00<00:00,  6.89it/s]100%|██████████| 2/2 [00:00<00:00,  6.89it/s]
 100%|██████████| 2/2 [00:00<00:00, 156.58it/s]
   0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 26.93it/s]
-Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+Found cached dataset parquet (/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
   0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 666.77it/s]
-Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+Found cached dataset parquet (/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
   0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 23.94it/s]
-Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+Found cached dataset parquet (/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
   0%|          | 0/2 [00:00<?, ?it/s] 50%|█████     | 1/2 [00:00<00:00,  1.18it/s]100%|██████████| 2/2 [00:00<00:00,  2.21it/s]
-Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
-Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+Found cached dataset parquet (/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+Found cached dataset parquet (/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
   0%|          | 0/2 [00:00<?, ?it/s] 50%|█████     | 1/2 [00:00<00:00,  7.78it/s]  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:03<00:00,  2.20s/it]100%|██████████| 2/2 [00:04<00:00,  2.01s/it]
  50%|█████     | 1/2 [00:02<00:02,  2.33s/it]100%|██████████| 2/2 [00:02<00:00,  1.08s/it]100%|██████████| 2/2 [00:02<00:00,  1.27s/it]
-Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
-  0%|          | 0/1 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+Found cached dataset parquet (/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+  0%|          | 0/1 [00:00<?, ?it/s]Found cached dataset parquet (/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
 100%|██████████| 1/1 [00:00<00:00, 15.08it/s]
   0%|          | 0/1 [00:00<?, ?it/s]100%|██████████| 1/1 [00:00<00:00, 681.67it/s]
-Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+Found cached dataset parquet (/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
   0%|          | 0/1 [00:00<?, ?it/s]100%|██████████| 1/1 [00:00<00:00, 15.83it/s]
-Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+Found cached dataset parquet (/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
   0%|          | 0/1 [00:00<?, ?it/s]100%|██████████| 1/1 [00:00<00:00, 564.36it/s]
-Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+Found cached dataset parquet (/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
   0%|          | 0/1 [00:00<?, ?it/s]100%|██████████| 1/1 [00:00<00:00, 683.11it/s]
-Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
-  0%|          | 0/1 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+Found cached dataset parquet (/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+  0%|          | 0/1 [00:00<?, ?it/s]Found cached dataset parquet (/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
 100%|██████████| 1/1 [00:07<00:00,  7.49s/it]100%|██████████| 1/1 [00:07<00:00,  7.66s/it]
   0%|          | 0/1 [00:00<?, ?it/s]100%|██████████| 1/1 [00:00<00:00, 475.71it/s]
-Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+Found cached dataset parquet (/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
   0%|          | 0/1 [00:00<?, ?it/s]100%|██████████| 1/1 [00:00<00:00,  3.12it/s]100%|██████████| 1/1 [00:00<00:00,  3.12it/s]
-Found cached dataset parquet (/grand/projects/BNN-Scale/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+Found cached dataset parquet (/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
   0%|          | 0/2 [00:00<?, ?it/s] 50%|█████     | 1/2 [00:00<00:00,  5.79it/s]100%|██████████| 2/2 [00:00<00:00,  2.84it/s]100%|██████████| 2/2 [00:00<00:00,  3.08it/s]
-Found cached dataset parquet (/grand/projects/BNN-Scale/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
-Found cached dataset parquet (/grand/projects/BNN-Scale/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+Found cached dataset parquet (/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+Found cached dataset parquet (/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
   0%|          | 0/2 [00:00<?, ?it/s]  0%|          | 0/2 [00:00<?, ?it/s] 50%|█████     | 1/2 [00:00<00:00,  1.81it/s]100%|██████████| 2/2 [00:00<00:00,  3.11it/s]
 100%|██████████| 2/2 [00:00<00:00, 21.36it/s]
-Found cached dataset parquet (/grand/projects/BNN-Scale/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
-  0%|          | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+Found cached dataset parquet (/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+  0%|          | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
  50%|█████     | 1/2 [00:05<00:05,  5.17s/it]100%|██████████| 2/2 [00:05<00:00,  2.35s/it]100%|██████████| 2/2 [00:05<00:00,  2.78s/it]
   0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 223.68it/s]
-Found cached dataset parquet (/grand/projects/BNN-Scale/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+Found cached dataset parquet (/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
   0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 751.26it/s]
-Found cached dataset parquet (/grand/projects/BNN-Scale/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
-  0%|          | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+Found cached dataset parquet (/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
+  0%|          | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
 100%|██████████| 2/2 [00:00<00:00, 22.35it/s]
   0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 756.48it/s]
-Found cached dataset webgpt_comparisons (/grand/projects/BNN-Scale/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
-  0%|          | 0/1 [00:00<?, ?it/s]Found cached dataset webgpt_comparisons (/grand/projects/BNN-Scale/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
+Found cached dataset webgpt_comparisons (/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
+  0%|          | 0/1 [00:00<?, ?it/s]Found cached dataset webgpt_comparisons (/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
   0%|          | 0/1 [00:00<?, ?it/s]100%|██████████| 1/1 [00:00<00:00, 67.81it/s]
 100%|██████████| 1/1 [00:00<00:00, 278.17it/s]
-Found cached dataset webgpt_comparisons (/grand/projects/BNN-Scale/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
+Found cached dataset webgpt_comparisons (/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
   0%|          | 0/1 [00:00<?, ?it/s]100%|██████████| 1/1 [00:00<00:00, 293.12it/s]
-Found cached dataset webgpt_comparisons (/grand/projects/BNN-Scale/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
+Found cached dataset webgpt_comparisons (/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
   0%|          | 0/1 [00:00<?, ?it/s]100%|██████████| 1/1 [00:00<00:00, 276.56it/s]
-Found cached dataset webgpt_comparisons (/grand/projects/BNN-Scale/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
+Found cached dataset webgpt_comparisons (/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
   0%|          | 0/1 [00:00<?, ?it/s]100%|██████████| 1/1 [00:00<00:00, 169.24it/s]
-Found cached dataset webgpt_comparisons (/grand/projects/BNN-Scale/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
-  0%|          | 0/1 [00:00<?, ?it/s]Found cached dataset webgpt_comparisons (/grand/projects/BNN-Scale/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
+Found cached dataset webgpt_comparisons (/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
+  0%|          | 0/1 [00:00<?, ?it/s]Found cached dataset webgpt_comparisons (/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
 100%|██████████| 1/1 [00:00<00:00, 289.58it/s]
   0%|          | 0/1 [00:00<?, ?it/s]100%|██████████| 1/1 [00:00<00:00, 289.08it/s]
-Found cached dataset webgpt_comparisons (/grand/projects/BNN-Scale/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
+Found cached dataset webgpt_comparisons (/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
   0%|          | 0/1 [00:00<?, ?it/s]100%|██████████| 1/1 [00:08<00:00,  8.44s/it]100%|██████████| 1/1 [00:09<00:00,  9.61s/it]
-Found cached dataset json (/grand/projects/BNN-Scale/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
-  0%|          | 0/3 [00:00<?, ?it/s]Found cached dataset json (/grand/projects/BNN-Scale/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
+Found cached dataset json (/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
+  0%|          | 0/3 [00:00<?, ?it/s]Found cached dataset json (/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
  33%|███▎      | 1/3 [00:15<00:30, 15.43s/it]  0%|          | 0/3 [00:00<?, ?it/s] 67%|██████▋   | 2/3 [00:20<00:09,  9.35s/it]100%|██████████| 3/3 [00:23<00:00,  6.47s/it]100%|██████████| 3/3 [00:23<00:00,  7.85s/it]
  33%|███▎      | 1/3 [00:03<00:06,  3.12s/it] 67%|██████▋   | 2/3 [00:03<00:01,  1.36s/it]100%|██████████| 3/3 [00:03<00:00,  1.08s/it]
-Found cached dataset json (/grand/projects/BNN-Scale/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
-  0%|          | 0/3 [00:00<?, ?it/s]Found cached dataset json (/grand/projects/BNN-Scale/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
-  0%|          | 0/3 [00:00<?, ?it/s] 33%|███▎      | 1/3 [00:09<00:19,  9.58s/it]Found cached dataset json (/grand/projects/BNN-Scale/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
+Found cached dataset json (/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
+  0%|          | 0/3 [00:00<?, ?it/s]Found cached dataset json (/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
+  0%|          | 0/3 [00:00<?, ?it/s] 33%|███▎      | 1/3 [00:09<00:19,  9.58s/it]Found cached dataset json (/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
  33%|███▎      | 1/3 [00:00<00:00,  8.48it/s]100%|██████████| 3/3 [00:00<00:00, 18.45it/s]
 100%|██████████| 3/3 [00:09<00:00,  3.22s/it]
   0%|          | 0/3 [00:00<?, ?it/s]100%|██████████| 3/3 [00:00<00:00, 241.71it/s]
-Found cached dataset json (/grand/projects/BNN-Scale/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
-Found cached dataset json (/grand/projects/BNN-Scale/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
+Found cached dataset json (/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
+Found cached dataset json (/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
   0%|          | 0/3 [00:00<?, ?it/s]  0%|          | 0/3 [00:00<?, ?it/s] 33%|███▎      | 1/3 [00:05<00:10,  5.20s/it] 33%|███▎      | 1/3 [00:02<00:05,  2.93s/it] 67%|██████▋   | 2/3 [00:05<00:02,  2.29s/it] 67%|██████▋   | 2/3 [00:03<00:01,  1.35s/it]100%|██████████| 3/3 [00:03<00:00,  1.22it/s]100%|██████████| 3/3 [00:05<00:00,  1.33s/it]100%|██████████| 3/3 [00:03<00:00,  1.12s/it]100%|██████████| 3/3 [00:05<00:00,  1.88s/it]
 
-Found cached dataset json (/grand/projects/BNN-Scale/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
+Found cached dataset json (/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
   0%|          | 0/3 [00:00<?, ?it/s]100%|██████████| 3/3 [00:00<00:00, 261.87it/s]
 ************************[start] Initializing Actor Model [start] *************************
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
@@ -185,18 +185,18 @@ Installed CUDA version 11.4 does not match the version torch was compiled with 1
 Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
 Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combinationInstalled CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
 
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
 Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
@@ -206,7 +206,7 @@ To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
 Detected CUDA files, patching ldflags
-Emitting ninja build file /home/minutiae/.cache/torch_extensions/py39_cu113/fused_adam/build.ninja...
+Emitting ninja build file /home/AdamG012/.cache/torch_extensions/py39_cu113/fused_adam/build.ninja...
 Building extension module fused_adam...
 Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
@@ -237,7 +237,7 @@ To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
 Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
@@ -251,7 +251,7 @@ To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
 Detected CUDA files, patching ldflags
-Emitting ninja build file /home/minutiae/.cache/torch_extensions/py39_cu113/fused_adam/build.ninja...
+Emitting ninja build file /home/AdamG012/.cache/torch_extensions/py39_cu113/fused_adam/build.ninja...
 Building extension module fused_adam...
 Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
@@ -265,13 +265,13 @@ Time to load fused_adam op: 7.266183376312256 seconds
 [2023-04-14 08:44:08,781] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Flops Profiler Enabled: False
 [2023-04-14 08:44:13,170] [INFO] [logging.py:96:log_dist] [Rank 0] Removing param_group that has no 'params' in the client Optimizer
 [2023-04-14 08:44:13,170] [INFO] [logging.py:96:log_dist] [Rank 0] Using client Optimizer as basic optimizer
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 [2023-04-14 08:44:13,193] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Basic Optimizer = FusedAdam
 [2023-04-14 08:44:13,193] [INFO] [utils.py:51:is_zero_supported_optimizer] Checking ZeRO support for optimizer=FusedAdam type=<class 'deepspeed.ops.adam.fused_adam.FusedAdam'>
 [2023-04-14 08:44:13,193] [INFO] [logging.py:96:log_dist] [Rank 0] Creating torch.float16 ZeRO stage 2 optimizer
@@ -283,7 +283,7 @@ To disable this warning, you can either:
 [2023-04-14 08:44:13,704] [INFO] [stage_1_and_2.py:134:__init__] Allgather bucket size 500,000,000
 [2023-04-14 08:44:13,704] [INFO] [stage_1_and_2.py:135:__init__] CPU Offload: False
 [2023-04-14 08:44:13,704] [INFO] [stage_1_and_2.py:136:__init__] Round robin gradient partitioning: False
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
@@ -292,7 +292,7 @@ huggingface/tokenizers: The current process just got forked, after parallelism h
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-Emitting ninja build file /home/minutiae/.cache/torch_extensions/py39_cu113/utils/build.ninja...
+Emitting ninja build file /home/AdamG012/.cache/torch_extensions/py39_cu113/utils/build.ninja...
 Building extension module utils...
 Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
@@ -324,30 +324,30 @@ Rank: 3 partition count [8, 8] and sizes[(164401920, False), (67840, False)]
 Rank: 2 partition count [8, 8] and sizes[(164401920, False), (67840, False)] 
 Rank: 7 partition count [8, 8] and sizes[(164401920, False), (67840, False)] 
 Rank: 5 partition count [8, 8] and sizes[(164401920, False), (67840, False)] 
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 Time to load utils op: 0.0018334388732910156 seconds
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 Time to load utils op: 0.0009965896606445312 seconds
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
 Time to load utils op: 0.001102447509765625 seconds
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
 Time to load utils op: 0.0010309219360351562 seconds
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
 Time to load utils op: 0.0010993480682373047 seconds
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 Time to load utils op: 0.0011150836944580078 seconds
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
@@ -511,7 +511,7 @@ Time to load utils op: 0.0014064311981201172 seconds
         "tp_gather_partition_size": 8
     }
 }
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
 Time to load utils op: 0.0014083385467529297 seconds
@@ -584,7 +584,7 @@ huggingface/tokenizers: The current process just got forked, after parallelism h
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
@@ -610,21 +610,21 @@ huggingface/tokenizers: The current process just got forked, after parallelism h
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combinationUsing /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combinationUsing /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
@@ -638,288 +638,288 @@ huggingface/tokenizers: The current process just got forked, after parallelism h
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 Detected CUDA files, patching ldflags
-Emitting ninja build file /home/minutiae/.cache/torch_extensions/py39_cu113/transformer_inference/build.ninja...
+Emitting ninja build file /home/AdamG012/.cache/torch_extensions/py39_cu113/transformer_inference/build.ninja...
 Building extension module transformer_inference...
 Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-[1/9] /usr/local/cuda-11.4/bin/nvcc  -ccbin /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/minutiae/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/dequantize.cu -o dequantize.cuda.o 
-[2/9] /usr/local/cuda-11.4/bin/nvcc  -ccbin /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/minutiae/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/gelu.cu -o gelu.cuda.o 
-[3/9] /usr/local/cuda-11.4/bin/nvcc  -ccbin /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/minutiae/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/relu.cu -o relu.cuda.o 
-[4/9] /usr/local/cuda-11.4/bin/nvcc  -ccbin /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/minutiae/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/apply_rotary_pos_emb.cu -o apply_rotary_pos_emb.cuda.o 
-[5/9] /usr/local/cuda-11.4/bin/nvcc  -ccbin /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/minutiae/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/transform.cu -o transform.cuda.o 
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/transform.cu(57): warning: variable "lane" was declared but never referenced
+[1/9] /usr/local/cuda-11.4/bin/nvcc  -ccbin /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/AdamG012/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/dequantize.cu -o dequantize.cuda.o
+[2/9] /usr/local/cuda-11.4/bin/nvcc  -ccbin /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/AdamG012/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/gelu.cu -o gelu.cuda.o
+[3/9] /usr/local/cuda-11.4/bin/nvcc  -ccbin /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/AdamG012/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/relu.cu -o relu.cuda.o
+[4/9] /usr/local/cuda-11.4/bin/nvcc  -ccbin /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/AdamG012/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/apply_rotary_pos_emb.cu -o apply_rotary_pos_emb.cuda.o
+[5/9] /usr/local/cuda-11.4/bin/nvcc  -ccbin /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/AdamG012/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/transform.cu -o transform.cuda.o
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/transform.cu(57): warning: variable "lane" was declared but never referenced
 
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/transform.cu(94): warning: variable "half_dim" was declared but never referenced
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/transform.cu(94): warning: variable "half_dim" was declared but never referenced
 
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/transform.cu(111): warning: variable "vals_half" was declared but never referenced
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/transform.cu(111): warning: variable "vals_half" was declared but never referenced
 
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/transform.cu(112): warning: variable "output_half" was declared but never referenced
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/transform.cu(112): warning: variable "output_half" was declared but never referenced
 
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/transform.cu(129): warning: variable "lane" was declared but never referenced
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/transform.cu(129): warning: variable "lane" was declared but never referenced
 
-[6/9] /usr/local/cuda-11.4/bin/nvcc  -ccbin /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/minutiae/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/softmax.cu -o softmax.cuda.o 
-[7/9] /usr/local/cuda-11.4/bin/nvcc  -ccbin /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/minutiae/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu -o layer_norm.cuda.o 
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
+[6/9] /usr/local/cuda-11.4/bin/nvcc  -ccbin /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/AdamG012/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/softmax.cu -o softmax.cuda.o
+[7/9] /usr/local/cuda-11.4/bin/nvcc  -ccbin /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicc -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/AdamG012/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu -o layer_norm.cuda.o
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
           detected during:
             instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=1, maxThreads=256]" 
 (166): here
             instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
 (192): here
 
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
           detected during:
             instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=1, maxThreads=256]" 
 (166): here
             instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
 (192): here
 
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
           detected during:
             instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=2, maxThreads=256]" 
 (168): here
             instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
 (192): here
 
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
           detected during:
             instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=2, maxThreads=256]" 
 (168): here
             instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
 (192): here
 
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
           detected during:
             instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=4, maxThreads=256]" 
 (170): here
             instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
 (192): here
 
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
           detected during:
             instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=4, maxThreads=256]" 
 (170): here
             instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
 (192): here
 
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
           detected during:
             instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=8, maxThreads=256]" 
 (172): here
             instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
 (192): here
 
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
           detected during:
             instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=8, maxThreads=256]" 
 (172): here
             instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
 (192): here
 
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
           detected during:
             instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=16, maxThreads=256]" 
 (174): here
             instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
 (192): here
 
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
           detected during:
             instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=1, threadsPerGroup=16, maxThreads=256]" 
 (174): here
             instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
 (192): here
 
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
           detected during:
             instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=2, threadsPerGroup=256, maxThreads=256]" 
 (179): here
             instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
 (192): here
 
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
           detected during:
             instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=2, threadsPerGroup=256, maxThreads=256]" 
 (179): here
             instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
 (192): here
 
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
           detected during:
             instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=4, threadsPerGroup=256, maxThreads=256]" 
 (182): here
             instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
 (192): here
 
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
           detected during:
             instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=4, threadsPerGroup=256, maxThreads=256]" 
 (182): here
             instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
 (192): here
 
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
           detected during:
             instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=6, threadsPerGroup=256, maxThreads=256]" 
 (185): here
             instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
 (192): here
 
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
           detected during:
             instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=6, threadsPerGroup=256, maxThreads=256]" 
 (185): here
             instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
 (192): here
 
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
           detected during:
             instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=8, threadsPerGroup=256, maxThreads=256]" 
 (188): here
             instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
 (192): here
 
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
           detected during:
             instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=__half, unRoll=8, threadsPerGroup=256, maxThreads=256]" 
 (188): here
             instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=__half]" 
 (192): here
 
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
           detected during:
             instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=1, maxThreads=256]" 
 (166): here
             instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
 (200): here
 
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
           detected during:
             instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=1, maxThreads=256]" 
 (166): here
             instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
 (200): here
 
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
           detected during:
             instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=2, maxThreads=256]" 
 (168): here
             instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
 (200): here
 
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
           detected during:
             instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=2, maxThreads=256]" 
 (168): here
             instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
 (200): here
 
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
           detected during:
             instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=4, maxThreads=256]" 
 (170): here
             instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
 (200): here
 
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
           detected during:
             instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=4, maxThreads=256]" 
 (170): here
             instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
 (200): here
 
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
           detected during:
             instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=8, maxThreads=256]" 
 (172): here
             instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
 (200): here
 
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
           detected during:
             instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=8, maxThreads=256]" 
 (172): here
             instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
 (200): here
 
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
           detected during:
             instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=16, maxThreads=256]" 
 (174): here
             instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
 (200): here
 
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
           detected during:
             instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=1, threadsPerGroup=16, maxThreads=256]" 
 (174): here
             instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
 (200): here
 
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
           detected during:
             instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=4, threadsPerGroup=256, maxThreads=256]" 
 (179): here
             instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
 (200): here
 
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
           detected during:
             instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=4, threadsPerGroup=256, maxThreads=256]" 
 (179): here
             instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
 (200): here
 
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
           detected during:
             instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=8, threadsPerGroup=256, maxThreads=256]" 
 (182): here
             instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
 (200): here
 
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
           detected during:
             instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=8, threadsPerGroup=256, maxThreads=256]" 
 (182): here
             instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
 (200): here
 
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
           detected during:
             instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=12, threadsPerGroup=256, maxThreads=256]" 
 (185): here
             instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
 (200): here
 
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
           detected during:
             instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=12, threadsPerGroup=256, maxThreads=256]" 
 (185): here
             instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
 (200): here
 
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(60): warning: variable "residual_buffer" was declared but never referenced
           detected during:
             instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=16, threadsPerGroup=256, maxThreads=256]" 
 (188): here
             instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
 (200): here
 
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/layer_norm.cu(61): warning: variable "bias_buffer" was declared but never referenced
           detected during:
             instantiation of "void fused_ln<T,unRoll,threadsPerGroup,maxThreads>(T *, const T *, const T *, const T *, float, int) [with T=float, unRoll=16, threadsPerGroup=256, maxThreads=256]" 
 (188): here
             instantiation of "void launch_fused_ln(T *, const T *, const T *, const T *, float, int, int, cudaStream_t) [with T=float]" 
 (200): here
 
-[8/9] /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicxx -MMD -MF pt_binding.o.d -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/minutiae/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -fPIC -std=c++14 -O3 -std=c++14 -g -Wno-reorder -c /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp -o pt_binding.o 
-In file included from /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:10:
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes/inference_context.h: In member function ‘void InferenceContext::GenWorkSpace(const unsigned int&, const unsigned int&, const size_t&, const size_t&, const size_t&, const unsigned int&, const bool&, const size_t&, const unsigned int&, unsigned int, unsigned int)’:
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes/inference_context.h:139:52: warning: format ‘%d’ expects argument of type ‘int’, but argument 2 has type ‘size_t’ {aka ‘long unsigned int’} [-Wformat=]
+[8/9] /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicxx -MMD -MF pt_binding.o.d -DTORCH_EXTENSION_NAME=transformer_inference -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes -I/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/includes -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/TH -isystem /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda-11.4/include -isystem /home/AdamG012/.conda/envs/py39/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -fPIC -std=c++14 -O3 -std=c++14 -g -Wno-reorder -c /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp -o pt_binding.o
+In file included from /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:10:
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes/inference_context.h: In member function ‘void InferenceContext::GenWorkSpace(const unsigned int&, const unsigned int&, const size_t&, const size_t&, const size_t&, const unsigned int&, const bool&, const size_t&, const unsigned int&, unsigned int, unsigned int)’:
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/includes/inference_context.h:139:52: warning: format ‘%d’ expects argument of type ‘int’, but argument 2 has type ‘size_t’ {aka ‘long unsigned int’} [-Wformat=]
   139 |                 "Allocatable workspace available (%d tokens) is less than minimum requested "
       |                                                   ~^
       |                                                    |
@@ -930,43 +930,43 @@ In file included from /home/minutiae/.conda/envs/py39/lib/python3.9/site-package
       |                 ~~~~~~~~~~~~                        
       |                 |
       |                 size_t {aka long unsigned int}
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp: In instantiation of ‘std::vector<at::Tensor> ds_softmax_context(at::Tensor&, at::Tensor&, int, bool, bool, int, float, bool, bool, int, bool, unsigned int, unsigned int, at::Tensor&) [with T = float]’:
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:1750:99:   required from here
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:536:50: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp: In instantiation of ‘std::vector<at::Tensor> ds_softmax_context(at::Tensor&, at::Tensor&, int, bool, bool, int, float, bool, bool, int, bool, unsigned int, unsigned int, at::Tensor&) [with T = float]’:
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:1750:99:   required from here
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:536:50: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
   536 |                                      {hidden_dim * InferenceContext::Instance().GetMaxTokenLenght(),
       |                                       ~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:536:50: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:537:41: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:536:50: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:537:41: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
   537 |                                       k * InferenceContext::Instance().GetMaxTokenLenght(),
       |                                       ~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:537:41: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:545:38: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:537:41: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:545:38: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
   545 |                          {hidden_dim * InferenceContext::Instance().GetMaxTokenLenght(),
       |                           ~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:545:38: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:546:29: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:545:38: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:546:29: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
   546 |                           k * InferenceContext::Instance().GetMaxTokenLenght(),
       |                           ~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:546:29: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp: In instantiation of ‘std::vector<at::Tensor> ds_softmax_context(at::Tensor&, at::Tensor&, int, bool, bool, int, float, bool, bool, int, bool, unsigned int, unsigned int, at::Tensor&) [with T = __half]’:
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:1753:49:   required from here
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:536:50: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:546:29: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp: In instantiation of ‘std::vector<at::Tensor> ds_softmax_context(at::Tensor&, at::Tensor&, int, bool, bool, int, float, bool, bool, int, bool, unsigned int, unsigned int, at::Tensor&) [with T = __half]’:
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:1753:49:   required from here
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:536:50: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
   536 |                                      {hidden_dim * InferenceContext::Instance().GetMaxTokenLenght(),
       |                                       ~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:536:50: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:537:41: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:536:50: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:537:41: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
   537 |                                       k * InferenceContext::Instance().GetMaxTokenLenght(),
       |                                       ~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:537:41: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:545:38: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:537:41: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:545:38: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
   545 |                          {hidden_dim * InferenceContext::Instance().GetMaxTokenLenght(),
       |                           ~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:545:38: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:546:29: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:545:38: warning: narrowing conversion of ‘(((size_t)hidden_dim) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:546:29: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
   546 |                           k * InferenceContext::Instance().GetMaxTokenLenght(),
       |                           ~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:546:29: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
-[9/9] /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicxx pt_binding.o gelu.cuda.o relu.cuda.o layer_norm.cuda.o softmax.cuda.o dequantize.cuda.o apply_rotary_pos_emb.cuda.o transform.cuda.o -shared -lcurand -L/home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/torch/lib -lc10 -lc10_cuda -ltorch_cpu -ltorch_cuda_cu -ltorch_cuda_cpp -ltorch -ltorch_python -L/usr/local/cuda-11.4/lib64 -lcudart -o transformer_inference.so
+/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/deepspeed/ops/csrc/transformer/inference/csrc/pt_binding.cpp:546:29: warning: narrowing conversion of ‘(((size_t)k) * (& InferenceContext::Instance())->InferenceContext::GetMaxTokenLenght())’ from ‘size_t’ {aka ‘long unsigned int’} to ‘long int’ [-Wnarrowing]
+[9/9] /lus/theta-fs0/software/thetagpu/openmpi/openmpi-4.1.4_ucx-1.12.1_gcc-9.4.0/bin/mpicxx pt_binding.o gelu.cuda.o relu.cuda.o layer_norm.cuda.o softmax.cuda.o dequantize.cuda.o apply_rotary_pos_emb.cuda.o transform.cuda.o -shared -lcurand -L/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/torch/lib -lc10 -lc10_cuda -ltorch_cpu -ltorch_cuda_cu -ltorch_cuda_cpp -ltorch -ltorch_python -L/usr/local/cuda-11.4/lib64 -lcudart -o transformer_inference.so
 Loading extension module transformer_inference...
 Loading extension module transformer_inference...
 Time to load transformer_inference op: 33.57252836227417 seconds
@@ -1052,7 +1052,7 @@ huggingface/tokenizers: The current process just got forked, after parallelism h
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module transformer_inference, skipping build step...
 Loading extension module transformer_inference...
 Time to load transformer_inference op: 0.17304539680480957 seconds
@@ -1078,7 +1078,7 @@ huggingface/tokenizers: The current process just got forked, after parallelism h
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module transformer_inference, skipping build step...
 Loading extension module transformer_inference...
 Time to load transformer_inference op: 0.2198350429534912 seconds
@@ -1091,11 +1091,11 @@ To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
 Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module transformer_inference, skipping build step...
 Loading extension module transformer_inference...
 Time to load transformer_inference op: 0.23125028610229492 seconds
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module transformer_inference, skipping build step...
 Loading extension module transformer_inference...
 Time to load transformer_inference op: 0.23965048789978027 seconds
@@ -1108,7 +1108,7 @@ huggingface/tokenizers: The current process just got forked, after parallelism h
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module transformer_inference, skipping build step...
 Loading extension module transformer_inference...
 Time to load transformer_inference op: 0.24375414848327637 seconds
@@ -1116,7 +1116,7 @@ huggingface/tokenizers: The current process just got forked, after parallelism h
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module transformer_inference, skipping build step...
 Loading extension module transformer_inference...
 Time to load transformer_inference op: 0.25126194953918457 seconds
@@ -1145,7 +1145,7 @@ huggingface/tokenizers: The current process just got forked, after parallelism h
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module transformer_inference, skipping build step...
 Loading extension module transformer_inference...
 Time to load transformer_inference op: 0.31780099868774414 seconds
@@ -1153,7 +1153,7 @@ huggingface/tokenizers: The current process just got forked, after parallelism h
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module transformer_inference, skipping build step...
 Loading extension module transformer_inference...
 Time to load transformer_inference op: 0.1689906120300293 seconds
@@ -1161,7 +1161,7 @@ huggingface/tokenizers: The current process just got forked, after parallelism h
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
@@ -1201,7 +1201,7 @@ huggingface/tokenizers: The current process just got forked, after parallelism h
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module transformer_inference, skipping build step...
 Loading extension module transformer_inference...
 Time to load transformer_inference op: 0.2203667163848877 seconds
@@ -1210,7 +1210,7 @@ huggingface/tokenizers: The current process just got forked, after parallelism h
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module transformer_inference, skipping build step...
 Loading extension module transformer_inference...
 Time to load transformer_inference op: 0.2368457317352295 seconds
@@ -1218,7 +1218,7 @@ huggingface/tokenizers: The current process just got forked, after parallelism h
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module transformer_inference, skipping build step...
 Loading extension module transformer_inference...
 Time to load transformer_inference op: 0.2534804344177246 seconds
@@ -1226,7 +1226,7 @@ huggingface/tokenizers: The current process just got forked, after parallelism h
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module transformer_inference, skipping build step...
 Loading extension module transformer_inference...
 Time to load transformer_inference op: 0.2447066307067871 seconds
@@ -1234,7 +1234,7 @@ huggingface/tokenizers: The current process just got forked, after parallelism h
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module transformer_inference, skipping build step...
 Loading extension module transformer_inference...
 Time to load transformer_inference op: 0.27658915519714355 seconds
@@ -1248,17 +1248,17 @@ huggingface/tokenizers: The current process just got forked, after parallelism h
 To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module transformer_inference, skipping build step...
 Loading extension module transformer_inference...
 Time to load transformer_inference op: 0.3282308578491211 seconds
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module transformer_inference, skipping build step...
 Loading extension module transformer_inference...
 Time to load transformer_inference op: 0.8675589561462402 seconds
 *****************[end] Initialized Actor Model [end] (duration: 119.75s)******************
 *************************[start] Initializing Ref Model [start] **************************
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
@@ -1266,23 +1266,23 @@ Time to load utils op: 1.891409158706665 seconds
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
 Time to load utils op: 0.8250091075897217 seconds
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
 Time to load utils op: 0.0019402503967285156 seconds
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
 Time to load utils op: 0.0012993812561035156 seconds
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
 Time to load utils op: 0.0013997554779052734 seconds
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
 Time to load utils op: 0.0013012886047363281 seconds
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
 Time to load utils op: 0.0018343925476074219 seconds
@@ -1420,7 +1420,7 @@ Time to load utils op: 0.0018343925476074219 seconds
     "prescale_gradients": false, 
     "wall_clock_breakdown": false
 }
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
 Time to load utils op: 0.0015451908111572266 seconds
@@ -1431,7 +1431,7 @@ To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
 Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module fused_adam, skipping build step...
 Loading extension module fused_adam...
 Time to load fused_adam op: 0.0030541419982910156 seconds
@@ -1440,7 +1440,7 @@ To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
 Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module fused_adam, skipping build step...
 Loading extension module fused_adam...
 Time to load fused_adam op: 0.0025348663330078125 seconds
@@ -1449,7 +1449,7 @@ To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
 Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module fused_adam, skipping build step...
 Loading extension module fused_adam...
 Time to load fused_adam op: 0.0019736289978027344 seconds
@@ -1462,7 +1462,7 @@ To disable this warning, you can either:
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
 Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module fused_adam, skipping build step...
 Loading extension module fused_adam...
 Time to load fused_adam op: 0.0029854774475097656 seconds
@@ -1471,12 +1471,12 @@ To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
 Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module fused_adam, skipping build step...
 Loading extension module fused_adam...
 Time to load fused_adam op: 0.0022940635681152344 seconds
 Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module fused_adam, skipping build step...
 Loading extension module fused_adam...
 Time to load fused_adam op: 0.0029518604278564453 seconds
@@ -1485,15 +1485,15 @@ To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
 Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module fused_adam, skipping build step...
 Loading extension module fused_adam...
 Time to load fused_adam op: 0.002866029739379883 seconds
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
 Time to load utils op: 0.0012390613555908203 seconds
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
 Time to load utils op: 0.003458738327026367 seconds
@@ -1502,12 +1502,12 @@ To disable this warning, you can either:
 	- Avoid using `tokenizers` before the fork if possible
 	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
 Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module fused_adam, skipping build step...
 Loading extension module fused_adam...
 Time to load fused_adam op: 0.002884387969970703 seconds
 [2023-04-14 08:46:30,739] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed info: version=0.9.0, git-hash=unknown, git-branch=unknown
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
 Time to load utils op: 0.5726070404052734 seconds
@@ -1515,13 +1515,13 @@ Time to load utils op: 0.5726070404052734 seconds
 Rank: 6 partition count [8, 8] and sizes[(41365824, False), (33792, False)] 
 Rank: 7 partition count [8, 8] and sizes[(41365824, False), (33792, False)] 
 Rank: 1 partition count [8, 8] and sizes[(41365824, False), (33792, False)] 
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
 Time to load utils op: 0.0016791820526123047 seconds
 [2023-04-14 08:46:36,743] [INFO] [logging.py:96:log_dist] [Rank 0] Removing param_group that has no 'params' in the client Optimizer
 [2023-04-14 08:46:36,743] [INFO] [logging.py:96:log_dist] [Rank 0] Using client Optimizer as basic optimizer
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
 Time to load utils op: 0.002676725387573242 seconds
@@ -1532,15 +1532,15 @@ Time to load utils op: 0.002676725387573242 seconds
 [2023-04-14 08:46:36,763] [INFO] [stage_1_and_2.py:134:__init__] Allgather bucket size 500,000,000
 [2023-04-14 08:46:36,763] [INFO] [stage_1_and_2.py:135:__init__] CPU Offload: False
 [2023-04-14 08:46:36,763] [INFO] [stage_1_and_2.py:136:__init__] Round robin gradient partitioning: False
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
 Time to load utils op: 0.0006117820739746094 seconds
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
 Time to load utils op: 0.002028226852416992 seconds
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
 Time to load utils op: 0.0013179779052734375 seconds
@@ -1549,27 +1549,27 @@ Rank: 3 partition count [8, 8] and sizes[(41365824, False), (33792, False)]
 Rank: 4 partition count [8, 8] and sizes[(41365824, False), (33792, False)] 
 Rank: 5 partition count [8, 8] and sizes[(41365824, False), (33792, False)] 
 Rank: 2 partition count [8, 8] and sizes[(41365824, False), (33792, False)] 
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...Time to load utils op: 0.0013089179992675781 seconds
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...Time to load utils op: 0.0013089179992675781 seconds
 
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
 Time to load utils op: 0.0011861324310302734 seconds
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
-Time to load utils op: 0.0008509159088134766 secondsUsing /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Time to load utils op: 0.0008509159088134766 secondsUsing /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
 Time to load utils op: 0.0007336139678955078 seconds
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...Time to load utils op: 0.0008528232574462891 seconds
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...Time to load utils op: 0.0008528232574462891 seconds
 
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
@@ -1736,26 +1736,26 @@ Time to load utils op: 0.0012271404266357422 seconds
         "tp_gather_partition_size": 8
     }
 }
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
 Time to load utils op: 0.0017056465148925781 seconds
 *****************[end] Initialized Critic Model [end] (duration: 31.02s)******************
 ************************[start] Initializing Reward Model [start] ************************
 [2023-04-14 08:47:02,078] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed info: version=0.9.0, git-hash=unknown, git-branch=unknown
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
 Time to load utils op: 1.1240429878234863 seconds
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
 Time to load utils op: 0.42214012145996094 seconds
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
 Time to load utils op: 0.002139568328857422 seconds
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
 Time to load utils op: 0.0012676715850830078 seconds
@@ -1892,22 +1892,22 @@ Time to load utils op: 0.0012676715850830078 seconds
     "prescale_gradients": false, 
     "wall_clock_breakdown": false
 }
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
 Time to load utils op: 0.0011258125305175781 seconds
 *****************[end] Initialized Reward Model [end] (duration: 37.09s)******************
 ***** Running training *****
 Beginning of Epoch 1/1, Total Generation Batches 8260
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
 Time to load utils op: 0.009740114212036133 seconds
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
 Time to load utils op: 0.001196146011352539 seconds
-Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
+Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
 No modifications detected for re-loaded extension module utils, skipping build step...
 Loading extension module utils...
 Time to load utils op: 0.001905679702758789 seconds