add model

Browse files

Files changed (7) hide show

config.json +30 -0
model/merges.txt +0 -0
model/special_tokens_map.json +1 -0
model/tokenizer_config.json +1 -0
model/vocab.json +0 -0
run.sh +1 -1
run_model.py +32 -8

config.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "activation_dropout": 0.0,
+  "activation_function": "relu",
+  "architectures": [
+    "OPTModel"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "d_model": 4096,
+  "decoder_layernorm": false,
+  "decoder_start_token_id": 2,
+  "dropout": 0.1,
+  "eos_token_id": 2,
+  "ffn_dim": 16384,
+  "init_std": 0.02,
+  "layerdrop": 0.0,
+  "max_position_embeddings": 2048,
+  "model_type": "opt",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 32,
+  "output_projection": true,
+  "pad_token_id": 1,
+  "scale_embedding": false,
+  "share_input_output_embed": true,
+  "torch_dtype": "float16",
+  "transformers_version": "4.19.0.dev0",
+  "use_cache": false,
+  "vocab_size": 50272,
+  "word_embed_proj_dim": 4096
+}

model/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

model/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"bos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}}

model/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"errors": "replace", "unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": false, "tokenizer_class": "GPT2Tokenizer"}

model/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run.sh CHANGED Viewed

	@@ -1,2 +1,2 @@
1	#!/usr/bin/env bash
2	- CUDA_VISIBLE_DEVICES="0" torchrun run_model.py --pipeline-model-parallel-size 1 --tensor-model-parallel-size 1


1	#!/usr/bin/env bash
2	+ CUDA_VISIBLE_DEVICES="0,3" torchrun run_model.py --pipeline-model-parallel-size 1 --tensor-model-parallel-size 1

run_model.py CHANGED Viewed

@@ -1,9 +1,9 @@
 #!/usr/bin/env python3
-#!/usr/bin/env python3
 import os
 from transformers import AutoTokenizer, GPT2Tokenizer
 from megatron.initialize import initialize_megatron
 from metaseq import checkpoint_utils
 import torch
 path = "./model"
@@ -34,32 +34,56 @@ checkpoint = checkpoint_utils.load_model_ensemble_and_task(
 )
 model = checkpoint[0][0].eval()
-model = model.cuda().half()
 # forward passes
 def single_batch_forward_logits(prompts):
     input_ids = tokenizer(prompts, return_tensors="pt").input_ids
     input_ids = torch.cat([torch.tensor([[0]]), input_ids], dim=-1)
-    input_ids = input_ids.cuda()
     with torch.no_grad():
         logits = model(input_ids)[0]
     return logits
 prompts = [
-    "Today is a beautiful day and I want to",
-    "In the city of",
-    "Paris is the capital of France and",
-    "Computers and mobile phones have taken",
 ]
 print("Next word generation")
 for prompt in prompts:
     print("-------------")
     print(f"Prompt: {prompt}...\n")
-    logits = single_batch_forward_logits(prompt)
     pred_next_token = torch.argmax(logits[0, -1], -1)
     next_token = tokenizer.convert_ids_to_tokens([pred_next_token])
     next_token = next_token[0].replace("Ġ", "")
     print(f"Next word: {next_token}")
     print("-------------")

 #!/usr/bin/env python3
 import os
 from transformers import AutoTokenizer, GPT2Tokenizer
 from megatron.initialize import initialize_megatron
 from metaseq import checkpoint_utils
+from transformers import OPTForCausalLM
 import torch
 path = "./model"
 )
 model = checkpoint[0][0].eval()
+model = model.to("cuda:0").half()
+hf_model = OPTForCausalLM.from_pretrained("../opt-6.7b").to("cuda:1").half()
 # forward passes
 def single_batch_forward_logits(prompts):
     input_ids = tokenizer(prompts, return_tensors="pt").input_ids
     input_ids = torch.cat([torch.tensor([[0]]), input_ids], dim=-1)
+    input_ids = input_ids.to("cuda:0")
     with torch.no_grad():
         logits = model(input_ids)[0]
     return logits
+# forward hf
+def forward_hf(prompts):
+    input_ids = tokenizer(prompts, return_tensors="pt").input_ids
+    input_ids = torch.cat([torch.tensor([[0]]), input_ids], dim=-1)
+    input_ids = input_ids.to("cuda:1")
+    with torch.no_grad():
+        logits = hf_model(input_ids)[0]
+    return logits
 prompts = [
+   "Today is a beautiful day and I want to",
+   "In the city of",
+   "Paris is the capital of France and",
+   "Computers and mobile phones have taken",
 ]
+prompts = [
+   "Today is a beautiful day and I want to",
+]
+#import ipdb; ipdb.set_trace()
 print("Next word generation")
 for prompt in prompts:
     print("-------------")
     print(f"Prompt: {prompt}...\n")
+    logits_fsq = single_batch_forward_logits(prompt)
+    pred_next_token = torch.argmax(logits_fsq[0, -1], -1)
+    next_token = tokenizer.convert_ids_to_tokens([pred_next_token])
+    next_token = next_token[0].replace("Ġ", "")
+    print(f"Next word: {next_token}")
+    print("-------------")
+    logits = forward_hf(prompt)
     pred_next_token = torch.argmax(logits[0, -1], -1)
     next_token = tokenizer.convert_ids_to_tokens([pred_next_token])
     next_token = next_token[0].replace("Ġ", "")
     print(f"Next word: {next_token}")
     print("-------------")
+torch.allclose(logits_fsq.cpu(), logits.cpu(), atol=1e-3)