up

Browse files

Files changed (6) hide show

model/dict.txt +0 -0
model/gpt2-merges.txt +0 -0
model/gpt2-vocab.json +0 -0
model/restored.pt +3 -0
run.sh +2 -0
run_model.py +65 -0

model/dict.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

model/gpt2-merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

model/gpt2-vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

model/restored.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:527a6e3e1b2446d6e9ecf436db8066430ef752935c1fd1ba9fa66f3cd8e307cd
+size 13317196205

run.sh ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ #!/usr/bin/env bash
2	+ CUDA_VISIBLE_DEVICES="0" torchrun run_model.py --pipeline-model-parallel-size 1 --tensor-model-parallel-size 1  # launch run_model.py through torchrun with only GPU 0 visible and both model-parallel sizes set to 1

run_model.py ADDED Viewed

	@@ -0,0 +1,65 @@

+#!/usr/bin/env python3
+#!/usr/bin/env python3
+import os
+from transformers import AutoTokenizer, GPT2Tokenizer
+from megatron.initialize import initialize_megatron
+from metaseq import checkpoint_utils
+import torch
+path = "./model"
+# just need to initialize args with something,
+# => doesn't need to correspond to the "correct" architecture for this checkpoint
+initialize_megatron(args_defaults={
+    "micro_batch_size": 1,
+    "num_layers": 12,
+    "hidden_size": 768,
+    "num_attention_heads": 12,
+    "max_position_embeddings": 2048,
+    "encoder_seq_length": 2048
+})
+vocab_file = os.path.join(path, "gpt2-vocab.json")
+merges_file = os.path.join(path, "gpt2-merges.txt")
+tokenizer = GPT2Tokenizer(vocab_file, merges_file)
+tokenizer.save_pretrained(path)
+checkpoint = checkpoint_utils.load_model_ensemble_and_task(
+    [os.path.join(path, "restored.pt")],
+    arg_overrides={
+        "vocab_filename": vocab_file,
+        "merges_filename": merges_file,
+    }
+)
+model = checkpoint[0][0].eval()
+model = model.cuda().half()
+# forward passes
+def single_batch_forward_logits(prompts):
+    input_ids = tokenizer(prompts, return_tensors="pt").input_ids
+    input_ids = torch.cat([torch.tensor([[0]]), input_ids], dim=-1)
+    input_ids = input_ids.cuda()
+    with torch.no_grad():
+        logits = model(input_ids)[0]
+    return logits
+prompts = [
+    "Today is a beautiful day and I want to",
+    "In the city of",
+    "Paris is the capital of France and",
+    "Computers and mobile phones have taken",
+]
+print("Next word generation")
+for prompt in prompts:
+    print("-------------")
+    print(f"Prompt: {prompt}...\n")
+    logits = single_batch_forward_logits(prompt)
+    pred_next_token = torch.argmax(logits[0, -1], -1)
+    next_token = tokenizer.convert_ids_to_tokens([pred_next_token])
+    next_token = next_token[0].replace("Ġ", "")
+    print(f"Next word: {next_token}")
+    print("-------------")