Hilbertmeng committed
Commit ad52ebd · 1 Parent(s): b85543c

fix typo & dtype

Files changed (3):
  1. README.md +5 -4
  2. generation_demo.py +5 -4
  3. modeling_muddformer.py +1 -1
README.md CHANGED
@@ -30,13 +30,14 @@ import os
 os.environ['TOKENIZERS_PARALLELISM'] = 'false'
 
 device = torch.device('cuda:0')
+dtype = torch.bfloat16
 MAX_BATCH_SIZE = 1
 MAX_SEQ_LENGTH = 2048
 NUM_TOKENS_TO_GENERATE = 10
 COMPILE = True
-OPTIMIZED_COMPPILE = False
+OPTIMIZED_COMPILE = False
 
-if OPTIMIZED_COMPPILE:
+if OPTIMIZED_COMPILE:
     import torch._dynamo.config
     import torch._inductor.config
     torch._dynamo.config.cache_size_limit = 64
@@ -47,9 +48,9 @@ if OPTIMIZED_COMPPILE:
 tokenizer = AutoTokenizer.from_pretrained("Caiyun-AI/MUDDFormer-2.8B")
 model = AutoModelForCausalLM.from_pretrained("Caiyun-AI/MUDDFormer-2.8B", trust_remote_code=True)
 
-_ = model.to(device=device,dtype=torch.bfloat16)
+_ = model.to(device=device,dtype=dtype)
 with torch.device(device):
-    model.setup_caches(max_batch_size=MAX_BATCH_SIZE, max_seq_length=MAX_SEQ_LENGTH)
+    model.setup_caches(max_batch_size=MAX_BATCH_SIZE, max_seq_length=MAX_SEQ_LENGTH, dtype=dtype)
 
 def decode_one_token(model, cur_token, input_pos):
     logits = model(cur_token, input_pos=input_pos, return_tensor=True)
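
Taken together, the README change introduces a single `dtype` variable and threads it through both the weight cast and the cache allocation. A minimal sketch of how the corrected setup reads after this commit, assuming the README's existing `transformers` imports (the surrounding generation code is unchanged by the commit):

```python
import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

os.environ['TOKENIZERS_PARALLELISM'] = 'false'

device = torch.device('cuda:0')
dtype = torch.bfloat16   # single source of truth for weights and caches
MAX_BATCH_SIZE = 1
MAX_SEQ_LENGTH = 2048

tokenizer = AutoTokenizer.from_pretrained("Caiyun-AI/MUDDFormer-2.8B")
model = AutoModelForCausalLM.from_pretrained("Caiyun-AI/MUDDFormer-2.8B", trust_remote_code=True)

# Cast the weights and pre-allocate the static caches in the same dtype.
_ = model.to(device=device, dtype=dtype)
with torch.device(device):
    model.setup_caches(max_batch_size=MAX_BATCH_SIZE, max_seq_length=MAX_SEQ_LENGTH, dtype=dtype)
```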
generation_demo.py CHANGED
@@ -6,13 +6,14 @@ import os
 os.environ['TOKENIZERS_PARALLELISM'] = 'false'
 
 device = torch.device('cuda:0')
+dtype = torch.bfloat16
 MAX_BATCH_SIZE = 1
 MAX_SEQ_LENGTH = 2048
 NUM_TOKENS_TO_GENERATE = 10
 COMPILE = True
-OPTIMIZED_COMPPILE = False
+OPTIMIZED_COMPILE = False
 
-if OPTIMIZED_COMPPILE:
+if OPTIMIZED_COMPILE:
     import torch._dynamo.config
     import torch._inductor.config
     torch._dynamo.config.cache_size_limit = 64
@@ -23,9 +24,9 @@ if OPTIMIZED_COMPPILE:
 tokenizer = AutoTokenizer.from_pretrained("Caiyun-AI/MUDDFormer-2.8B")
 model = AutoModelForCausalLM.from_pretrained("Caiyun-AI/MUDDFormer-2.8B", trust_remote_code=True)
 
-_ = model.to(device=device,dtype=torch.bfloat16)
+_ = model.to(device=device,dtype=dtype)
 with torch.device(device):
-    model.setup_caches(max_batch_size=MAX_BATCH_SIZE, max_seq_length=MAX_SEQ_LENGTH)
+    model.setup_caches(max_batch_size=MAX_BATCH_SIZE, max_seq_length=MAX_SEQ_LENGTH, dtype=dtype)
 
 def decode_one_token(model, cur_token, input_pos):
     logits = model(cur_token, input_pos=input_pos, return_tensor=True)
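
generation_demo.py defines `decode_one_token` as the single-step decode that gets wrapped with `torch.compile` when `COMPILE` is True. As a rough sketch of how such a helper is typically driven against the pre-allocated caches (the prompt handling, `torch.compile` options, and greedy argmax sampling below are illustrative assumptions, not code from this commit):

```python
import torch

# Assumes model, tokenizer, device and decode_one_token from generation_demo.py are in scope.
@torch.no_grad()
def greedy_generate(prompt, num_new_tokens, compile_decode=True):
    step = decode_one_token
    if compile_decode:
        # mode/fullgraph are typical choices for static-cache decoding, not taken from the demo.
        step = torch.compile(decode_one_token, mode="reduce-overhead", fullgraph=True)

    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
    prompt_len = input_ids.shape[1]

    # Prefill: run the whole prompt once to populate the KV/layer caches.
    input_pos = torch.arange(prompt_len, device=device)
    logits = model(input_ids, input_pos=input_pos, return_tensor=True)
    next_token = logits[:, -1].argmax(dim=-1, keepdim=True)

    generated = [next_token]
    for i in range(num_new_tokens - 1):
        input_pos = torch.tensor([prompt_len + i], device=device)
        logits = step(model, next_token, input_pos)
        next_token = logits[:, -1].argmax(dim=-1, keepdim=True)
        generated.append(next_token)
    return tokenizer.decode(torch.cat(generated, dim=-1)[0])
```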
modeling_muddformer.py CHANGED
@@ -119,7 +119,7 @@ class MUDDFormer(PreTrainedModel):
         self.max_batch_size = max_batch_size
         if not self.config.is_training:
             if self.use_layer_cache:
-                self.layer_cache = LayerCache(max_batch_size, self.config.n_layer, self.config.dim)
+                self.layer_cache = LayerCache(max_batch_size, self.config.n_layer, self.config.dim, dtype=dtype)
             for b in self.layers:
                 b.attention.kv_cache = KVCache(max_batch_size, max_seq_length, self.config.n_local_heads, head_dim, dtype=dtype)
 
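
The modeling fix mirrors what `KVCache` already did: the layer cache is now allocated in the dtype passed to `setup_caches`, so bfloat16 activations are not stored in a default float32 buffer. A minimal sketch of a dtype-aware cache module, assuming a buffer-based design; the buffer name, shape, and update method are illustrative, not the repository's actual `LayerCache`:

```python
import torch
import torch.nn as nn

class LayerCacheSketch(nn.Module):
    """Caches per-layer outputs for cross-layer (dense) connections during decoding."""

    def __init__(self, max_batch_size, n_layer, dim, dtype=torch.bfloat16):
        super().__init__()
        # Allocating the buffer in the model's dtype avoids an implicit fp32 buffer
        # and the extra memory and casts it would cost at every decode step.
        self.register_buffer(
            "layer_outputs",
            torch.zeros(n_layer + 1, max_batch_size, 1, dim, dtype=dtype),
            persistent=False,
        )

    def update(self, x, layer_idx):
        # x: (max_batch_size, 1, dim) output of layer `layer_idx` for the current token.
        self.layer_outputs[layer_idx] = x
        return self.layer_outputs
```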