kirigayahitsugi committed
Commit 2c08ca8
1 parent: a31d0f4

Update README.md

Files changed (1)
  1. README.md +33 -17
README.md CHANGED
@@ -39,7 +39,10 @@ import torch
 import torch.nn as nn
 from transformers import AutoConfig, AutoModel, AutoModelForCausalLM
 import torch.nn.functional as F
-from transformers import AutoTokenizer
+from transformers import AutoTokenizer
+import os
+from safetensors.torch import load_file
+from huggingface_hub import snapshot_download
 
 def get_tokenizer(pretrain, model, padding_side="left", use_fast=True):
     tokenizer = AutoTokenizer.from_pretrained(pretrain, trust_remote_code=True, use_fast=use_fast)
@@ -50,22 +53,17 @@ def get_tokenizer(pretrain, model, padding_side="left", use_fast=True):
     model.config.pad_token_id = tokenizer.pad_token_id
     return tokenizer
 
-def get_reward_model(base_causal_model, base_llm_model, is_general_preference: bool=False, add_prompt_head: bool=False, value_head_dim: int=2):
+def get_reward_model(base_causal_model, base_llm_model, value_head_dim: int, add_prompt_head: bool, is_general_preference: bool=False):
     class CustomRewardModel(base_causal_model):
 
         def __init__(self, config: AutoConfig):
             super().__init__(config)
             setattr(self, self.base_model_prefix, base_llm_model(config))
-            if not is_general_preference:
-                self.value_head = nn.Linear(config.hidden_size, 1, bias=False)
-            else:
-                self.value_head = nn.Linear(config.hidden_size, value_head_dim, bias=False)
-                if add_prompt_head:
-                    self.prompt_head = nn.Linear(config.hidden_size, value_head_dim // 2, bias=False)
-
-            self.is_general_preference = is_general_preference
+            self.is_general_preference = is_general_preference
 
-            self.post_init()
+            self.value_head = nn.Linear(config.hidden_size, value_head_dim, bias=False)
+            if add_prompt_head:
+                self.prompt_head = nn.Linear(config.hidden_size, value_head_dim // 2, bias=False)
 
         def custom_forward(
             self,
@@ -102,7 +100,7 @@ def get_reward_model(base_causal_model, base_llm_model, is_general_preference: b
                 eos_indices = attention_mask.size(1) - 1 - attention_mask.long().fliplr().argmax(dim=1)
                 eos_indices = eos_indices.unsqueeze(1) # Change shape to [batch_size, 1]
                 reward_list = []
-                for dim in range(value_head_dim):
+                for dim in range(self.value_head.out_features):
                     reward_list.append(values[:,:,dim].gather(dim=1, index=eos_indices))
                 reward = torch.cat(reward_list, dim=1)
                 reward = F.normalize(reward, p=2, dim=-1) # Shape will be [batch_size, value_head_dim]
@@ -156,11 +154,10 @@ def generate_high_dim_result_with_prompt(model, value_head_dim, chosen_reward, r
     return result
 
 class GPMPipeline:
-    def __init__(self, model_name_or_path, device=torch.device("cuda:0"), is_general_preference: bool=True, add_prompt_head: bool=True, value_head_dim: int=2, bf16: bool=True, truncation: bool=True, max_length: int=4096, padding: bool=True, tau: float=0.1):
+    def __init__(self, model_name_or_path, device=torch.device("cuda:0"), is_general_preference: bool=True, bf16: bool=True, truncation: bool=True, max_length: int=4096, padding: bool=True, tau: float=0.1):
         self.device = device
         self.is_general_preference = is_general_preference
-        self.add_prompt_head = add_prompt_head
-        self.value_head_dim = value_head_dim
+
         self.truncation = truncation
         self.max_length = max_length
         self.padding = padding
@@ -170,7 +167,24 @@ class GPMPipeline:
         config._attn_implementation = "flash_attention_2"
         base_class = AutoModel._model_mapping[type(config)]
         base_causal_class = AutoModelForCausalLM._model_mapping.get(type(config), None)
-        cls_class = get_reward_model(base_causal_class, base_class, is_general_preference, add_prompt_head, value_head_dim)
+
+        try:
+            dir_path = snapshot_download(repo_id=model_name_or_path)
+        except Exception as e:
+            dir_path = model_name_or_path
+        combined_weights = {}
+        for filename in os.listdir(dir_path):
+            if filename.endswith(".safetensors"):
+                file_path = os.path.join(dir_path, filename)
+                weights = load_file(file_path)
+                combined_weights.update(weights)
+
+        if "value_head.weight" in combined_weights:
+            self.value_head_dim = combined_weights["value_head.weight"].shape[0]
+
+        self.add_prompt_head = True if "prompt_head.weight" in combined_weights else False
+
+        cls_class = get_reward_model(base_causal_class, base_class, add_prompt_head=self.add_prompt_head, value_head_dim=self.value_head_dim, is_general_preference=is_general_preference)
 
         # configure model
         self.model = cls_class.from_pretrained(
@@ -179,6 +193,7 @@ class GPMPipeline:
             trust_remote_code=True,
             torch_dtype=torch.bfloat16 if bf16 else "auto",
         )
+
         # configure tokenizer
         self.tokenizer = get_tokenizer(model_name_or_path, self.model, "left", use_fast=True)
         self.tokenizer.truncation_side = "right"
@@ -249,12 +264,13 @@ context2 = [
     {"role": "assistant", "content": response2}
 ]
 
-rm = GPMPipeline("general-preference/GPM-Gemma-2-9B-it", value_head_dim=4)
+rm = GPMPipeline("general-preference/GPM-Gemma-2-9B")
 
 reward1, prompt_hidden_state = rm([context1], return_prompt=True)
 reward2 = rm([context2])
 
 result = generate_high_dim_result_with_prompt(rm.model, rm.value_head_dim, reward1, reward2, prompt_hidden_state)
+# score = result / rm.tau
 
 result_batch = result.float().cpu().detach().numpy().tolist()
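
Note on the change: after this commit the README's `GPMPipeline` no longer takes `value_head_dim` or `add_prompt_head` as constructor arguments. It derives both from the checkpoint itself by loading the `.safetensors` shards and inspecting `value_head.weight` and `prompt_head.weight`. A minimal standalone sketch of that introspection step follows; the helper name `inspect_gpm_heads` and its standalone form are illustrative assumptions, not code from the repository.

```python
import os
from safetensors.torch import load_file
from huggingface_hub import snapshot_download

def inspect_gpm_heads(model_name_or_path: str):
    """Illustrative helper: recover head configuration from the .safetensors shards."""
    try:
        # Resolve a Hub repo id to a local snapshot; fall back to a local directory path.
        dir_path = snapshot_download(repo_id=model_name_or_path)
    except Exception:
        dir_path = model_name_or_path

    combined_weights = {}
    for filename in os.listdir(dir_path):
        if filename.endswith(".safetensors"):
            # load_file returns a dict mapping tensor names to tensors for one shard.
            combined_weights.update(load_file(os.path.join(dir_path, filename)))

    # nn.Linear(hidden_size, value_head_dim) stores its weight as
    # [value_head_dim, hidden_size], so dim 0 gives the value-head dimension.
    value_head_dim = combined_weights["value_head.weight"].shape[0]
    add_prompt_head = "prompt_head.weight" in combined_weights
    return value_head_dim, add_prompt_head

# Assumed usage:
# value_head_dim, add_prompt_head = inspect_gpm_heads("general-preference/GPM-Gemma-2-9B")
```

This mirrors what the updated `GPMPipeline.__init__` now does inline, which is why the constructor arguments could be dropped.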
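The usage snippet also gains the commented hint `# score = result / rm.tau`. The diff does not say how that scaled score should be consumed; one common convention (an assumption here, not something stated in this commit) is to pass the temperature-scaled preference score through a sigmoid to read it as a win probability of `response1` over `response2`.

```python
import torch

# `result` and `rm` come from the README snippet above.
# Assumption: sigmoid of the tau-scaled score is read as P(response1 preferred over response2).
score = result / rm.tau
prob_1_over_2 = torch.sigmoid(score)
print(prob_1_over_2.float().cpu().detach().numpy().tolist())
```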