evan-nexusflow committed on
Commit
e71206c
·
verified ·
1 Parent(s): 60c338b

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +82 -0
README.md ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### Usage
2
+
3
+ ```python
4
from typing import Dict

import torch
from torch import nn
from transformers import LlamaModel, LlamaPreTrainedModel, TextClassificationPipeline
from transformers import AutoTokenizer, pipeline
8
+
9
class AtheneForSequenceClassification(LlamaPreTrainedModel):
    """Llama backbone with a scalar value head read out at the CLS token.

    ``forward`` projects the final hidden state of every position to a
    single scalar with ``v_head`` and returns, for each sequence in the
    batch, the scalar at the last position whose token id equals ``CLS_ID``.
    """

    def __init__(self, config):
        """Build the backbone and the value head from ``config``."""
        super().__init__(config)
        self.model = LlamaModel(config)
        # One scalar per position, no bias term.
        self.v_head = nn.Linear(config.hidden_size, 1, bias=False)
        # Token id whose (last) position is used to read out the score.
        self.CLS_ID = 128003
        # Initialize weights and apply final processing
        self.post_init()

    def get_device(self):
        """Return the device reported by the backbone model."""
        return self.model.device

    def forward(
        self,
        input_ids=None,
        past_key_values=None,
        attention_mask=None,
        position_ids=None,
    ):
        """Score each sequence at its last CLS token.

        Args:
            input_ids: Token ids, indexed as ``input_ids[batch, position]``.
            past_key_values: Accepted for signature compatibility only; it
                is not forwarded to the backbone.
            attention_mask: Passed through to the backbone unchanged.
            position_ids: Passed through to the backbone unchanged.

        Returns:
            ``{"scores": tensor}`` with one scalar per batch row, stacked
            along the first dimension.

        Raises:
            IndexError: If a sequence contains no token equal to ``CLS_ID``.
        """
        transformer_outputs = self.model(
            input_ids,
            attention_mask=attention_mask,
            position_ids=position_ids,
            output_hidden_states=True,
        )
        hidden_states = transformer_outputs.hidden_states[-1]
        # v_head emits a trailing dimension of size 1; drop it so that
        # rewards is indexed as rewards[batch, position].
        rewards = self.v_head(hidden_states).squeeze(-1)

        scores = []
        for row, (row_ids, row_rewards) in enumerate(zip(input_ids, rewards)):
            cls_positions = (row_ids == self.CLS_ID).nonzero()
            if cls_positions.numel() == 0:
                # Same exception type the bare ``[-1]`` lookup would raise,
                # but with a message that names the actual problem.
                raise IndexError(
                    f"sequence {row} contains no CLS token (id {self.CLS_ID}); "
                    "append the tokenizer's cls_token before scoring"
                )
            # When the CLS id occurs several times, the last one wins.
            scores.append(row_rewards[cls_positions[-1].item()])
        return {"scores": torch.stack(scores)}
46
+
47
+ # Make a pipeline to handle pre and post-processing
48
class AtheneRewardPipeline(TextClassificationPipeline):
    """Text-classification pipeline that returns the model's scalar score."""

    def preprocess(self, inputs, **tokenizer_kwargs) -> Dict[str, torch.Tensor]:
        """Render ``inputs`` with the chat template, append CLS, and tokenize.

        ``tokenizer_kwargs`` is accepted but not used; the tokenizer options
        are fixed below.
        """
        tensor_format = self.framework

        chat_text = self.tokenizer.apply_chat_template(inputs, tokenize=False)
        # The CLS token is placed after the rendered conversation.
        prompt = chat_text + self.tokenizer.cls_token

        encode_options = {
            "return_tensors": tensor_format,
            "max_length": 4096,
            "padding": "longest",
            "truncation": True,
        }
        return self.tokenizer(prompt, **encode_options)

    def postprocess(self, model_outputs, function_to_apply=None, top_k=1, _legacy=True):
        """Return the ``"scores"`` entry of ``model_outputs`` as a Python float.

        ``function_to_apply``, ``top_k`` and ``_legacy`` are accepted but
        not used.
        """
        score_tensor = model_outputs["scores"]
        return score_tensor.cpu().float().item()
67
+
68
# Initialize the model
# The dtype must be spelled torch.bfloat16; a bare `bfloat16` is not defined.
model = AtheneForSequenceClassification.from_pretrained(
    "Nexusflow/Athene-RM-70B",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained("Nexusflow/Athene-RM-70B")

# Initialize the pipeline
# This is module-level code, so the objects created above are passed
# directly; there is no `self` in scope here.
pipe = pipeline(
    task="text-classification",
    model=model,
    tokenizer=tokenizer,
    pipeline_class=AtheneRewardPipeline,
)
79
+
80
+
81
+
82
+ ```