Ontocord.AI
committed on
Commit
•
6d25feb
1
Parent(s):
2c3d005
Update README.md
Browse files
README.md
CHANGED
@@ -21,227 +21,61 @@ This model was generated by averaging the weights of the following models
|
|
21 |
|
22 |
- [Try demo on colab](https://colab.research.google.com/drive/1GgB8H30L5r0N--gexdEweK5f1yJfxMd_?usp=sharing)
|
23 |
|
24 |
-
|
25 |
-
# Environment bootstrap for notebook/Colab use: install the required
# third-party packages on first run, then import the modelling pieces.
import os

try:
    import transformers, fasttext, huggingface_hub
except ImportError:
    # Narrowed from a bare `except:` — only a missing package should trigger
    # the install; anything else (e.g. KeyboardInterrupt) must propagate.
    # Best-effort: if the install fails, the imports below will raise.
    os.system("pip install transformers huggingface_hub fasttext")

from transformers import AutoTokenizer, AutoModelForCausalLM
import fasttext
from huggingface_hub import hf_hub_download
import torch
from torch import nn
from transformers.models.gpt_neox.modeling_gpt_neox import GPTNeoXForCausalLM, GPTNeoXLayer
-
class GPTNeoXExpertsForCasualLM(GPTNeoXForCausalLM):
    """GPT-NeoX causal LM with hot-swappable domain-expert layers 9 and 10.

    Stores one pair of transformer layers per domain expert (USPTO, GitHub,
    PubMed abstracts, FreeLaw, ArXiv, a merged chat expert, and the original
    chat layers). At generation time a fastText domain classifier routes the
    prompt to an expert, whose layer pair is spliced into
    ``self.gpt_neox.layers[9:11]``.

    NOTE(review): "Casual" in the class name is presumably a typo for
    "Causal", but the name is part of the published checkpoint's interface
    and is kept as-is.
    """

    # fastText domain classifier shared by all instances; loaded lazily the
    # first time an instance is constructed.
    __expert_classifier = None

    def __init__(self, config):
        # (Removed a no-op `global __expert_classifier` statement: the
        # classifier is only ever accessed as a class attribute, never as a
        # module-level global.)
        super().__init__(config)
        self.config = config
        # One (layer 9, layer 10) pair per expert. The selected pair is
        # swapped into the backbone by generate_with_expert().
        self.orig_chat = nn.ModuleList([GPTNeoXLayer(config) for _ in range(2)])
        self.uspto_expert = nn.ModuleList([GPTNeoXLayer(config) for _ in range(2)])
        self.github_expert = nn.ModuleList([GPTNeoXLayer(config) for _ in range(2)])
        self.pubmed_abstracts_expert = nn.ModuleList([GPTNeoXLayer(config) for _ in range(2)])
        self.freelaw_expert = nn.ModuleList([GPTNeoXLayer(config) for _ in range(2)])
        self.arxiv_expert = nn.ModuleList([GPTNeoXLayer(config) for _ in range(2)])
        self.merged_chat_expert = nn.ModuleList([GPTNeoXLayer(config) for _ in range(2)])
        # Name of the expert whose layers are currently spliced in.
        self.curr_expert = "MergedChat"
        if GPTNeoXExpertsForCasualLM.__expert_classifier is None:
            file_name = hf_hub_download(
                repo_id="Multi-Domain-Expert-Layers/MDEL-theblackcat-chat-5-experts",
                filename="expert_classify.ftz",
            )
            GPTNeoXExpertsForCasualLM.__expert_classifier = fasttext.load_model(file_name)
            print(file_name)

    def predict_expert(self, text):
        """Classify *text* into a Pile domain with the shared fastText model.

        Returns:
            (label, score): ``label`` is the top fastText label with the
            ``__label__`` prefix stripped (e.g. ``"Github"``,
            ``"PubMed-Abstracts"``, ``"USPTO-Backgrounds"``, ``"FreeLaw"``,
            ``"ArXiv"``, ``"StackExchange"``, ``"Pile-CC"``, ...); ``score``
            is the classifier's confidence for that label.
        """
        # fastText expects a single line: strip the chat-role markers and
        # collapse newlines. NOTE(review): the last replace was a no-op
        # `.replace(" ", " ")` in the published snippet — almost certainly an
        # HTML-collapsed double space; restored to squeeze double spaces.
        text = text.replace("<human>: ", " ").replace("<bot>: ", " ").strip().replace("\n", " ").replace("\r", " ").replace("  ", " ")
        answer = GPTNeoXExpertsForCasualLM.__expert_classifier.predict(text)
        label = answer[0][0].replace("__label__", "")
        score = answer[1][0]
        return (label, score)

    def generate_with_expert(self, text, tokenizer, expert="", return_answer_only=False, do_self_contrastive=True, max_length=128, min_length=1, max_return_sequences=1, do_sample=True, do_beam=False, device="cuda", target_lang=None):
        """Generate text, routing through one of the stored experts.

        Args:
            text: a prompt string or list of prompt strings.
            tokenizer: tokenizer matching this model.
            expert: explicit expert name; when empty, the fastText classifier
                picks one based on the first prompt in the batch.
            return_answer_only: strip the prompt prefix from each output.
            do_self_contrastive: pass ``penalty_alpha=0.6`` to ``generate``.
            max_length / min_length: generation length bounds.
            max_return_sequences, do_beam, target_lang: accepted for interface
                compatibility; currently unused in this implementation.
            do_sample: forwarded to ``generate`` (was previously ignored and
                hardcoded to True; the default preserves old behavior).
            device: device the input tensors are moved to.

        Returns:
            List of decoded strings, one per generated sequence.
        """
        tokenizer.pad_token = tokenizer.eos_token

        if isinstance(text, str):
            text = [text]
        # Hack: assume a single expert applies to the whole batch; routing is
        # decided from the first prompt only.
        if not expert:
            label, score = self.predict_expert(text[0])
            if "PubMed" in label or "FreeLaw" in label or "ArXiv" in label or "Github" in label or "USPTO" in label:
                # High confidence -> dedicated expert; medium -> merged
                # expert; otherwise fall back to the original chat layers.
                if score > 0.8:
                    expert = label
                elif score > 0.6:
                    expert = "MergedChat"
                else:
                    expert = "OrigChat"
            else:
                expert = "OrigChat"
        if expert != self.curr_expert:
            print("Switching to expert", expert)
            self.curr_expert = expert
            # Splice the chosen expert's pair into backbone layers 9 and 10.
            for layer_id in range(2):
                if expert == "OrigChat":
                    self.gpt_neox.layers[layer_id + 9] = self.orig_chat[layer_id]
                elif "USPTO" in expert:
                    self.gpt_neox.layers[layer_id + 9] = self.uspto_expert[layer_id]
                elif "Github" in expert:
                    self.gpt_neox.layers[layer_id + 9] = self.github_expert[layer_id]
                elif "PubMed" in expert:
                    self.gpt_neox.layers[layer_id + 9] = self.pubmed_abstracts_expert[layer_id]
                elif "ArXiv" in expert:
                    self.gpt_neox.layers[layer_id + 9] = self.arxiv_expert[layer_id]
                elif "FreeLaw" in expert:
                    self.gpt_neox.layers[layer_id + 9] = self.freelaw_expert[layer_id]
                else:
                    self.gpt_neox.layers[layer_id + 9] = self.merged_chat_expert[layer_id]
        text = [p.strip() for p in text]
        input_ids = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=max_length)
        input_ids = input_ids.to(device)
        with torch.no_grad():
            outputs = self.generate(
                **input_ids,
                max_length=max_length,
                repetition_penalty=1.1,
                min_length=min_length,
                do_sample=do_sample,  # fix: honor the caller's flag (was hardcoded True)
                top_p=0.95,
                # NOTE(review): penalty_alpha normally drives contrastive
                # search with do_sample=False; passing it alongside sampling
                # is kept from the original — verify against the installed
                # transformers version.
                penalty_alpha=0.6 if do_self_contrastive else None,
                top_k=10,
            )
        ret = []
        for i in range(len(outputs)):  # could use batch_decode; kept per-item for the prefix strip below
            out = tokenizer.decode(outputs[i], skip_special_tokens=True)
            if return_answer_only:
                # Assumes decode reproduces the prompt verbatim as a prefix —
                # TODO confirm for tokenizers that normalize whitespace.
                out = out[len(text[i]):].lstrip(".? \n\t")
            ret.append(out)

        return ret
|
148 |
-
|
149 |
-
# Demo: load the tokenizer and the merged-experts checkpoint, then print a
# routed completion for one prompt from each of several domains.
tokenizer = AutoTokenizer.from_pretrained("theblackcat102/pythia-1b-deduped-sft")
tokenizer.pad_token = tokenizer.eos_token

model = GPTNeoXExpertsForCasualLM.from_pretrained(
    "Multi-Domain-Expert-Layers/MDEL-theblackcat-chat-5-experts"
).half().cuda().eval()

demo_prompts = (
    "source code for sorting a list <bot>:",
    "When was Abraham Lincoln born? <bot>:",
    "Medical journal article about ovarian cancer <bot>:",
)
for prompt in demo_prompts:
    print('##')
    print(model.generate_with_expert(prompt, tokenizer)[0])
|
161 |
-
```
|
162 |
-
|
163 |
-
## Produces this output:
|
164 |
|
165 |
```
|
|
|
|
|
|
|
166 |
##
|
167 |
-
Switching to expert
|
168 |
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
|
169 |
-
|
170 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
171 |
##
|
172 |
-
Switching to expert
|
173 |
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
|
174 |
-
|
175 |
|
176 |
-
|
177 |
##
|
178 |
Switching to expert PubMed-Abstracts
|
179 |
-
|
|
|
|
|
|
|
|
|
|
|
180 |
|
181 |
-
In the 1990s and early 2000, there was an increase in the number of new patients with non-ovarian malignancy seen at the National Hospital for Women's Services (Nish) Cancer Screening Service. It is likely that the increase came from a screening programme in the Northern Ireland Health and Care Plan, where people who are not in employment were offered cancer screening by a group of local health care practitioners or nurses. This approach would be appropriate outside the Northern Ireland Cancer Screening Programme and I suspect it was the practice
|
182 |
```
|
183 |
|
184 |
-
|
|
|
185 |
|
186 |
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
187 |
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
# NOTE(review): this snippet is an incomplete fragment from the README — the
# names `model1`, `model2` and `model_uspto` are defined outside the shown
# code, and the trailing `return model1` only parses inside an enclosing
# function whose `def` line is not visible here. Kept byte-for-byte; comments
# only.
#
# Purpose (as shown): build the 5-expert checkpoint by loading per-domain
# expert models and blending their layers 9-10 with the base chat model.
model_github = AutoModelForCausalLM.from_pretrained("Multi-Domain-Expert-Layers/expert-github").float()
model_pubmed_abstracts = AutoModelForCausalLM.from_pretrained("Multi-Domain-Expert-Layers/expert-pubmed_abstracts").float()
model_freelaw = AutoModelForCausalLM.from_pretrained("Multi-Domain-Expert-Layers/expert-freelaw").float()
model_arxiv = AutoModelForCausalLM.from_pretrained("Multi-Domain-Expert-Layers/expert-arxiv").float()

# Base chat model, kept in float32 while averaging weights.
model = AutoModelForCausalLM.from_pretrained("theblackcat102/pythia-1b-deduped-sft").float() # half().cuda().eval()

with torch.no_grad():
    # Keep an untouched copy of the base chat model's layers 9-10.
    for layer_id in [9,10]: #9,10,11,12,13
        model1.orig_chat[layer_id-9] = model.gpt_neox.layers[layer_id]

    # Merged chat expert: 0.6 * model1 + 0.3 * model2 + 0.1 * USPTO expert,
    # averaged in place into model1's own layers.
    for layer_id in [9,10]: #9,10,11,12,13
        for p1, p2, p3 in zip(model1.gpt_neox.layers[layer_id].parameters(), model2.gpt_neox.layers[layer_id].parameters(), model_uspto.gpt_neox.layers[layer_id].parameters()):
            p1.data = p1.data*.6 + p2.data*0.3 + p3.data*0.1
        model1.merged_chat_expert[layer_id-9] = model1.gpt_neox.layers[layer_id]

    # Each domain expert below: 0.6 * expert + 0.4 * base chat, averaged in
    # place into the expert model, then stored on model1.
    #model1.uspto_expert.layers_9_10_11 = []
    for layer_id in [9,10]: #9,10,11,12,13
        for p1, p2 in zip(model_uspto.gpt_neox.layers[layer_id].parameters(), model.gpt_neox.layers[layer_id].parameters()):
            p1.data = p1.data*.6 + p2.data*0.4
        model1.uspto_expert[layer_id-9] = model_uspto.gpt_neox.layers[layer_id]

    #model1.github_expert.layers_9_10_11 = []
    for layer_id in [9,10]: #9,10,11,12,13
        for p1, p2 in zip(model_github.gpt_neox.layers[layer_id].parameters(), model.gpt_neox.layers[layer_id].parameters()):
            p1.data = p1.data*.6 + p2.data*0.4
        model1.github_expert[layer_id-9] = model_github.gpt_neox.layers[layer_id]

    #model1.pubmed_abstracts_expert.layers_9_10_11 = []
    for layer_id in [9,10]: #9,10,11,12,13
        for p1, p2 in zip(model_pubmed_abstracts.gpt_neox.layers[layer_id].parameters(), model.gpt_neox.layers[layer_id].parameters()):
            p1.data = p1.data*.6 + p2.data*0.4
        model1.pubmed_abstracts_expert[layer_id-9] = model_pubmed_abstracts.gpt_neox.layers[layer_id]

    #model1.freelaw_expert.layers_9_10_11 = []
    for layer_id in [9,10]: #9,10,11,12,13
        for p1, p2 in zip(model_freelaw.gpt_neox.layers[layer_id].parameters(), model.gpt_neox.layers[layer_id].parameters()):
            p1.data = p1.data*.6 + p2.data*0.4
        model1.freelaw_expert[layer_id-9] = model_freelaw.gpt_neox.layers[layer_id]

    #model1.arxiv_expert.layers_9_10_11 = []
    for layer_id in [9,10]: #9,10,11,12,13
        for p1, p2 in zip(model_arxiv.gpt_neox.layers[layer_id].parameters(), model.gpt_neox.layers[layer_id].parameters()):
            p1.data = p1.data*.6 + p2.data*0.4
        model1.arxiv_expert[layer_id-9] = model_arxiv.gpt_neox.layers[layer_id]


# Convert back to fp16 and publish the merged checkpoint.
model1 = model1.half().eval()
model1.save_pretrained("MDEL-theblackcat-chat-5-experts", torch_dtype=torch.float16)
model1.push_to_hub("Multi-Domain-Expert-Layers/MDEL-theblackcat-chat-5-experts")
return model1
|
247 |
-
```
|
|
|
21 |
|
22 |
- [Try demo on colab](https://colab.research.google.com/drive/1GgB8H30L5r0N--gexdEweK5f1yJfxMd_?usp=sharing)
|
23 |
|
24 |
+
## Using the automatic routing:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
|
26 |
```
|
27 |
+
#Example with reverse=False:
|
28 |
+
|
29 |
+
[2, 1, -1, -2, -3, -4, -5, -6, -7, -8, -9, -
|
30 |
##
|
31 |
+
Switching to expert USPTO-Backgrounds
|
32 |
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
|
33 |
+
Give me a patent for shoes made of water :
|
34 |
+
Field of the Invention: A shoe that is waterproof and water resistant.
|
35 |
+
|
36 |
+
Description: The shoe is waterproof and resistant to water, but it can be easily washed with soap or water. It also has a rubber-like coating on the bottom to make it waterproof. The rubber-like coating helps the shoe to stay dry when wet.
|
37 |
+
|
38 |
+
The rubber-like coating is used in the lining of a shoe to keep the shoe from getting damaged by water. The rubber-coated lining prevents the shoe from being scratched by any sharp objects.
|
39 |
+
|
40 |
+
The
|
41 |
##
|
42 |
+
Switching to expert FreeLaw
|
43 |
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
|
44 |
+
Court case of Nguyen vs. State of California : The California Supreme Court ruled that the "unreasonable search" clause in the federal constitution is not a violation of the Fourth Amendment, but rather a violation of the Fifth Amendment.
|
45 |
|
46 |
+
The court said that the "unconstitutional search" clause in Article I, Section 20, of the Constitution, which says that any person may be arrested without a warrant and without a warrant for any purpose, except when there is probable cause to believe that they have committed a crime, is a violation of the 5th Amendment, which says that no person shall be arrested without a *w
|
47 |
##
|
48 |
Switching to expert PubMed-Abstracts
|
49 |
+
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
|
50 |
+
Medical journal article about ovarian cancer :
|
51 |
+
|
52 |
+
_URL_0_
|
53 |
+
|
54 |
+
This is a very common thing in the medical field. It's called "endometrial cancer" and it's not actually a cancer, but it's a benign tumor that can be removed by a simple surgical procedure. The reason this is so common is because there are many different types of cancers that can cause this. There are two main types of cancer that I think are most commonly known as "endometrioid" and "endometroid". Endometrioid cancer is the one you're talking about here.
|
55 |
|
|
|
56 |
```
|
57 |
|
58 |
+
|
59 |
+
## Using the original chat model:
|
60 |
|
61 |
```
|
62 |
+
Compared to original chat model
|
63 |
+
##
|
64 |
+
Switching to expert OrigChat
|
65 |
+
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
|
66 |
+
source code for sorting a list :
|
67 |
+
sort(list, key=lambda x: x[1], reverse=True) #Warning: this code generation is experimental. Please inspect the code for any bugs before executing.
|
68 |
+
##
|
69 |
+
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
|
70 |
+
Give me a patent for shoes made of water :
|
71 |
+
Field of the Invention: The invention relates to a shoe with a sole and an upper, wherein the sole is formed by a first layer of material and the upper is formed by a second layer of material.
|
72 |
+
##
|
73 |
+
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
|
74 |
+
Court case of Nguyen vs. State of California :
|
75 |
|
76 |
+
The court ruled that the state's ban on same-sex marriage was unconstitutional, and that it had to be overturned by a higher court. The ruling came in a lawsuit brought by two gay men who were denied the right to marry because of their sexual orientation.
|
77 |
+
##
|
78 |
+
Medical journal article about ovarian cancer :
|
79 |
+
|
80 |
+
_URL_0_
|
81 |
+
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|