---
license: apache-2.0
tags:
- MDEL
---

# Model Name

Multi-Domain-Expert-Layers/MDEL-theblackcat-chat-5-experts

## Model Description
This model was generated by averaging the weights of the following models:
- Multi-Domain-Expert-Layers/expert-pubmed_central
- Multi-Domain-Expert-Layers/expert-freelaw
- Multi-Domain-Expert-Layers/expert-github
- Multi-Domain-Expert-Layers/expert-uspto
- Multi-Domain-Expert-Layers/expert-arxiv
- theblackcat102/pythia-1b-deduped-sft
For each of the domain experts above we also keep a mixture weighted primarily toward that expert (60% expert, 40% chat model); these mixtures are stored in the checkpoint and loaded on demand by the router.

**NOTE:** There is a mistake below: the router selects a pubmed-abstracts expert, but the weights that were merged came from expert-pubmed_central.
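For illustration, here is a minimal sketch of what averaging checkpoint weights means for models that share an architecture. The helper name and the uniform weighting are illustrative assumptions; the exact per-expert mixing weights used for this model appear in `recreate_merged_expert` below.

```python
import torch
from transformers import AutoModelForCausalLM

def average_models(repo_ids):
    """Uniformly average the weights of checkpoints with identical architectures (illustrative)."""
    models = [AutoModelForCausalLM.from_pretrained(r).float() for r in repo_ids]
    merged = models[0]
    with torch.no_grad():
        # Walk the aligned parameter lists and take the element-wise mean.
        for params in zip(*(m.parameters() for m in models)):
            params[0].data = torch.stack([p.data for p in params]).mean(dim=0)
    return merged  # models[0], with its weights replaced by the average
```

The full inference code for this model follows.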
```python
import os

# Install dependencies on first use if they are missing.
try:
    import transformers, fasttext, huggingface_hub
except ImportError:
    os.system("pip install transformers huggingface_hub fasttext")

import torch
from torch import nn
from transformers import AutoTokenizer, AutoModelForCausalLM
import fasttext
from huggingface_hub import hf_hub_download
from transformers.models.gpt_neox.modeling_gpt_neox import GPTNeoXForCausalLM, GPTNeoXLayer
class GPTNeoXExpertsForCasualLM(GPTNeoXForCausalLM):
    """Stores various experts for layers 9 and 10."""

    __expert_classifier = None

    def __init__(self, config):
        super().__init__(config)
        self.config = config
        # Two GPTNeoXLayers (the replacements for layers 9 and 10) per expert.
        self.orig_chat = nn.ModuleList([GPTNeoXLayer(config) for _ in range(2)])
        self.uspto_expert = nn.ModuleList([GPTNeoXLayer(config) for _ in range(2)])
        self.github_expert = nn.ModuleList([GPTNeoXLayer(config) for _ in range(2)])
        self.pubmed_abstracts_expert = nn.ModuleList([GPTNeoXLayer(config) for _ in range(2)])
        self.freelaw_expert = nn.ModuleList([GPTNeoXLayer(config) for _ in range(2)])
        self.arxiv_expert = nn.ModuleList([GPTNeoXLayer(config) for _ in range(2)])
        self.merged_chat_expert = nn.ModuleList([GPTNeoXLayer(config) for _ in range(2)])
        self.curr_expert = "MergedChat"
        # Download and cache the fasttext domain classifier once per process.
        if GPTNeoXExpertsForCasualLM.__expert_classifier is None:
            file_name = hf_hub_download(repo_id="Multi-Domain-Expert-Layers/MDEL-theblackcat-chat-5-experts", filename="expert_classify.ftz")
            GPTNeoXExpertsForCasualLM.__expert_classifier = fasttext.load_model(file_name)
            print(file_name)
    def predict_expert(self, text):
        """Predicts the Pile domain label for `text`. Possible labels:

        ['__label__StackExchange',
         '__label__PubMed-Abstracts',
         '__label__Github',
         '__label__USPTO-Backgrounds',
         '__label__Pile-CC',
         '__label__PubMed-Central',
         '__label__OpenWebText2',
         '__label__FreeLaw',
         '__label__Wikipedia-(en)',
         '__label__ArXiv',
         '__label__DM-Mathematics',
         '__label__NIH-ExPorter',
         '__label__HackerNews',
         '__label__Enron-Emails',
         '__label__OpenSubtitles',
         '__label__YoutubeSubtitles',
         '__label__Books3',
         '__label__EuroParl',
         '__label__Gutenberg-(PG-19)',
         '__label__PhilPapers',
         '__label__BookCorpus2',
         '__label__Ubuntu-IRC']
        """
        # fasttext expects a single line of text; strip chat markers and newlines.
        text = text.replace("<human>: ", " ").replace("<bot>: ", " ").strip().replace("\n", " ").replace("\r", " ").replace("  ", " ")
        answer = GPTNeoXExpertsForCasualLM.__expert_classifier.predict(text)
        label = answer[0][0].replace("__label__", "")
        score = answer[1][0]
        return (label, score)
    def generate_with_expert(self, text, tokenizer, expert="", return_answer_only=False, do_self_contrastive=True, max_length=128, min_length=1, max_return_sequences=1, do_sample=True, do_beam=False, device="cuda", target_lang=None):
        """Generates text, routing through one of the experts."""
        tokenizer.pad_token = tokenizer.eos_token
        if isinstance(text, str):
            text = [text]
        # Hack: assume a single expert per batch, routed from the first prompt.
        if not expert:
            label, score = self.predict_expert(text[0])
            if score > 0.8 and ("PubMed" in label or "FreeLaw" in label or "ArXiv" in label or "Github" in label or "USPTO" in label):
                expert = label
            else:
                expert = "MergedChat"
        if expert != self.curr_expert:
            print("Switching to expert", expert)
            self.curr_expert = expert
            # Swap layers 9 and 10 with the selected expert's layers.
            for layer_id in range(2):
                if expert == "OrigChat":
                    self.gpt_neox.layers[layer_id + 9] = self.orig_chat[layer_id]
                elif "USPTO" in expert:
                    self.gpt_neox.layers[layer_id + 9] = self.uspto_expert[layer_id]
                elif "Github" in expert:
                    self.gpt_neox.layers[layer_id + 9] = self.github_expert[layer_id]
                elif "PubMed" in expert:
                    self.gpt_neox.layers[layer_id + 9] = self.pubmed_abstracts_expert[layer_id]
                elif "ArXiv" in expert:
                    self.gpt_neox.layers[layer_id + 9] = self.arxiv_expert[layer_id]
                elif "FreeLaw" in expert:
                    self.gpt_neox.layers[layer_id + 9] = self.freelaw_expert[layer_id]
                else:
                    self.gpt_neox.layers[layer_id + 9] = self.merged_chat_expert[layer_id]
        text = [p.strip() for p in text]
        input_ids = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=max_length)
        input_ids = input_ids.to(device)
        with torch.no_grad():
            outputs = self.generate(
                **input_ids,
                max_length=max_length,
                repetition_penalty=1.1,
                min_length=min_length,
                do_sample=do_sample,
                top_p=0.95,
                penalty_alpha=0.6 if do_self_contrastive else None,
                top_k=10,
            )
        ret = []
        for i in range(len(outputs)):  # could use batch_decode, unless we want per-item post-processing
            out = tokenizer.decode(outputs[i], skip_special_tokens=True)
            if return_answer_only:
                out = out[len(text[i]):].lstrip(".? \n\t")
            ret.append(out)
        return ret
tokenizer = AutoTokenizer.from_pretrained("theblackcat102/pythia-1b-deduped-sft")
tokenizer.pad_token = tokenizer.eos_token
model = GPTNeoXExpertsForCasualLM.from_pretrained("Multi-Domain-Expert-Layers/MDEL-theblackcat-chat-5-experts").half().cuda().eval()

print('##')
print(model.generate_with_expert("source code for sorting a list <bot>:", tokenizer)[0])
print('##')
print(model.generate_with_expert("When was Abraham Lincoln born? <bot>:", tokenizer)[0])
print('##')
print(model.generate_with_expert("Medical journal article about ovarian cancer <bot>:", tokenizer)[0])
```
This produces the following output:

```
Warning : `load_model` does not return WordVectorModel or SupervisedModel any more, but a `FastText` object which is very similar.
/root/.cache/huggingface/hub/models--Multi-Domain-Expert-Layers--MDEL-theblackcat-chat-5-experts/snapshots/a8a5b15d85eb0342825063fa1d7b83465f9eefa6/expert_classify.ftz
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
##
Switching to expert Github
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
source code for sorting a list : sort( sort( sorted( sorted( sorted( sorted( sorted( sorted( [ x, y * x - 1 + y / y ])) ) ) )) ) ). unique(). sort( key ='sorted' ). sort_key( sort_key = 0, reverse = True ) # [ 5.1.2, 5.1.3, 3, 2, 1] asd, 6, 7 # { 0.4 } asd # [ 5.2.7, 4.5.4, 6
##
Switching to expert MergedChat
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
When was Abraham Lincoln born? : This is an interesting topic to be asked about and I will try to give a bit of information, but it is very important that you should read the book by Robert O'Brien (1834-1858). [Abraham Lincoln and His Family](_URL_0_) which I highly recommend. This is a biography of a man who lived during the American civil war and had been a US President for 8 years in 1860. There are quite a lot of books available on Lincoln's life and his family. It would help me if you were more specific as I only knew him
##
Switching to expert PubMed-Abstracts
Medical journal article about ovarian cancer :
"Cancer of the Ova" by Susan K. Hines, PhD - On April 10th, 2007, researchers from the National Cancer Institute discovered a mechanism called [Cullin Kin Reductions](_URL_0_) that can reduce and prevent this deadly disease in humans. This is called an "Cullin-R family". These proteins are involved in protein synthesis in the cell that is responsible for cellular maintenance and repair, and these two groups (Kin reduction and Mutagenesis) work on this process all the way to DNA replication during DNA synthesis. In a
```

To recreate the experts, modify the script below. It can also be extended to do dynamic merging and/or to experiment with different weights for different layers; a sketch of per-layer interpolation follows the script.

```python
def recreate_merged_expert():
    model1 = GPTNeoXExpertsForCasualLM.from_pretrained("theblackcat102/pythia-1b-deduped-sft").float()
    model2 = AutoModelForCausalLM.from_pretrained("stillerman/MDEL-pubmed-feelaw-github-arxiv").float()
    model_uspto = AutoModelForCausalLM.from_pretrained("Multi-Domain-Expert-Layers/expert-uspto").float()
    model_github = AutoModelForCausalLM.from_pretrained("Multi-Domain-Expert-Layers/expert-github").float()
    model_pubmed_abstracts = AutoModelForCausalLM.from_pretrained("Multi-Domain-Expert-Layers/expert-pubmed_abstracts").float()
    model_freelaw = AutoModelForCausalLM.from_pretrained("Multi-Domain-Expert-Layers/expert-freelaw").float()
    model_arxiv = AutoModelForCausalLM.from_pretrained("Multi-Domain-Expert-Layers/expert-arxiv").float()
    model = AutoModelForCausalLM.from_pretrained("theblackcat102/pythia-1b-deduped-sft").float()

    with torch.no_grad():
        # Keep an unmodified copy of the chat model's layers 9 and 10.
        for layer_id in [9, 10]:
            model1.orig_chat[layer_id - 9] = model.gpt_neox.layers[layer_id]

        # Merged chat expert: 60% chat, 30% four-domain merge, 10% uspto.
        # These layers stay live in model1, so "MergedChat" is the default expert.
        for layer_id in [9, 10]:
            for p1, p2, p3 in zip(model1.gpt_neox.layers[layer_id].parameters(),
                                  model2.gpt_neox.layers[layer_id].parameters(),
                                  model_uspto.gpt_neox.layers[layer_id].parameters()):
                p1.data = p1.data * 0.6 + p2.data * 0.3 + p3.data * 0.1
            model1.merged_chat_expert[layer_id - 9] = model1.gpt_neox.layers[layer_id]

        # Domain experts: 60% domain expert, 40% chat model, for layers 9 and 10.
        for expert_model, target in [
            (model_uspto, model1.uspto_expert),
            (model_github, model1.github_expert),
            (model_pubmed_abstracts, model1.pubmed_abstracts_expert),
            (model_freelaw, model1.freelaw_expert),
            (model_arxiv, model1.arxiv_expert),
        ]:
            for layer_id in [9, 10]:
                for p1, p2 in zip(expert_model.gpt_neox.layers[layer_id].parameters(),
                                  model.gpt_neox.layers[layer_id].parameters()):
                    p1.data = p1.data * 0.6 + p2.data * 0.4
                target[layer_id - 9] = expert_model.gpt_neox.layers[layer_id]

    model1 = model1.half().eval()
    model1.save_pretrained("MDEL-theblackcat-chat-5-experts", torch_dtype=torch.float16)
    model1.push_to_hub("Multi-Domain-Expert-Layers/MDEL-theblackcat-chat-5-experts")
    return model1
```
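As a sketch of the "different weights for different layers" idea mentioned above, the helper below linearly interpolates selected GPT-NeoX layers between a base model and an expert using a per-layer weight. The function name and the example schedule are hypothetical and not part of this repository.

```python
def interpolate_layers(base_model, expert_model, layer_weights):
    """Blend expert layers into base_model in place (hypothetical helper).

    layer_weights maps a layer index to the fraction of the expert's
    weights to use at that layer, e.g. {9: 0.8, 10: 0.5} (illustrative).
    """
    with torch.no_grad():
        for layer_id, w in layer_weights.items():
            for p_base, p_exp in zip(base_model.gpt_neox.layers[layer_id].parameters(),
                                     expert_model.gpt_neox.layers[layer_id].parameters()):
                # Convex combination of the two checkpoints at this layer.
                p_base.data = (1.0 - w) * p_base.data + w * p_exp.data
    return base_model
```

For example, `interpolate_layers(model, model_arxiv, {9: 0.8, 10: 0.5})` would lean more heavily on the arxiv expert at layer 9 than at layer 10.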