---
library_name: transformers
tags: []
---

# Model Card for b1ade-1b

Instruction fine-tuned 1B parameter model; pass in:

1. `context: <...>`
2. `question: <...>`

and expect an `answer: <...>`

See the implementation example below (also see https://huggingface.co/spaces/w601sxs/b1ade-1b):

```python
import os
import tempfile
import time

import torch
import transformers
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    StoppingCriteria,
    StoppingCriteriaList,
)

BASE_MODEL = "w601sxs/b1ade-1b-bf16"

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    offload_folder="offload",
)
model.eval()


class KeywordsStoppingCriteria(StoppingCriteria):
    """Stop generation once the most recent token is one of the stop tokens.

    Args:
        keywords_ids: Token ids that should terminate generation.
    """

    def __init__(self, keywords_ids: list):
        super().__init__()
        # A set gives O(1) membership checks on every generation step.
        self.keywords = set(keywords_ids)

    def __call__(
        self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs
    ) -> bool:
        # Only the first sequence in the batch is inspected; this example
        # always generates for a single prompt.
        return int(input_ids[0, -1]) in self.keywords


# The model closes its answer with '>', so any tokenization of it ends generation.
stop_words = [">", " >", "> "]
# add_special_tokens=False guarantees index 0 is the stop word's own first
# token, not a BOS token that some tokenizers prepend.
stop_ids = [tokenizer.encode(w, add_special_tokens=False)[0] for w in stop_words]
stop_criteria = StoppingCriteriaList(
    [KeywordsStoppingCriteria(keywords_ids=stop_ids)]
)


def predict(text):
    """Generate, print and return the model's answer for a prompt.

    Args:
        text: Prompt of the form ``"context: <...>\n question: <...>\n"``.

    Returns:
        The decoded text that follows the last ``answer:`` marker, with the
        prompt itself removed.
    """
    # Follow the device chosen by device_map="auto" instead of assuming CUDA,
    # so the example also runs on CPU-only machines.
    inputs = tokenizer(text, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs.get("attention_mask"),
            max_new_tokens=128,
            stopping_criteria=stop_criteria,
        )
    decoded = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
    # Keep only what follows the final "answer:" marker, then drop the echoed
    # prompt in case no marker was generated.
    answer = decoded.split("answer:")[-1].split(text)[-1]
    print(answer)
    return answer


predict("context: \n question: \n")
```