# Hyde LLaMa 2 7B Legal
## Model Details

- **Backbone Model:** meta-llama/Llama-2-7b-chat
- **Input:** The model accepts text input only.
- **Output:** The model generates text only.
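For a quick check that the checkpoint loads and generates, the standard `transformers` text-generation pipeline can be used. The sketch below is illustrative only (the prompt format mirrors the one used in the inference script in the next section) and assumes the `transformers`, `torch`, and `accelerate` packages are installed.

```python
# Minimal quickstart sketch: load the released checkpoint with the text-generation pipeline.
from transformers import pipeline

generator = pipeline(
    "text-generation",
    model="akkshay/hyde-llama-7b",
    torch_dtype="auto",
    device_map="auto",
)

prompt = "Write legal facts about the following topic:\nVW emissions scandal\n"
print(generator(prompt, max_new_tokens=300)[0]["generated_text"])
```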
## Inference
```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


def hyde_gen(
    topic: str,
    model: object,
    tokenizer: object,
    device: object,
):
    # Prompt template that elicits hypothetical legal facts about the topic.
    prompt = f"Write legal facts about the following topic:\n{topic}\n"
    len_prompt = len(prompt)
    output = model.generate(
        **tokenizer(
            prompt,
            return_tensors="pt",
            return_token_type_ids=False,
        ).to(device),
        max_new_tokens=300,
        early_stopping=True,
        do_sample=True,
        top_k=10,
        top_p=0.98,
        no_repeat_ngram_size=3,
        eos_token_id=2,
        repetition_penalty=1.1,
        num_beams=3,
    )
    # Drop the prompt prefix and return only the newly generated text.
    return tokenizer.decode(output[0])[len_prompt:]


def hyde_infer(input_topic):
    device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
    model_pth = "akkshay/hyde-llama-7b"
    # Load the full model onto GPU 0 in fp16; see the notes below for machines without CUDA.
    model = AutoModelForCausalLM.from_pretrained(
        model_pth,
        device_map={"": 0},
        torch_dtype=torch.float16,
        low_cpu_mem_usage=True,
    )
    # The model ships a fast tokenizer (HF tokenizers), so use_fast=True is required.
    tokenizer = AutoTokenizer.from_pretrained(model_pth, use_fast=True)
    model.eval()
    model.config.use_cache = True
    tokenizer.pad_token = tokenizer.eos_token
    output = hyde_gen(
        topic=input_topic,
        model=model,
        tokenizer=tokenizer,
        device=device,
    )
    return output


if __name__ == "__main__":
    fact = hyde_infer("VW emissions scandal")
    print(fact)
```
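In a HyDE-style retrieval setup, the generated legal facts act as a hypothetical document: its embedding is used to query a real corpus. The sketch below is illustrative only; the `sentence-transformers` embedder (`all-MiniLM-L6-v2`) and the toy corpus are placeholder assumptions, not part of this repository.

```python
# Illustrative only: using the generated hypothetical document to drive dense retrieval.
# The embedding model and corpus are placeholder assumptions, not part of this model card.
from sentence_transformers import SentenceTransformer, util

corpus = [
    "Volkswagen installed defeat devices in diesel vehicles to cheat emissions tests.",
    "The Clean Air Act regulates vehicle emissions in the United States.",
    "A patent grants exclusive rights to an invention for a limited period.",
]

embedder = SentenceTransformer("all-MiniLM-L6-v2")  # placeholder embedding model
hypothetical_doc = hyde_infer("VW emissions scandal")  # hyde_infer is defined in the script above

# Embed the hypothetical document and the corpus, then rank corpus entries by cosine similarity.
query_emb = embedder.encode(hypothetical_doc, convert_to_tensor=True)
corpus_emb = embedder.encode(corpus, convert_to_tensor=True)
hits = util.semantic_search(query_emb, corpus_emb, top_k=2)[0]
for hit in hits:
    print(corpus[hit["corpus_id"]], hit["score"])
```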
Since Hyde Llama 2 uses the fast tokenizer provided by the HF `tokenizers` library rather than the `sentencepiece` package, the tokenizer must be initialized with the `use_fast=True` option, as in the script above.

Lastly, Apple M1/M2 chips do not support BF16 computation, so use the CPU instead.
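One illustrative way to follow that note is to fall back to CPU and full precision whenever CUDA is unavailable; the snippet below is a sketch, not part of the original script.

```python
import torch

# Illustrative device/dtype fallback: fp16 on CUDA GPUs, fp32 on CPU
# (including Apple M1/M2 machines, per the note above).
if torch.cuda.is_available():
    device, dtype = torch.device("cuda:0"), torch.float16
else:
    device, dtype = torch.device("cpu"), torch.float32
```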