# Uploaded model
- Developed by: ak0327
- License: apache-2.0
- Finetuned from model: llm-jp/llm-jp-3-13b
This LLaMA-architecture model was trained 2x faster with Unsloth and Hugging Face's TRL library.
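For reference, below is a minimal sketch of how a model like this could be fine-tuned with Unsloth and TRL. This is an illustration, not the author's actual training script: the LoRA settings, hyperparameters, and the tiny placeholder dataset are all assumptions.

```python
from unsloth import FastLanguageModel
from datasets import Dataset
from trl import SFTTrainer
from transformers import TrainingArguments

# Load the base model in 4-bit via Unsloth
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="llm-jp/llm-jp-3-13b",
    max_seq_length=512,
    load_in_4bit=True,
)

# Attach LoRA adapters; rank and target modules are assumptions
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    lora_alpha=16,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
)

# Tiny placeholder dataset in the prompt format this card uses below
train_dataset = Dataset.from_list([
    {"text": "### 指示\n日本の首都は?\n### 回答:\n東京です。"},
])

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    dataset_text_field="text",
    max_seq_length=512,
    args=TrainingArguments(
        per_device_train_batch_size=2,
        num_train_epochs=1,
        output_dir="outputs",
    ),
)
trainer.train()
```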
## How to use

The snippet below loads the fine-tuned model with 4-bit (QLoRA-style) quantization and runs greedy decoding over a set of instruction inputs:
```python
import os

import torch
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

HF_TOKEN = os.environ["HF_TOKEN"]  # your Hugging Face access token


def load_model(model_name):
    # QLoRA-style 4-bit quantization config
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_use_double_quant=False,
    )

    # Load the model in 4-bit
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        device_map="auto",
        token=HF_TOKEN,
    )

    # Load the tokenizer
    tokenizer = AutoTokenizer.from_pretrained(
        model_name,
        trust_remote_code=True,
        token=HF_TOKEN,
    )
    return model, tokenizer


def inference(datasets, model, tokenizer):
    results = []
    for data in tqdm(datasets):
        input_text = data["input"]
        # Prompt format used at fine-tuning time ("指示" = instruction, "回答" = answer)
        prompt = f"""### 指示
{input_text}
### 回答:
"""
        encoded_input = tokenizer(
            prompt,
            add_special_tokens=False,
            return_tensors="pt",
            padding=True,
            truncation=True,
        ).to(model.device)
        input_ids = encoded_input["input_ids"]
        attention_mask = encoded_input["attention_mask"]

        with torch.no_grad():
            output_ids = model.generate(
                input_ids,
                attention_mask=attention_mask,
                max_new_tokens=100,
                do_sample=False,        # greedy decoding
                repetition_penalty=1.2,
                pad_token_id=tokenizer.pad_token_id,
            )[0]

        # Decode only the newly generated tokens, not the prompt
        output = tokenizer.decode(
            output_ids[input_ids.size(1):],
            skip_special_tokens=True,
        )
        results.append({
            "task_id": data["task_id"],
            "input": input_text,
            "output": output,
        })
    return results


model_name = "ak0327/llm-jp-3-13b-finetune-2"
model, tokenizer = load_model(model_name)
datasets = load_test_datasets()  # your datasets
results = inference(datasets, model, tokenizer)
```
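`load_test_datasets()` above is a placeholder for your own loader. One possible sketch, assuming the evaluation data is a JSONL file where each line carries `task_id` and `input` fields, together with dumping the results back to JSONL (both filenames are illustrative):

```python
import json

# Hypothetical loader: each line is a JSON object like
# {"task_id": ..., "input": ...}. The filename is illustrative.
def load_test_datasets(path="tasks.jsonl"):
    with open(path, "r", encoding="utf-8") as f:
        return [json.loads(line) for line in f]

# Write one result per line; the output filename is also illustrative.
with open("results.jsonl", "w", encoding="utf-8") as f:
    for result in results:
        f.write(json.dumps(result, ensure_ascii=False) + "\n")
```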