|
# 经测试,此版本的效果较好😀 |
|
|
|
I use the 50k [Chinese data](https://huggingface.co/datasets/Chinese-Vicuna/instruct_chat_50k.jsonl), which combines the alpaca_chinese_instruction_dataset with the Chinese conversation data from the sharegpt-90k dataset. I fine-tuned the model for 3 epochs using a single 4090 GPU with cutoff_len=1024.
|
|
|
**Use in Python**: |
|
|
|
from transformers import LlamaForCausalLM, LlamaTokenizer
from peft import PeftModel
import torch

# Load the base LLaMA-7B tokenizer and the 8-bit quantized base model.
# `device_map="auto"` lets accelerate place the weights (GPU if available).
tokenizer = LlamaTokenizer.from_pretrained("decapoda-research/llama-7b-hf")

model = LlamaForCausalLM.from_pretrained(
    "decapoda-research/llama-7b-hf",
    load_in_8bit=True,
    torch_dtype=torch.float16,
    device_map="auto",
)

# Attach the LoRA adapter weights on top of the frozen base model.
model = PeftModel.from_pretrained(
    model,
    "Laurie/lora-instruct-chat-50k-cn-en",
    torch_dtype=torch.float16,
    device_map={'': 0},
)

device = "cuda" if torch.cuda.is_available() else "cpu"

inputs = tokenizer("什么是自然语言处理?", return_tensors="pt")

# NOTE: do NOT call `model.to(device)` here. `.to()` is not supported on
# 8-bit (bitsandbytes) models and raises a ValueError; the weights were
# already placed by `device_map` above. Only the input tensors are moved.
with torch.no_grad():
    inputs = {k: v.to(device) for k, v in inputs.items()}
    outputs = model.generate(input_ids=inputs["input_ids"], max_new_tokens=129)
    # Decode the generated token ids back to text, dropping BOS/EOS etc.
    print(tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True))
|
|