Update README.md
Browse files
README.md
CHANGED
@@ -17,20 +17,20 @@ This checkpoint is trained with: https://github.com/hiyouga/LLaMA-Efficient-Tuni
Previous version:

Usage:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

tokenizer = AutoTokenizer.from_pretrained("baichuan-inc/baichuan-7B", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("baichuan-inc/baichuan-7B", device_map="auto", trust_remote_code=True)
model = PeftModel.from_pretrained(model, "hiyouga/baichuan-7b-sft")

query = "晚上睡不着怎么办"

inputs = tokenizer(["<human>:{}\n<bot>:".format(query)], return_tensors="pt")
inputs = inputs.to("cuda")
generate_ids = model.generate(**inputs)
output = tokenizer.batch_decode(generate_ids)[0]
print(output)
```

You could also alternatively launch a CLI demo by using the script in https://github.com/hiyouga/LLaMA-Efficient-Tuning
Updated version:

Usage:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
from peft import PeftModel

tokenizer = AutoTokenizer.from_pretrained("baichuan-inc/baichuan-7B", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("baichuan-inc/baichuan-7B", device_map="auto", trust_remote_code=True)
model = PeftModel.from_pretrained(model, "hiyouga/baichuan-7b-sft")
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

query = "晚上睡不着怎么办"

inputs = tokenizer(["<human>:{}\n<bot>:".format(query)], return_tensors="pt")
inputs = inputs.to("cuda")
generate_ids = model.generate(**inputs, max_new_tokens=256, streamer=streamer)
```

You could also alternatively launch a CLI demo by using the script in https://github.com/hiyouga/LLaMA-Efficient-Tuning