upd README - new tests, append BOS token, native backend fix (#12)
Commit 61faf3bc9756f98fcf11259a73c212402bb9232b
Co-authored-by: Nikita Zuev <[email protected]>
README.md
CHANGED
@@ -7,42 +7,83 @@ This xLSTM-7B was pre-trained on the DCLM and selected high-quality data for in

## How to use it

First, install `xlstm`, which now uses the `mlstm_kernels` package for triton kernels (tested on Python 3.11):

```bash
pip install xlstm
pip install accelerate
pip install 'transformers @ git+https://github.com/huggingface/transformers.git@main'
```

If you get an error regarding the `triton` library:

```bash
pip install 'triton @ git+https://github.com/triton-lang/triton.git@main'
```
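
If the installation succeeded, a quick import check should run without errors. The snippet below is a minimal sketch rather than part of the model card, and it assumes the import names match the package names (`xlstm`, `mlstm_kernels`):

```python
# Quick environment sanity check (illustrative sketch, not from the model card).
import torch
import transformers
import xlstm           # xLSTM model code
import mlstm_kernels   # triton kernels used by the default backend

print("transformers:", transformers.__version__)
print("CUDA available:", torch.cuda.is_available())
```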

Use this model as:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

xlstm = AutoModelForCausalLM.from_pretrained("NX-AI/xLSTM-7b", device_map="auto")

# the tokenizer is a fork of EleutherAI/gpt-neox-20b
tokenizer = AutoTokenizer.from_pretrained("NX-AI/xLSTM-7b")

tokens = tokenizer("Explain quantum computing in simple terms.", return_tensors='pt')['input_ids'].to(device="cuda")

# Get the BOS token ID from the tokenizer
bos_id = tokenizer.bos_token_id

# Prepend BOS
bos_tensor = torch.tensor([[bos_id]], device=tokens.device, dtype=tokens.dtype)
tokens_with_bos = torch.cat([bos_tensor, tokens], dim=1)

out = xlstm.generate(tokens_with_bos, max_new_tokens=20)

print(tokenizer.decode(out[0]))
```
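
Since the BOS token is prepended by hand above, it can be convenient to wrap tokenization, BOS prepending, and generation in one helper. The sketch below is illustrative and not part of the model card; the name `generate_with_bos` is made up:

```python
import torch

def generate_with_bos(model, tokenizer, prompt, **generate_kwargs):
    """Tokenize `prompt`, prepend the BOS token, and call `model.generate`."""
    tokens = tokenizer(prompt, return_tensors="pt")["input_ids"].to(model.device)
    bos = torch.tensor([[tokenizer.bos_token_id]], device=model.device, dtype=tokens.dtype)
    return model.generate(torch.cat([bos, tokens], dim=1), **generate_kwargs)

# Example (reusing `xlstm` and `tokenizer` from the snippet above):
# out = generate_with_bos(xlstm, tokenizer, "Explain quantum computing in simple terms.", max_new_tokens=20)
# print(tokenizer.decode(out[0]))
```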

If you cannot or do not want to use the triton kernels, you can change them to native PyTorch implementations:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
import torch

xlstm_config = AutoConfig.from_pretrained("NX-AI/xLSTM-7b")
xlstm_config.step_kernel = "native"
xlstm_config.chunkwise_kernel = "chunkwise--native_autograd"
xlstm_config.sequence_kernel = "native_sequence__native"

xlstm = AutoModelForCausalLM.from_pretrained("NX-AI/xLSTM-7b",
                                             config=xlstm_config, device_map="auto")

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained("NX-AI/xLSTM-7b")

# Your prompt
prompt = "Explain quantum computing in simple terms."

# Tokenize and send to the same device as the model
inputs = tokenizer(prompt, return_tensors="pt")['input_ids'].to(xlstm.device)

# Get the BOS token ID from the tokenizer
bos_id = tokenizer.bos_token_id

# Prepend BOS
bos_tensor = torch.tensor([[bos_id]], device=xlstm.device, dtype=inputs.dtype)
tokens_with_bos = torch.cat([bos_tensor, inputs], dim=1)

# Generate
outputs = xlstm.generate(
    tokens_with_bos,
    max_new_tokens=200,   # adjust for output length
    temperature=0.7,      # randomness
    top_p=0.9,            # nucleus sampling
    do_sample=True
)

# Decode and print
print(tokenizer.decode(outputs[0]))

# verify selected kernels
from pprint import pprint
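# The call below is an illustrative completion of this check (not taken verbatim
# from the model card); it assumes the selected kernels remain exposed as the
# attributes that were set on the config above.
pprint({
    "step_kernel": xlstm.config.step_kernel,
    "chunkwise_kernel": xlstm.config.chunkwise_kernel,
    "sequence_kernel": xlstm.config.sequence_kernel,
})
```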

@@ -71,3 +112,4 @@ Using HuggingFace's `lighteval` in the Leaderboard-v1 settings:

## License
NXAI Community License (see `LICENSE` file)