upd README - new tests, append BOS token, native backend fix (#12)
Commit 61faf3bc9756f98fcf11259a73c212402bb9232b
Co-authored-by: Nikita Zuev <[email protected]>
README.md
CHANGED
@@ -7,42 +7,83 @@ This xLSTM-7B was pre-trained on the DCLM and selected high-quality data for in

## How to use it

First, install `xlstm`, which now uses the `mlstm_kernels` package for triton kernels (tested on Python 3.11):

```bash
pip install xlstm
pip install accelerate
pip install 'transformers @ git+https://github.com/huggingface/transformers.git@main'
```

If you get an error regarding the `triton` library:

```bash
pip install 'triton @ git+https://github.com/triton-lang/triton.git@main'
```
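
If the installation succeeded, a quick import check should run without errors. The snippet below is a minimal sketch rather than part of the model card, and it assumes the import names match the package names (`xlstm`, `mlstm_kernels`):

```python
# Quick environment sanity check (illustrative sketch, not from the model card).
import torch
import transformers
import xlstm           # xLSTM model code
import mlstm_kernels   # triton kernels used by the default backend

print("transformers:", transformers.__version__)
print("CUDA available:", torch.cuda.is_available())
```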

Use this model as:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

xlstm = AutoModelForCausalLM.from_pretrained("NX-AI/xLSTM-7b", device_map="auto")

# the tokenizer is a fork of EleutherAI/gpt-neox-20b
tokenizer = AutoTokenizer.from_pretrained("NX-AI/xLSTM-7b")

tokens = tokenizer("Explain quantum computing in simple terms.", return_tensors='pt')['input_ids'].to(device="cuda")

# Get the BOS token ID from the tokenizer
bos_id = tokenizer.bos_token_id

# Prepend BOS
bos_tensor = torch.tensor([[bos_id]], device=tokens.device, dtype=tokens.dtype)
tokens_with_bos = torch.cat([bos_tensor, tokens], dim=1)

out = xlstm.generate(tokens_with_bos, max_new_tokens=20)

print(tokenizer.decode(out[0]))
```
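
Since the BOS token is prepended by hand above, it can be convenient to wrap tokenization, BOS prepending, and generation in one helper. The sketch below is illustrative and not part of the model card; the name `generate_with_bos` is made up:

```python
import torch

def generate_with_bos(model, tokenizer, prompt, **generate_kwargs):
    """Tokenize `prompt`, prepend the BOS token, and call `model.generate`."""
    tokens = tokenizer(prompt, return_tensors="pt")["input_ids"].to(model.device)
    bos = torch.tensor([[tokenizer.bos_token_id]], device=model.device, dtype=tokens.dtype)
    return model.generate(torch.cat([bos, tokens], dim=1), **generate_kwargs)

# Example (reusing `xlstm` and `tokenizer` from the snippet above):
# out = generate_with_bos(xlstm, tokenizer, "Explain quantum computing in simple terms.", max_new_tokens=20)
# print(tokenizer.decode(out[0]))
```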

If you cannot or do not want to use the triton kernels, you can change them to native PyTorch implementations:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
import torch

xlstm_config = AutoConfig.from_pretrained("NX-AI/xLSTM-7b")
xlstm_config.step_kernel = "native"
xlstm_config.chunkwise_kernel = "chunkwise--native_autograd"
xlstm_config.sequence_kernel = "native_sequence__native"

xlstm = AutoModelForCausalLM.from_pretrained("NX-AI/xLSTM-7b",
                                             config=xlstm_config, device_map="auto")

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained("NX-AI/xLSTM-7b")

# Your prompt
prompt = "Explain quantum computing in simple terms."

# Tokenize and send to the same device as the model
inputs = tokenizer(prompt, return_tensors="pt")['input_ids'].to(xlstm.device)

# Get the BOS token ID from the tokenizer
bos_id = tokenizer.bos_token_id

# Prepend BOS
bos_tensor = torch.tensor([[bos_id]], device=xlstm.device, dtype=inputs.dtype)
tokens_with_bos = torch.cat([bos_tensor, inputs], dim=1)

# Generate
outputs = xlstm.generate(
    tokens_with_bos,
    max_new_tokens=200,   # adjust for output length
    temperature=0.7,      # randomness
    top_p=0.9,            # nucleus sampling
    do_sample=True
)

# Decode and print
print(tokenizer.decode(outputs[0]))

# verify selected kernels
from pprint import pprint
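# The call below is an illustrative completion of this check (not taken verbatim
# from the model card); it assumes the selected kernels remain exposed as the
# attributes that were set on the config above.
pprint({
    "step_kernel": xlstm.config.step_kernel,
    "chunkwise_kernel": xlstm.config.chunkwise_kernel,
    "sequence_kernel": xlstm.config.sequence_kernel,
})
```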

@@ -71,3 +112,4 @@ Using HuggingFace's `lighteval` in the Leaderboard-v1 settings:

## License
NXAI Community License (see `LICENSE` file)