Update README.md
Browse files
README.md
CHANGED
@@ -34,6 +34,7 @@ from mlx_lm import load, generate
|
|
34 |
model, tokenizer = load("mlx-community/Nous-Hermes-2-Mixtral-8x7B-DPO-4bit")
|
35 |
response = generate(model, tokenizer, prompt="hello", verbose=True)
|
36 |
```
|
|
|
37 |
|
38 |
```bash
|
39 |
python3 -m mlx_lm.generate --model mlx-community/Nous-Hermes-2-Mixtral-8x7B-DPO-4bit --prompt "<|im_start|>system\nYou are an accurate, educational, and helpful information assistant<|im_end|>\n<|im_start|>user\nWhat is the difference between awq vs gptq quantization?<|im_end|>\n<|im_start|>assistant\n" --max-tokens 2048
|
|
|
34 |
model, tokenizer = load("mlx-community/Nous-Hermes-2-Mixtral-8x7B-DPO-4bit")
|
35 |
response = generate(model, tokenizer, prompt="hello", verbose=True)
|
36 |
```
|
37 |
+
## Use with the mlx_lm CLI
|
38 |
|
39 |
```bash
|
40 |
python3 -m mlx_lm.generate --model mlx-community/Nous-Hermes-2-Mixtral-8x7B-DPO-4bit --prompt "<|im_start|>system\nYou are an accurate, educational, and helpful information assistant<|im_end|>\n<|im_start|>user\nWhat is the difference between awq vs gptq quantization?<|im_end|>\n<|im_start|>assistant\n" --max-tokens 2048
|