Update README.md
README.md (changed)
@@ -46,7 +46,7 @@ If you want to use the gpu instead:
 CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install llama-cpp-python --force-reinstall --upgrade --no-cache-dir
 ```
 
-And then use this code to see a response to the prompt.
+And then use this code to see a response to the prompt.
 
 ```python
 from llama_cpp import Llama
@@ -56,7 +56,7 @@ llm = Llama(
     model_path="path/to/model.gguf", # Download the model file first
     n_ctx=2048, # The max sequence length to use - note that longer sequence lengths require much more resources
     n_threads=8, # The number of CPU threads to use, tailor to your system and the resulting performance
-    n_gpu_layers=
+    n_gpu_layers=0 # The number of layers to offload to GPU, if you have GPU acceleration available
 )
 
 # Simple inference example
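With this change, the constructor call in the README snippet is complete and runs as written. For context, a minimal sketch of how the "Simple inference example" that follows would use it, based on llama-cpp-python's documented completion API; the prompt string and sampling parameters here are illustrative placeholders, not part of this commit:

```python
from llama_cpp import Llama

llm = Llama(
    model_path="path/to/model.gguf",  # Download the model file first
    n_ctx=2048,      # The max sequence length to use
    n_threads=8,     # The number of CPU threads to use
    n_gpu_layers=0,  # 0 keeps everything on the CPU; raise it if you built with CUDA
)

# Simple inference example: generate a completion for a prompt.
output = llm(
    "Q: Name the planets in the solar system. A: ",  # Placeholder prompt
    max_tokens=128,     # Stop after at most 128 generated tokens
    stop=["Q:", "\n"],  # Also stop when the model starts a new question
    echo=False,         # Return only the completion, not the prompt
)
print(output["choices"][0]["text"])
```

With the CUDA build installed via the CMAKE_ARGS command above, setting n_gpu_layers to a positive number (or -1 to offload every layer) moves that many transformer layers onto the GPU; the default of 0 shown in the diff keeps inference CPU-only.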