import os

from llama_cpp import Llama

# Path to the merged GGUF model file (F16 precision)
model_path = "llama-3.2-1B-it-Ecommerce-ChatBot-merged-F16.gguf"

# Use all available CPU cores for inference
n_threads = os.cpu_count()

llm = Llama(
    model_path=model_path,
    n_ctx=512,             # context window size in tokens
    n_batch=64,            # prompt tokens processed per batch
    n_threads=n_threads,   # CPU threads used for generation
    n_gpu_layers=-1,       # offload all layers to the GPU if one is available
    chat_format="llama-3"  # apply the Llama 3 chat prompt template
)
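
# With the model loaded, it can be queried through the OpenAI-style chat API
# that llama-cpp-python exposes. A minimal sketch follows; the system prompt,
# user message, and sampling settings are illustrative assumptions, not values
# taken from the model card.
messages = [
    {"role": "system", "content": "You are a helpful e-commerce support assistant."},
    {"role": "user", "content": "Where is my order?"},
]

response = llm.create_chat_completion(
    messages=messages,
    max_tokens=256,   # cap the length of the generated reply
    temperature=0.7,  # moderate sampling randomness
)

# The response mirrors the OpenAI chat-completion schema
print(response["choices"][0]["message"]["content"])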