import logging

from llama_index.llms import LLM, MockLLM

from app._config import settings
from app.enums import LLMMode
from app.paths import models_path

logger = logging.getLogger(__name__)


class LLMComponent:
    """Selects and instantiates the LLM backend based on settings.LLM_MODE."""

    llm: LLM

    def __init__(self) -> None:
        llm_mode = settings.LLM_MODE
        logger.info(f"Initializing the LLM in mode={llm_mode}")

        match llm_mode:
            case LLMMode.OPENAI:
                from llama_index.llms import OpenAI

                self.llm = OpenAI(
                    api_key=settings.OPENAI_API_KEY,
                    model=settings.OPENAI_MODEL,
                )
            case LLMMode.MOCK:
                self.llm = MockLLM()
            case LLMMode.LOCAL:
                from llama_index.llms import LlamaCPP
                from llama_index.llms.llama_utils import (
                    completion_to_prompt,
                    messages_to_prompt,
                )

                self.llm = LlamaCPP(
                    model_path=str(models_path / settings.LOCAL_HF_LLM_MODEL_FILE),
                    temperature=settings.LLM_TEMPERATURE,
                    max_new_tokens=settings.LLM_MAX_NEW_TOKENS,
                    context_window=settings.LLM_CONTEXT_WINDOW,
                    generate_kwargs={},
                    # n_gpu_layers: 0 runs fully on CPU, -1 offloads all layers
                    # to the GPU, and any value >= 1 offloads that many layers.
                    model_kwargs={"n_gpu_layers": 0},
                    # Transform inputs into the Llama 2 chat prompt format.
                    messages_to_prompt=messages_to_prompt,
                    completion_to_prompt=completion_to_prompt,
                    verbose=True,
                )
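

# A minimal usage sketch, assuming LLM_MODE=mock so no API key or local model
# file is required; the prompt string and variable name are illustrative only.
if __name__ == "__main__":
    component = LLMComponent()
    # MockLLM returns deterministic placeholder text, enough to verify wiring.
    print(component.llm.complete("Hello!").text)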