import os from importlib.metadata import version from inspect import currentframe, getframeinfo from pathlib import Path from decouple import config from ktem.utils.lang import SUPPORTED_LANGUAGE_MAP from theflow.settings.default import * # noqa cur_frame = currentframe() if cur_frame is None: raise ValueError("Cannot get the current frame.") this_file = getframeinfo(cur_frame).filename this_dir = Path("/data/kotaemon/ktem_app_data") #this_dir = Path(this_file).parent # change this if your app use a different name KH_PACKAGE_NAME = "kotaemon_app" KH_APP_VERSION = config("KH_APP_VERSION", None) if not KH_APP_VERSION: try: # Caution: This might produce the wrong version # https://stackoverflow.com/a/59533071 KH_APP_VERSION = version(KH_PACKAGE_NAME) except Exception: KH_APP_VERSION = "local" KH_GRADIO_SHARE = config("KH_GRADIO_SHARE", default=False, cast=bool) KH_ENABLE_FIRST_SETUP = config("KH_ENABLE_FIRST_SETUP", default=True, cast=bool) KH_DEMO_MODE = config("KH_DEMO_MODE", default=False, cast=bool) KH_OLLAMA_URL = config("KH_OLLAMA_URL", default="http://localhost:11434/v1/") # App can be ran from anywhere and it's not trivial to decide where to store app data. # So let's use the same directory as the flowsetting.py file. KH_APP_DATA_DIR = this_dir / "ktem_app_data" KH_APP_DATA_EXISTS = KH_APP_DATA_DIR.exists() KH_APP_DATA_DIR.mkdir(parents=True, exist_ok=True) # User data directory KH_USER_DATA_DIR = KH_APP_DATA_DIR / "user_data" KH_USER_DATA_DIR.mkdir(parents=True, exist_ok=True) # markdown output directory KH_MARKDOWN_OUTPUT_DIR = KH_APP_DATA_DIR / "markdown_cache_dir" KH_MARKDOWN_OUTPUT_DIR.mkdir(parents=True, exist_ok=True) # chunks output directory KH_CHUNKS_OUTPUT_DIR = KH_APP_DATA_DIR / "chunks_cache_dir" KH_CHUNKS_OUTPUT_DIR.mkdir(parents=True, exist_ok=True) # zip output directory KH_ZIP_OUTPUT_DIR = KH_APP_DATA_DIR / "zip_cache_dir" KH_ZIP_OUTPUT_DIR.mkdir(parents=True, exist_ok=True) # zip input directory KH_ZIP_INPUT_DIR = KH_APP_DATA_DIR / "zip_cache_dir_in" KH_ZIP_INPUT_DIR.mkdir(parents=True, exist_ok=True) # HF models can be big, let's store them in the app data directory so that it's easier # for users to manage their storage. # ref: https://huggingface.co/docs/huggingface_hub/en/guides/manage-cache os.environ["HF_HOME"] = str(KH_APP_DATA_DIR / "huggingface") os.environ["HF_HUB_CACHE"] = str(KH_APP_DATA_DIR / "huggingface") # doc directory KH_DOC_DIR = this_dir / "docs" KH_MODE = "dev" KH_SSO_ENABLED = config("KH_SSO_ENABLED", default=False, cast=bool) KH_FEATURE_CHAT_SUGGESTION = config( "KH_FEATURE_CHAT_SUGGESTION", default=False, cast=bool ) KH_FEATURE_USER_MANAGEMENT = config( "KH_FEATURE_USER_MANAGEMENT", default=True, cast=bool ) KH_USER_CAN_SEE_PUBLIC = None KH_FEATURE_USER_MANAGEMENT_ADMIN = str( config("KH_FEATURE_USER_MANAGEMENT_ADMIN", default="admin") ) KH_FEATURE_USER_MANAGEMENT_PASSWORD = str( config("KH_FEATURE_USER_MANAGEMENT_PASSWORD", default="admin") ) KH_ENABLE_ALEMBIC = False KH_DATABASE = f"sqlite:///{KH_USER_DATA_DIR / 'sql.db'}" KH_FILESTORAGE_PATH = str(KH_USER_DATA_DIR / "files") KH_WEB_SEARCH_BACKEND = ( "kotaemon.indices.retrievers.tavily_web_search.WebSearch" # "kotaemon.indices.retrievers.jina_web_search.WebSearch" ) KH_DOCSTORE = { # "__type__": "kotaemon.storages.ElasticsearchDocumentStore", # "__type__": "kotaemon.storages.SimpleFileDocumentStore", "__type__": "kotaemon.storages.LanceDBDocumentStore", "path": str(KH_USER_DATA_DIR / "docstore"), } KH_VECTORSTORE = { # "__type__": "kotaemon.storages.LanceDBVectorStore", "__type__": "kotaemon.storages.ChromaVectorStore", # "__type__": "kotaemon.storages.MilvusVectorStore", # "__type__": "kotaemon.storages.QdrantVectorStore", "path": str(KH_USER_DATA_DIR / "vectorstore"), } KH_LLMS = {} KH_EMBEDDINGS = {} KH_RERANKINGS = {} # populate options from config if config("AZURE_OPENAI_API_KEY", default="") and config( "AZURE_OPENAI_ENDPOINT", default="" ): if config("AZURE_OPENAI_CHAT_DEPLOYMENT", default=""): KH_LLMS["azure"] = { "spec": { "__type__": "kotaemon.llms.AzureChatOpenAI", "temperature": 0, "azure_endpoint": config("AZURE_OPENAI_ENDPOINT", default=""), "api_key": config("AZURE_OPENAI_API_KEY", default=""), "api_version": config("OPENAI_API_VERSION", default="") or "2024-02-15-preview", "azure_deployment": config("AZURE_OPENAI_CHAT_DEPLOYMENT", default=""), "timeout": 20, }, "default": False, } if config("AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT", default=""): KH_EMBEDDINGS["azure"] = { "spec": { "__type__": "kotaemon.embeddings.AzureOpenAIEmbeddings", "azure_endpoint": config("AZURE_OPENAI_ENDPOINT", default=""), "api_key": config("AZURE_OPENAI_API_KEY", default=""), "api_version": config("OPENAI_API_VERSION", default="") or "2024-02-15-preview", "azure_deployment": config( "AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT", default="" ), "timeout": 10, }, "default": False, } OPENAI_DEFAULT = "" OPENAI_API_KEY = config("OPENAI_API_KEY", default=OPENAI_DEFAULT) GOOGLE_API_KEY = config("GOOGLE_API_KEY", default="your-key") IS_OPENAI_DEFAULT = len(OPENAI_API_KEY) > 0 and OPENAI_API_KEY != OPENAI_DEFAULT if OPENAI_API_KEY: KH_LLMS["openai"] = { "spec": { "__type__": "kotaemon.llms.ChatOpenAI", "temperature": 0, "base_url": config("OPENAI_API_BASE", default="") or "https://api.openai.com/v1", "api_key": OPENAI_API_KEY, "model": config("OPENAI_CHAT_MODEL", default="gpt-4o-mini"), "timeout": 20, }, "default": IS_OPENAI_DEFAULT, } KH_EMBEDDINGS["openai"] = { "spec": { "__type__": "kotaemon.embeddings.OpenAIEmbeddings", "base_url": config("OPENAI_API_BASE", default="https://api.openai.com/v1"), "api_key": OPENAI_API_KEY, "model": config( "OPENAI_EMBEDDINGS_MODEL", default="text-embedding-3-large" ), "timeout": 10, "context_length": 8191, }, "default": IS_OPENAI_DEFAULT, } if config("LOCAL_MODEL", default=""): KH_LLMS["ollama"] = { "spec": { "__type__": "kotaemon.llms.ChatOpenAI", "base_url": KH_OLLAMA_URL, "model": config("LOCAL_MODEL", default="qwen2.5:7b"), "api_key": "ollama", }, "default": False, } KH_LLMS["ollama-long-context"] = { "spec": { "__type__": "kotaemon.llms.LCOllamaChat", "base_url": KH_OLLAMA_URL.replace("v1/", ""), "model": config("LOCAL_MODEL", default="qwen2.5:7b"), "num_ctx": 8192, }, "default": False, } KH_EMBEDDINGS["ollama"] = { "spec": { "__type__": "kotaemon.embeddings.OpenAIEmbeddings", "base_url": KH_OLLAMA_URL, "model": config("LOCAL_MODEL_EMBEDDINGS", default="nomic-embed-text"), "api_key": "ollama", }, "default": False, } KH_EMBEDDINGS["fast_embed"] = { "spec": { "__type__": "kotaemon.embeddings.FastEmbedEmbeddings", "model_name": "BAAI/bge-base-en-v1.5", }, "default": False, } # additional LLM configurations KH_LLMS["claude"] = { "spec": { "__type__": "kotaemon.llms.chats.LCAnthropicChat", "model_name": "claude-3-5-sonnet-20240620", "api_key": "your-key", }, "default": False, } KH_LLMS["google"] = { "spec": { "__type__": "kotaemon.llms.chats.LCGeminiChat", "model_name": "gemini-1.5-flash", "api_key": GOOGLE_API_KEY, }, "default": not IS_OPENAI_DEFAULT, } KH_LLMS["groq"] = { "spec": { "__type__": "kotaemon.llms.ChatOpenAI", "base_url": "https://api.groq.com/openai/v1", "model": "llama-3.1-8b-instant", "api_key": "your-key", }, "default": False, } KH_LLMS["cohere"] = { "spec": { "__type__": "kotaemon.llms.chats.LCCohereChat", "model_name": "command-r-plus-08-2024", "api_key": config("COHERE_API_KEY", default="your-key"), }, "default": False, } # additional embeddings configurations KH_EMBEDDINGS["cohere"] = { "spec": { "__type__": "kotaemon.embeddings.LCCohereEmbeddings", "model": "embed-multilingual-v3.0", "cohere_api_key": config("COHERE_API_KEY", default="your-key"), "user_agent": "default", }, "default": False, } KH_EMBEDDINGS["google"] = { "spec": { "__type__": "kotaemon.embeddings.LCGoogleEmbeddings", "model": "models/text-embedding-004", "google_api_key": GOOGLE_API_KEY, }, "default": not IS_OPENAI_DEFAULT, } # KH_EMBEDDINGS["huggingface"] = { # "spec": { # "__type__": "kotaemon.embeddings.LCHuggingFaceEmbeddings", # "model_name": "sentence-transformers/all-mpnet-base-v2", # }, # "default": False, # } # default reranking models KH_RERANKINGS["cohere"] = { "spec": { "__type__": "kotaemon.rerankings.CohereReranking", "model_name": "rerank-multilingual-v2.0", "cohere_api_key": config("COHERE_API_KEY", default=""), }, "default": True, } KH_REASONINGS = [ "ktem.reasoning.simple.FullQAPipeline", "ktem.reasoning.simple.FullDecomposeQAPipeline", "ktem.reasoning.react.ReactAgentPipeline", "ktem.reasoning.rewoo.RewooAgentPipeline", ] KH_REASONINGS_USE_MULTIMODAL = config("USE_MULTIMODAL", default=False, cast=bool) KH_VLM_ENDPOINT = "{0}/openai/deployments/{1}/chat/completions?api-version={2}".format( config("AZURE_OPENAI_ENDPOINT", default=""), config("OPENAI_VISION_DEPLOYMENT_NAME", default="gpt-4o"), config("OPENAI_API_VERSION", default=""), ) SETTINGS_APP: dict[str, dict] = {} SETTINGS_REASONING = { "use": { "name": "Reasoning options", "value": None, "choices": [], "component": "radio", }, "lang": { "name": "Language", "value": "en", "choices": [(lang, code) for code, lang in SUPPORTED_LANGUAGE_MAP.items()], "component": "dropdown", }, "max_context_length": { "name": "Max context length (LLM)", "value": 32000, "component": "number", }, } USE_GLOBAL_GRAPHRAG = config("USE_GLOBAL_GRAPHRAG", default=True, cast=bool) USE_NANO_GRAPHRAG = config("USE_NANO_GRAPHRAG", default=False, cast=bool) USE_LIGHTRAG = config("USE_LIGHTRAG", default=True, cast=bool) USE_MS_GRAPHRAG = config("USE_MS_GRAPHRAG", default=True, cast=bool) GRAPHRAG_INDEX_TYPES = [] if USE_MS_GRAPHRAG: GRAPHRAG_INDEX_TYPES.append("ktem.index.file.graph.GraphRAGIndex") if USE_NANO_GRAPHRAG: GRAPHRAG_INDEX_TYPES.append("ktem.index.file.graph.NanoGraphRAGIndex") if USE_LIGHTRAG: GRAPHRAG_INDEX_TYPES.append("ktem.index.file.graph.LightRAGIndex") KH_INDEX_TYPES = [ "ktem.index.file.FileIndex", *GRAPHRAG_INDEX_TYPES, ] GRAPHRAG_INDICES = [ { "name": graph_type.split(".")[-1].replace("Index", "") + " Collection", # get last name "config": { "supported_file_types": ( ".png, .jpeg, .jpg, .tiff, .tif, .pdf, .xls, .xlsx, .doc, .docx, " ".pptx, .csv, .html, .mhtml, .txt, .md, .zip" ), "private": True, }, "index_type": graph_type, } for graph_type in GRAPHRAG_INDEX_TYPES ] KH_INDICES = [ { "name": "File Collection", "config": { "supported_file_types": ( ".png, .jpeg, .jpg, .tiff, .tif, .pdf, .xls, .xlsx, .doc, .docx, " ".pptx, .csv, .html, .mhtml, .txt, .md, .zip" ), "private": True, }, "index_type": "ktem.index.file.FileIndex", }, *GRAPHRAG_INDICES, ]