# RAG3_Voice / config.py
# jeongsoo's picture
# Add voice recognition feature with Naver Clova API
# 14586a6
"""
Configuration for the vector store, embedding model, LLM, and related components.

Improved version that relies on environment variables and .env files, with
added support for the HuggingFace environment.
"""
import os
import logging
import sys
import re
import requests
import json
from pathlib import Path
from typing import Dict, Any
from dotenv import load_dotenv
# Logging setup
logger = logging.getLogger("Config")

# Record where the module runs from (helps when debugging path problems).
script_dir = os.path.dirname(os.path.abspath(__file__))
logger.info(f"์Šคํฌ๋ฆฝํŠธ ๋””๋ ‰ํ† ๋ฆฌ: {script_dir}")
logger.info(f"ํ˜„์žฌ ์ž‘์—… ๋””๋ ‰ํ† ๋ฆฌ: {os.getcwd()}")
logger.info(f"์šด์˜ ์ฒด์ œ: {os.name}")

# Environment detection: the process is treated as a HuggingFace Space when
# SPACE_ID is set or SYSTEM equals "spaces".
IS_HUGGINGFACE = os.getenv('SPACE_ID') is not None or os.getenv('SYSTEM') == 'spaces'

if IS_HUGGINGFACE:
    logger.info("HuggingFace Spaces ํ™˜๊ฒฝ์ด ๊ฐ์ง€๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
else:
    # Local run: look for a .env file in the candidate locations, in order.
    env_paths = [
        ".env",  # current working directory
        os.path.join(script_dir, ".env"),  # directory of this script
        os.path.join(script_dir, "config", ".env"),  # "config" subdirectory
        os.path.join(os.path.dirname(script_dir), ".env"),  # parent directory
    ]

    # Stop at the first candidate that load_dotenv reports as loaded.
    env_loaded = False
    for env_path in env_paths:
        if not os.path.isfile(env_path):
            continue
        logger.info(f".env ํŒŒ์ผ ๋ฐœ๊ฒฌ: {env_path}")
        env_loaded = load_dotenv(env_path, verbose=True)
        if env_loaded:
            logger.info(f".env ํŒŒ์ผ ๋กœ๋“œ ์„ฑ๊ณต: {env_path}")
            break

    if not env_loaded:
        logger.warning(".env ํŒŒ์ผ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค. ๊ธฐ๋ณธ๊ฐ’ ๋˜๋Š” ์‹œ์Šคํ…œ ํ™˜๊ฒฝ ๋ณ€์ˆ˜๋ฅผ ์‚ฌ์šฉํ•ฉ๋‹ˆ๋‹ค.")
    logger.info(f"๋กœ์ปฌ ํ™˜๊ฒฝ์—์„œ ์‹คํ–‰ ์ค‘์ž…๋‹ˆ๋‹ค. (OS: {'Windows' if os.name == 'nt' else 'Unix/Linux/MacOS'})")

# Windows detection
IS_WINDOWS = os.name == 'nt'
# Utility function: read an environment variable (distinguishes the HuggingFace and local environments)
def get_env(key: str, default: Any = None, required: bool = False) -> Any:
    """
    Look up a configuration value in the process environment.

    On HuggingFace Spaces the variable ``HF_SECRET_<KEY>`` (key upper-cased)
    is consulted first; when it is unset, or when running locally, the plain
    ``key`` variable is read with ``default`` as the fallback.

    Args:
        key: Name of the environment variable.
        default: Value returned when the variable is not set.
        required: When True, a missing value (None) is an error.

    Returns:
        The environment value, or ``default`` when it is not set.

    Raises:
        ValueError: If ``required`` is True and the resolved value is None.
    """
    value = None
    if IS_HUGGINGFACE:
        # Secret-style name takes precedence on Spaces.
        value = os.getenv(f"HF_SECRET_{key.upper()}")
    if value is None:
        value = os.getenv(key, default)

    if not required or value is not None:
        return value

    # Missing required value: the message depends on where we are running.
    if IS_HUGGINGFACE:
        error_msg = f"ํ•„์ˆ˜ ํ™˜๊ฒฝ ๋ณ€์ˆ˜ {key}๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค. HuggingFace Space์—์„œ ์‹œํฌ๋ฆฟ์„ ์„ค์ •ํ•ด์ฃผ์„ธ์š”."
    else:
        error_msg = f"ํ•„์ˆ˜ ํ™˜๊ฒฝ ๋ณ€์ˆ˜ {key}๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค. .env ํŒŒ์ผ์— ์ถ”๊ฐ€ํ•ด์ฃผ์„ธ์š”."
    logger.error(error_msg)
    raise ValueError(error_msg)
# Path construction utility function
def ensure_absolute_path(path_str: str) -> str:
    """
    Turn a possibly relative path into an absolute one.

    Relative paths are tried against the script directory first and then the
    current working directory; the first existing candidate is returned. If
    neither exists, the working-directory candidate is returned.

    Args:
        path_str: Path string to convert.

    Returns:
        The absolute path as a string.
    """
    # On Windows, a drive-letter path (e.g. C:\...) is returned untouched.
    if IS_WINDOWS and re.match(r'^[a-zA-Z]:\\', path_str):
        logger.info(f"Windows ์ ˆ๋Œ€ ๊ฒฝ๋กœ ๊ฐ์ง€: {path_str}")
        return path_str

    given = Path(path_str)
    if given.is_absolute():
        return str(given)

    relative_to_script = Path(script_dir) / given
    relative_to_cwd = Path.cwd() / given

    # Prefer whichever candidate exists, script directory first.
    for candidate in (relative_to_script, relative_to_cwd):
        if candidate.exists():
            return str(candidate)

    # Nothing exists yet: default to the working-directory candidate.
    return str(relative_to_cwd)
# Utility function for handling Windows-style paths
def normalize_path(path_str: str) -> str:
    """
    Normalize a path string so it suits the current operating system.

    ``os.path.normpath`` collapses redundant separators and ``.``/``..``
    segments, but on POSIX it leaves backslashes alone. A Windows-style value
    such as ``docs\\pdf`` would then be treated as a single file name, so
    backslashes are converted to ``/`` first whenever the host separator is
    not a backslash. On Windows the input is passed to ``normpath`` unchanged.

    Args:
        path_str: Path string to normalize.

    Returns:
        The normalized path.
    """
    if os.sep != "\\":
        # Non-Windows host: translate Windows separators before normalizing.
        path_str = path_str.replace("\\", "/")
    return os.path.normpath(path_str)
# Base directory settings (converted to absolute paths)
# Each directory is read from the environment, normalized, then made absolute.
# Both the normalized raw value and the absolute result are kept.
PDF_DIRECTORY_RAW = normalize_path(get_env("PDF_DIRECTORY", "documents"))
PDF_DIRECTORY = ensure_absolute_path(PDF_DIRECTORY_RAW)

CACHE_DIRECTORY_RAW = normalize_path(get_env("CACHE_DIRECTORY", "cached_data"))
CACHE_DIRECTORY = ensure_absolute_path(CACHE_DIRECTORY_RAW)

logger.info(f"PDF ๋””๋ ‰ํ† ๋ฆฌ (์›๋ณธ): {PDF_DIRECTORY_RAW}")
logger.info(f"PDF ๋””๋ ‰ํ† ๋ฆฌ (์ ˆ๋Œ€): {PDF_DIRECTORY}")
logger.info(f"์บ์‹œ ๋””๋ ‰ํ† ๋ฆฌ (์›๋ณธ): {CACHE_DIRECTORY_RAW}")
logger.info(f"์บ์‹œ ๋””๋ ‰ํ† ๋ฆฌ (์ ˆ๋Œ€): {CACHE_DIRECTORY}")

# Chunking settings
CHUNK_SIZE = int(get_env("CHUNK_SIZE", "1000"))
CHUNK_OVERLAP = int(get_env("CHUNK_OVERLAP", "200"))

# API keys and related environment settings
OPENAI_API_KEY = get_env("OPENAI_API_KEY", "")
LANGFUSE_PUBLIC_KEY = get_env("LANGFUSE_PUBLIC_KEY", "")
LANGFUSE_SECRET_KEY = get_env("LANGFUSE_SECRET_KEY", "")
LANGFUSE_HOST = get_env("LANGFUSE_HOST", "https://cloud.langfuse.com")

# DeepSeek settings
DEEPSEEK_API_KEY = get_env("DEEPSEEK_API_KEY", "")
DEEPSEEK_ENDPOINT = get_env("DEEPSEEK_ENDPOINT", "https://api.deepseek.com/v1/chat/completions")
DEEPSEEK_MODEL = get_env("DEEPSEEK_MODEL", "deepseek-chat")

# On HuggingFace, log whether a DeepSeek key is present plus the target model/endpoint.
if IS_HUGGINGFACE:
    logger.info(f"ํ—ˆ๊น…ํŽ˜์ด์Šค ํ™˜๊ฒฝ์—์„œ DeepSeek API ํ‚ค ์กด์žฌ ์—ฌ๋ถ€: {bool(DEEPSEEK_API_KEY)}")
    if DEEPSEEK_API_KEY:
        # Only the first and last four characters are shown; keys of eight
        # characters or fewer are fully masked.
        if len(DEEPSEEK_API_KEY) > 8:
            masked_key = DEEPSEEK_API_KEY[:4] + "****" + DEEPSEEK_API_KEY[-4:]
        else:
            masked_key = "****"
        logger.info(f"DeepSeek API ํ‚ค: {masked_key}")
    logger.info(f"DeepSeek ๋ชจ๋ธ: {DEEPSEEK_MODEL}")
    logger.info(f"DeepSeek ์—”๋“œํฌ์ธํŠธ: {DEEPSEEK_ENDPOINT}")

# Milvus vector DB settings
MILVUS_HOST = get_env("MILVUS_HOST", "localhost")
MILVUS_PORT = get_env("MILVUS_PORT", "19530")
MILVUS_COLLECTION = get_env("MILVUS_COLLECTION", "pdf_documents")

# Embedding / reranker model settings (defaults are multilingual models)
EMBEDDING_MODEL = get_env("EMBEDDING_MODEL", "Alibaba-NLP/gte-multilingual-base")
RERANKER_MODEL = get_env("RERANKER_MODEL", "Alibaba-NLP/gte-multilingual-reranker-base")
# LLM model settings (selected automatically based on the environment)
USE_OPENAI = get_env("USE_OPENAI", "False").lower() == "true"
USE_DEEPSEEK = get_env("USE_DEEPSEEK", "False").lower() == "true"

# OLLAMA_HOST is defined unconditionally. It used to be assigned only in the
# local Ollama branch, so the API-key fallbacks below (which switch to Ollama
# after the branch has run) left it undefined and print_config() raised
# NameError at import time.
OLLAMA_HOST = get_env("OLLAMA_HOST", "http://localhost:11434")

if IS_HUGGINGFACE:
    # On HuggingFace Spaces DeepSeek is used whenever its API key is present.
    if DEEPSEEK_API_KEY:
        USE_DEEPSEEK = True
        USE_OPENAI = False
        LLM_MODEL = DEEPSEEK_MODEL
        logger.info("HuggingFace Spaces ํ™˜๊ฒฝ: DeepSeek ๋ชจ๋ธ ์‚ฌ์šฉ")
    else:
        logger.warning("HuggingFace Spaces ํ™˜๊ฒฝ์—์„œ DeepSeek API ํ‚ค๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
        USE_DEEPSEEK = False
        USE_OPENAI = False  # both hosted backends are disabled without a DeepSeek key
        LLM_MODEL = get_env("LLM_MODEL", "gemma3:latest")  # fallback model name
        logger.info(f"HuggingFace Spaces ํ™˜๊ฒฝ: DeepSeek API ํ‚ค ์—†์Œ, LLM ๋ชจ๋ธ: {LLM_MODEL}")
else:
    # Local run: follow the USE_DEEPSEEK / USE_OPENAI flags, DeepSeek first.
    if USE_DEEPSEEK:
        LLM_MODEL = DEEPSEEK_MODEL
        logger.info(f"๋กœ์ปฌ ํ™˜๊ฒฝ: DeepSeek ๋ชจ๋ธ ์‚ฌ์šฉ ({DEEPSEEK_MODEL})")
    elif USE_OPENAI:
        LLM_MODEL = get_env("LLM_MODEL", "gpt-3.5-turbo")
        logger.info(f"๋กœ์ปฌ ํ™˜๊ฒฝ: OpenAI ๋ชจ๋ธ ์‚ฌ์šฉ ({LLM_MODEL})")
    else:
        LLM_MODEL = get_env("LLM_MODEL", "gemma3:latest")
        logger.info(f"๋กœ์ปฌ ํ™˜๊ฒฝ: Ollama ๋ชจ๋ธ ์‚ฌ์šฉ ({LLM_MODEL})")

# API key validation (local environment only): a selected hosted backend
# without a key falls back to the Ollama model name.
if not IS_HUGGINGFACE:
    if USE_DEEPSEEK and not DEEPSEEK_API_KEY:
        logger.warning("DeepSeek ๋ชจ๋ธ์ด ์„ ํƒ๋˜์—ˆ์ง€๋งŒ API ํ‚ค๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
        USE_DEEPSEEK = False
        USE_OPENAI = False
        LLM_MODEL = get_env("LLM_MODEL", "gemma3:latest")
        logger.info("DeepSeek API ํ‚ค๊ฐ€ ์—†์–ด Ollama๋กœ ํด๋ฐฑํ•ฉ๋‹ˆ๋‹ค.")
    elif USE_OPENAI and not OPENAI_API_KEY:
        logger.warning("OpenAI ๋ชจ๋ธ์ด ์„ ํƒ๋˜์—ˆ์ง€๋งŒ API ํ‚ค๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
        logger.warning("OpenAI API ํ‚ค๊ฐ€ ์—†์–ด Ollama๋กœ ํด๋ฐฑํ•ฉ๋‹ˆ๋‹ค.")
        USE_OPENAI = False
        LLM_MODEL = get_env("LLM_MODEL", "gemma3:latest")
# DeepSeek API test function
def test_deepseek_connection() -> Dict[str, Any]:
    """
    Send one small chat request to the DeepSeek endpoint to check connectivity.

    Uses the module-level DEEPSEEK_API_KEY, DEEPSEEK_ENDPOINT and
    DEEPSEEK_MODEL. No exception escapes; every failure is reported in the
    returned dictionary.

    Returns:
        A dictionary with ``success`` (bool), ``message`` (str) and
        ``status_code`` (the HTTP status, or None when no response was
        received or the test was skipped).
    """
    if not DEEPSEEK_API_KEY:
        logger.warning("DeepSeek API ํ‚ค๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•„ ํ…Œ์ŠคํŠธ๋ฅผ ๊ฑด๋„ˆ๋œ๋‹ˆ๋‹ค.")
        return {
            "success": False,
            "message": "API ํ‚ค๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.",
            "status_code": None
        }

    try:
        logger.info(f"DeepSeek API ์—ฐ๊ฒฐ ํ…Œ์ŠคํŠธ ์‹œ์ž‘: {DEEPSEEK_ENDPOINT}, ๋ชจ๋ธ: {DEEPSEEK_MODEL}")

        # Minimal prompt used only for the connectivity check.
        test_prompt = "Hello, please respond with a short greeting."

        # Request headers and body
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {DEEPSEEK_API_KEY}"
        }
        payload = {
            "model": DEEPSEEK_MODEL,
            "messages": [{"role": "user", "content": test_prompt}],
            "temperature": 0.7,
            "max_tokens": 50
        }

        response = requests.post(
            DEEPSEEK_ENDPOINT,
            headers=headers,
            json=payload,
            timeout=10  # seconds
        )

        if response.status_code == 200:
            logger.info("DeepSeek API ์—ฐ๊ฒฐ ์„ฑ๊ณต")
            return {
                "success": True,
                "message": "API ์—ฐ๊ฒฐ ์„ฑ๊ณต",
                "status_code": response.status_code
            }

        logger.error(f"DeepSeek API ์˜ค๋ฅ˜: ์ƒํƒœ ์ฝ”๋“œ {response.status_code}")
        try:
            error_data = response.json()
            error_message = error_data.get("error", {}).get("message", str(error_data))
        except (ValueError, AttributeError):
            # ValueError: the body is not valid JSON.
            # AttributeError: the JSON does not have the expected
            # {"error": {"message": ...}} shape (e.g. a list, or "error" is a string).
            # Either way, fall back to the raw response text.
            error_message = response.text
        return {
            "success": False,
            "message": f"API ์˜ค๋ฅ˜: {error_message}",
            "status_code": response.status_code
        }
    except requests.exceptions.Timeout:
        logger.error("DeepSeek API ์š”์ฒญ ์‹œ๊ฐ„ ์ดˆ๊ณผ")
        return {
            "success": False,
            "message": "API ์š”์ฒญ ์‹œ๊ฐ„ ์ดˆ๊ณผ",
            "status_code": None
        }
    except requests.exceptions.ConnectionError:
        logger.error("DeepSeek API ์—ฐ๊ฒฐ ์‹คํŒจ")
        return {
            "success": False,
            "message": "API ์„œ๋ฒ„ ์—ฐ๊ฒฐ ์‹คํŒจ",
            "status_code": None
        }
    except Exception as e:
        # Top-level boundary: report anything unexpected instead of raising.
        logger.error(f"DeepSeek API ํ…Œ์ŠคํŠธ ์ค‘ ์˜ˆ์ƒ์น˜ ๋ชปํ•œ ์˜ค๋ฅ˜: {e}", exc_info=True)
        return {
            "success": False,
            "message": f"์˜ˆ์ƒ์น˜ ๋ชปํ•œ ์˜ค๋ฅ˜: {str(e)}",
            "status_code": None
        }
# Vector search settings
TOP_K_RETRIEVAL = int(get_env("TOP_K_RETRIEVAL", "5")) # number of vector search results
TOP_K_RERANK = int(get_env("TOP_K_RERANK", "3")) # number of results kept after reranking
# Logging settings
LOG_LEVEL = get_env("LOG_LEVEL", "INFO")
LOG_FILE = get_env("LOG_FILE", "autorag.log")
# Print configuration information (for debugging)
def print_config():
    """Write the current configuration values to the log at INFO level."""
    info = logger.info

    info("===== ํ˜„์žฌ ์„ค์ • ์ •๋ณด =====")
    environment_label = 'HuggingFace Spaces' if IS_HUGGINGFACE else '๋กœ์ปฌ'
    info(f"์‹คํ–‰ ํ™˜๊ฒฝ: {environment_label}")
    info(f"๋ฌธ์„œ ๋””๋ ‰ํ† ๋ฆฌ: {PDF_DIRECTORY}")
    info(f"์บ์‹œ ๋””๋ ‰ํ† ๋ฆฌ: {CACHE_DIRECTORY}")
    info(f"์ฒญํฌ ํฌ๊ธฐ: {CHUNK_SIZE}, ์˜ค๋ฒ„๋žฉ: {CHUNK_OVERLAP}")
    info(f"OpenAI ์‚ฌ์šฉ: {USE_OPENAI}")
    info(f"DeepSeek ์‚ฌ์šฉ: {USE_DEEPSEEK}")
    info(f"LLM ๋ชจ๋ธ: {LLM_MODEL}")

    # The Ollama host is only reported for a local run with neither hosted backend enabled.
    if not (USE_OPENAI or USE_DEEPSEEK or IS_HUGGINGFACE):
        info(f"Ollama ํ˜ธ์ŠคํŠธ: {OLLAMA_HOST}")

    info(f"์ž„๋ฒ ๋”ฉ ๋ชจ๋ธ: {EMBEDDING_MODEL}")
    info(f"๋ฆฌ๋žญ์ปค ๋ชจ๋ธ: {RERANKER_MODEL}")
    info(f"TOP_K ๊ฒ€์ƒ‰: {TOP_K_RETRIEVAL}, ๋ฆฌ๋žญํ‚น: {TOP_K_RERANK}")
    info("=========================")
# Configuration validation
def validate_config() -> Dict[str, Any]:
    """
    Check the loaded settings and log every problem found as a warning.

    Checks that the PDF directory exists, that any enabled hosted backend has
    an API key, and that the chunk size exceeds the overlap. When DeepSeek is
    enabled with a key, it also calls test_deepseek_connection().

    Returns:
        A dictionary with ``status`` ("valid" or "warnings") and ``warnings``
        (the list of warning messages).
    """
    issues = []

    # Directory check
    if not os.path.exists(PDF_DIRECTORY):
        issues.append(f"PDF ๋””๋ ‰ํ† ๋ฆฌ({PDF_DIRECTORY})๊ฐ€ ์กด์žฌํ•˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค.")

    # API key checks; the wording differs between HuggingFace and local runs.
    deepseek_key_missing = USE_DEEPSEEK and not DEEPSEEK_API_KEY
    if IS_HUGGINGFACE:
        if deepseek_key_missing:
            issues.append("ํ—ˆ๊น…ํŽ˜์ด์Šค ํ™˜๊ฒฝ์—์„œ DeepSeek ์‚ฌ์šฉ์ด ์„ค์ •๋˜์—ˆ์ง€๋งŒ API ํ‚ค๊ฐ€ ์ œ๊ณต๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
    else:
        if USE_OPENAI and not OPENAI_API_KEY:
            issues.append("OpenAI ์‚ฌ์šฉ์ด ์„ค์ •๋˜์—ˆ์ง€๋งŒ API ํ‚ค๊ฐ€ ์ œ๊ณต๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
        if deepseek_key_missing:
            issues.append("DeepSeek ์‚ฌ์šฉ์ด ์„ค์ •๋˜์—ˆ์ง€๋งŒ API ํ‚ค๊ฐ€ ์ œ๊ณต๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")

    # Chunking sanity check
    if CHUNK_SIZE <= CHUNK_OVERLAP:
        issues.append(f"์ฒญํฌ ํฌ๊ธฐ({CHUNK_SIZE})๊ฐ€ ์˜ค๋ฒ„๋žฉ({CHUNK_OVERLAP})๋ณด๋‹ค ์ž‘๊ฑฐ๋‚˜ ๊ฐ™์Šต๋‹ˆ๋‹ค.")

    # Live DeepSeek connectivity check, only when it is enabled and has a key.
    if USE_DEEPSEEK and DEEPSEEK_API_KEY:
        probe = test_deepseek_connection()
        if not probe["success"]:
            issues.append(f"DeepSeek API ์—ฐ๊ฒฐ ํ…Œ์ŠคํŠธ ์‹คํŒจ: {probe['message']}")

    # Report everything that was collected.
    for issue in issues:
        logger.warning(issue)

    return {
        "status": "warnings" if issues else "valid",
        "warnings": issues
    }
# Run when the configuration module is loaded
# Importing this module logs the settings and validates them right away.
# validate_config() may call test_deepseek_connection(), i.e. issue an HTTP
# request, when DeepSeek is enabled with a key. The result is kept in
# config_status.
print_config()
config_status = validate_config()