Spaces:
Paused
Paused
""" | |
๋ฒกํฐ ์คํ ์ด, ์๋ฒ ๋ฉ ๋ชจ๋ธ, LLM ๋ฑ ๊ตฌ์ฑ ์์ ์ค์ | |
ํ๊ฒฝ ๋ณ์ ๋ฐ .env ํ์ผ ํ์ฉ ๊ฐ์ ๋ฒ์ - HuggingFace ํ๊ฒฝ ์ง์ ์ถ๊ฐ | |
""" | |
import os | |
import logging | |
import sys | |
import re | |
import requests | |
import json | |
from pathlib import Path | |
from typing import Dict, Any | |
from dotenv import load_dotenv | |
# Module-level logger used by every helper in this configuration module.
logger = logging.getLogger("Config")

# Log where the module is running from (debugging aid for path problems).
script_dir = os.path.dirname(os.path.abspath(__file__))
logger.info(f"์คํฌ๋ฆฝํธ ๋๋ ํ ๋ฆฌ: {script_dir}")
logger.info(f"ํ์ฌ ์์ ๋๋ ํ ๋ฆฌ: {os.getcwd()}")
logger.info(f"์ด์ ์ฒด์ : {os.name}")

# Environment detection: the run is treated as a HuggingFace Space when
# SPACE_ID is set or SYSTEM equals 'spaces'.
IS_HUGGINGFACE = os.getenv('SPACE_ID') is not None or os.getenv('SYSTEM') == 'spaces'

if IS_HUGGINGFACE:
    logger.info("HuggingFace Spaces ํ๊ฒฝ์ด ๊ฐ์ง๋์์ต๋๋ค.")
else:
    # Local run: look for a .env file in several candidate locations.
    env_paths = [
        ".env",  # current working directory
        os.path.join(script_dir, ".env"),  # directory of this script
        os.path.join(script_dir, "config", ".env"),  # "config" sub-directory
        os.path.join(os.path.dirname(script_dir), ".env"),  # parent directory
    ]

    # Load the first candidate that exists and loads successfully.
    env_loaded = False
    for env_path in env_paths:
        if not os.path.isfile(env_path):
            continue
        logger.info(f".env ํ์ผ ๋ฐ๊ฒฌ: {env_path}")
        env_loaded = load_dotenv(env_path, verbose=True)
        # NOTE(review): the original indentation was lost; the loop is assumed
        # to stop only after a successful load -- confirm against the real file.
        if env_loaded:
            logger.info(f".env ํ์ผ ๋ก๋ ์ฑ๊ณต: {env_path}")
            break

    if not env_loaded:
        logger.warning(".env ํ์ผ์ ์ฐพ์ ์ ์์ต๋๋ค. ๊ธฐ๋ณธ๊ฐ ๋๋ ์์คํ ํ๊ฒฝ ๋ณ์๋ฅผ ์ฌ์ฉํฉ๋๋ค.")
    logger.info(f"๋ก์ปฌ ํ๊ฒฝ์์ ์คํ ์ค์ ๋๋ค. (OS: {'Windows' if os.name == 'nt' else 'Unix/Linux/MacOS'})")

# Windows detection, consumed by the path helpers.
IS_WINDOWS = os.name == 'nt'
# Utility: read an environment variable (HuggingFace-aware).
def get_env(key: str, default: Any = None, required: bool = False) -> Any:
    """
    Look up a configuration value from the process environment.

    On HuggingFace Spaces the variable ``HF_SECRET_<KEY>`` (key upper-cased)
    is consulted first; if it is not set, or when running locally, the plain
    ``key`` variable is read with ``default`` as the fallback.

    Args:
        key: Name of the environment variable.
        default: Value returned when the variable is not set.
        required: When True, a missing value (``None``) is an error.

    Returns:
        The environment value, or ``default`` when the variable is unset.

    Raises:
        ValueError: If ``required`` is True and the resolved value is ``None``.
    """
    resolved = None
    if IS_HUGGINGFACE:
        # Secrets stored under the HF_SECRET_<KEY> name take precedence.
        resolved = os.getenv(f"HF_SECRET_{key.upper()}")
    if resolved is None:
        # Regular environment variable, shared by both environments.
        resolved = os.getenv(key, default)

    if not required or resolved is not None:
        return resolved

    # Required value is missing: pick the hint that fits the environment.
    if IS_HUGGINGFACE:
        error_msg = f"ํ์ ํ๊ฒฝ ๋ณ์ {key}๊ฐ ์ค์ ๋์ง ์์์ต๋๋ค. HuggingFace Space์์ ์ํฌ๋ฆฟ์ ์ค์ ํด์ฃผ์ธ์."
    else:
        error_msg = f"ํ์ ํ๊ฒฝ ๋ณ์ {key}๊ฐ ์ค์ ๋์ง ์์์ต๋๋ค. .env ํ์ผ์ ์ถ๊ฐํด์ฃผ์ธ์."
    logger.error(error_msg)
    raise ValueError(error_msg)
# Utility: turn a configured path into an absolute one.
def ensure_absolute_path(path_str: str) -> str:
    """
    Convert a relative path to an absolute path (Windows paths supported).

    Relative paths are resolved against the script directory when that
    location exists; otherwise they are resolved against the current working
    directory, whether or not the result exists.

    Args:
        path_str: Path string to convert.

    Returns:
        The absolute path as a string.
    """
    # A drive-letter path such as C:\... on Windows is returned untouched.
    if IS_WINDOWS and re.match(r'^[a-zA-Z]:\\', path_str):
        logger.info(f"Windows ์ ๋ ๊ฒฝ๋ก ๊ฐ์ง: {path_str}")
        return path_str

    candidate = Path(path_str)
    if candidate.is_absolute():
        return str(candidate)

    relative_to_script = Path(script_dir) / candidate
    relative_to_cwd = Path.cwd() / candidate

    # Prefer the script-relative location when it exists; in every other case
    # the working-directory-relative path is the answer.
    if relative_to_script.exists():
        return str(relative_to_script)
    return str(relative_to_cwd)
# Utility: normalize a path string for the current OS.
def normalize_path(path_str: str) -> str:
    """
    Normalize a path string with ``os.path.normpath``.

    Redundant separators and ``.``/``..`` segments are collapsed. On Windows,
    forward slashes are converted to backslashes; on POSIX systems backslashes
    are left as they are.

    Args:
        path_str: Path string to normalize.

    Returns:
        The normalized path.
    """
    normalized = os.path.normpath(path_str)
    return normalized
# Base directories: read from the environment, normalize, then make absolute.
# The *_RAW names keep the normalized (not yet absolute) value for logging.
PDF_DIRECTORY_RAW = normalize_path(get_env("PDF_DIRECTORY", "documents"))
PDF_DIRECTORY = ensure_absolute_path(PDF_DIRECTORY_RAW)

CACHE_DIRECTORY_RAW = normalize_path(get_env("CACHE_DIRECTORY", "cached_data"))
CACHE_DIRECTORY = ensure_absolute_path(CACHE_DIRECTORY_RAW)

logger.info(f"PDF ๋๋ ํ ๋ฆฌ (์๋ณธ): {PDF_DIRECTORY_RAW}")
logger.info(f"PDF ๋๋ ํ ๋ฆฌ (์ ๋): {PDF_DIRECTORY}")
logger.info(f"์บ์ ๋๋ ํ ๋ฆฌ (์๋ณธ): {CACHE_DIRECTORY_RAW}")
logger.info(f"์บ์ ๋๋ ํ ๋ฆฌ (์ ๋): {CACHE_DIRECTORY}")
# Chunking settings.
CHUNK_SIZE = int(get_env("CHUNK_SIZE", "1000"))
CHUNK_OVERLAP = int(get_env("CHUNK_OVERLAP", "200"))

# API keys and service settings.
OPENAI_API_KEY = get_env("OPENAI_API_KEY", "")
LANGFUSE_PUBLIC_KEY = get_env("LANGFUSE_PUBLIC_KEY", "")
LANGFUSE_SECRET_KEY = get_env("LANGFUSE_SECRET_KEY", "")
LANGFUSE_HOST = get_env("LANGFUSE_HOST", "https://cloud.langfuse.com")

# DeepSeek settings.
DEEPSEEK_API_KEY = get_env("DEEPSEEK_API_KEY", "")
DEEPSEEK_ENDPOINT = get_env("DEEPSEEK_ENDPOINT", "https://api.deepseek.com/v1/chat/completions")
DEEPSEEK_MODEL = get_env("DEEPSEEK_MODEL", "deepseek-chat")

# On HuggingFace, log whether a DeepSeek key is present.
if IS_HUGGINGFACE:
    logger.info(f"ํ๊น ํ์ด์ค ํ๊ฒฝ์์ DeepSeek API ํค ์กด์ฌ ์ฌ๋ถ: {bool(DEEPSEEK_API_KEY)}")
    if DEEPSEEK_API_KEY:
        # Only the first and last four characters of the key are logged;
        # keys of eight characters or fewer are fully masked.
        if len(DEEPSEEK_API_KEY) > 8:
            masked_key = DEEPSEEK_API_KEY[:4] + "****" + DEEPSEEK_API_KEY[-4:]
        else:
            masked_key = "****"
        logger.info(f"DeepSeek API ํค: {masked_key}")
        # NOTE(review): the original indentation was lost; the model/endpoint
        # lines are assumed to belong to the key-present branch -- confirm.
        logger.info(f"DeepSeek ๋ชจ๋ธ: {DEEPSEEK_MODEL}")
        logger.info(f"DeepSeek ์๋ํฌ์ธํธ: {DEEPSEEK_ENDPOINT}")

# Milvus vector DB settings.
MILVUS_HOST = get_env("MILVUS_HOST", "localhost")
MILVUS_PORT = get_env("MILVUS_PORT", "19530")
MILVUS_COLLECTION = get_env("MILVUS_COLLECTION", "pdf_documents")

# Embedding / reranker models (multilingual defaults).
EMBEDDING_MODEL = get_env("EMBEDDING_MODEL", "Alibaba-NLP/gte-multilingual-base")
RERANKER_MODEL = get_env("RERANKER_MODEL", "Alibaba-NLP/gte-multilingual-reranker-base")
# LLM backend selection (chosen automatically from the environment).
USE_OPENAI = get_env("USE_OPENAI", "False").lower() == "true"
USE_DEEPSEEK = get_env("USE_DEEPSEEK", "False").lower() == "true"

# Resolve the Ollama host unconditionally. It used to be assigned only in the
# local Ollama branch, so a local run that selected DeepSeek/OpenAI without an
# API key fell back to Ollama with OLLAMA_HOST undefined, and print_config()
# raised NameError at import time.
OLLAMA_HOST = get_env("OLLAMA_HOST", "http://localhost:11434")

if IS_HUGGINGFACE:
    # On HuggingFace Spaces, DeepSeek is used whenever its API key is present.
    if DEEPSEEK_API_KEY:
        USE_DEEPSEEK = True
        USE_OPENAI = False
        LLM_MODEL = DEEPSEEK_MODEL
        logger.info("HuggingFace Spaces ํ๊ฒฝ: DeepSeek ๋ชจ๋ธ ์ฌ์ฉ")
    else:
        logger.warning("HuggingFace Spaces ํ๊ฒฝ์์ DeepSeek API ํค๊ฐ ์ค์ ๋์ง ์์์ต๋๋ค.")
        USE_DEEPSEEK = False
        USE_OPENAI = False  # both hosted backends are disabled without the key
        LLM_MODEL = get_env("LLM_MODEL", "gemma3:latest")  # fallback model name
        logger.info(f"HuggingFace Spaces ํ๊ฒฝ: DeepSeek API ํค ์์, LLM ๋ชจ๋ธ: {LLM_MODEL}")
else:
    # Local run: follow the USE_DEEPSEEK / USE_OPENAI flags, else use Ollama.
    if USE_DEEPSEEK:
        LLM_MODEL = DEEPSEEK_MODEL
        logger.info(f"๋ก์ปฌ ํ๊ฒฝ: DeepSeek ๋ชจ๋ธ ์ฌ์ฉ ({DEEPSEEK_MODEL})")
    elif USE_OPENAI:
        LLM_MODEL = get_env("LLM_MODEL", "gpt-3.5-turbo")
        logger.info(f"๋ก์ปฌ ํ๊ฒฝ: OpenAI ๋ชจ๋ธ ์ฌ์ฉ ({LLM_MODEL})")
    else:
        LLM_MODEL = get_env("LLM_MODEL", "gemma3:latest")
        logger.info(f"๋ก์ปฌ ํ๊ฒฝ: Ollama ๋ชจ๋ธ ์ฌ์ฉ ({LLM_MODEL})")

# API key validation (local runs only): a backend selected without its key
# falls back to the Ollama model.
if not IS_HUGGINGFACE:
    if USE_DEEPSEEK and not DEEPSEEK_API_KEY:
        logger.warning("DeepSeek ๋ชจ๋ธ์ด ์ ํ๋์์ง๋ง API ํค๊ฐ ์ค์ ๋์ง ์์์ต๋๋ค.")
        USE_DEEPSEEK = False
        USE_OPENAI = False
        LLM_MODEL = get_env("LLM_MODEL", "gemma3:latest")
        logger.info("DeepSeek API ํค๊ฐ ์์ด Ollama๋ก ํด๋ฐฑํฉ๋๋ค.")
    elif USE_OPENAI and not OPENAI_API_KEY:
        logger.warning("OpenAI ๋ชจ๋ธ์ด ์ ํ๋์์ง๋ง API ํค๊ฐ ์ค์ ๋์ง ์์์ต๋๋ค.")
        logger.warning("OpenAI API ํค๊ฐ ์์ด Ollama๋ก ํด๋ฐฑํฉ๋๋ค.")
        USE_OPENAI = False
        LLM_MODEL = get_env("LLM_MODEL", "gemma3:latest")
# DeepSeek API connectivity check.
def test_deepseek_connection() -> Dict[str, Any]:
    """
    Send one small chat-completion request to the configured DeepSeek endpoint.

    The request is skipped when no API key is configured. Any failure is
    reported through the returned dictionary rather than raised.

    Returns:
        A dictionary with three keys: ``success`` (bool), ``message`` (str)
        and ``status_code`` (the HTTP status, or ``None`` when no response
        was received).
    """
    if not DEEPSEEK_API_KEY:
        logger.warning("DeepSeek API ํค๊ฐ ์ค์ ๋์ง ์์ ํ ์คํธ๋ฅผ ๊ฑด๋๋๋๋ค.")
        return {
            "success": False,
            "message": "API ํค๊ฐ ์ค์ ๋์ง ์์์ต๋๋ค.",
            "status_code": None,
        }

    try:
        logger.info(f"DeepSeek API ์ฐ๊ฒฐ ํ ์คํธ ์์: {DEEPSEEK_ENDPOINT}, ๋ชจ๋ธ: {DEEPSEEK_MODEL}")

        # Minimal prompt used only to verify that the endpoint answers.
        test_prompt = "Hello, please respond with a short greeting."

        # Request headers and body.
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {DEEPSEEK_API_KEY}",
        }
        payload = {
            "model": DEEPSEEK_MODEL,
            "messages": [{"role": "user", "content": test_prompt}],
            "temperature": 0.7,
            "max_tokens": 50,
        }

        response = requests.post(
            DEEPSEEK_ENDPOINT,
            headers=headers,
            json=payload,
            timeout=10,  # 10-second timeout
        )

        if response.status_code == 200:
            logger.info("DeepSeek API ์ฐ๊ฒฐ ์ฑ๊ณต")
            return {
                "success": True,
                "message": "API ์ฐ๊ฒฐ ์ฑ๊ณต",
                "status_code": response.status_code,
            }

        logger.error(f"DeepSeek API ์ค๋ฅ: ์ํ ์ฝ๋ {response.status_code}")
        try:
            error_data = response.json()
            error_message = error_data.get("error", {}).get("message", str(error_data))
        except (ValueError, AttributeError, TypeError):
            # The body is not JSON (ValueError) or does not have the expected
            # {"error": {"message": ...}} shape; fall back to the raw text.
            # A bare ``except`` here would also swallow KeyboardInterrupt and
            # SystemExit.
            error_message = response.text
        return {
            "success": False,
            "message": f"API ์ค๋ฅ: {error_message}",
            "status_code": response.status_code,
        }
    except requests.exceptions.Timeout:
        logger.error("DeepSeek API ์์ฒญ ์๊ฐ ์ด๊ณผ")
        return {
            "success": False,
            "message": "API ์์ฒญ ์๊ฐ ์ด๊ณผ",
            "status_code": None,
        }
    except requests.exceptions.ConnectionError:
        logger.error("DeepSeek API ์ฐ๊ฒฐ ์คํจ")
        return {
            "success": False,
            "message": "API ์๋ฒ ์ฐ๊ฒฐ ์คํจ",
            "status_code": None,
        }
    except Exception as e:
        # Last-resort boundary: report the failure instead of raising.
        logger.error(f"DeepSeek API ํ ์คํธ ์ค ์์์น ๋ชปํ ์ค๋ฅ: {e}", exc_info=True)
        return {
            "success": False,
            "message": f"์์์น ๋ชปํ ์ค๋ฅ: {str(e)}",
            "status_code": None,
        }
# Vector search settings.
TOP_K_RETRIEVAL = int(get_env("TOP_K_RETRIEVAL", "5"))  # number of vector search hits
TOP_K_RERANK = int(get_env("TOP_K_RERANK", "3"))  # number of hits kept after reranking

# Logging settings.
LOG_LEVEL = get_env("LOG_LEVEL", "INFO")
LOG_FILE = get_env("LOG_FILE", "autorag.log")
# Dump the resolved settings (debugging aid).
def print_config() -> None:
    """Write the current configuration values to the module logger."""
    if IS_HUGGINGFACE:
        environment_label = 'HuggingFace Spaces'
    else:
        environment_label = '๋ก์ปฌ'

    logger.info("===== ํ์ฌ ์ค์ ์ ๋ณด =====")
    logger.info(f"์คํ ํ๊ฒฝ: {environment_label}")
    logger.info(f"๋ฌธ์ ๋๋ ํ ๋ฆฌ: {PDF_DIRECTORY}")
    logger.info(f"์บ์ ๋๋ ํ ๋ฆฌ: {CACHE_DIRECTORY}")
    logger.info(f"์ฒญํฌ ํฌ๊ธฐ: {CHUNK_SIZE}, ์ค๋ฒ๋ฉ: {CHUNK_OVERLAP}")
    logger.info(f"OpenAI ์ฌ์ฉ: {USE_OPENAI}")
    logger.info(f"DeepSeek ์ฌ์ฉ: {USE_DEEPSEEK}")
    logger.info(f"LLM ๋ชจ๋ธ: {LLM_MODEL}")

    # The Ollama host is only reported for a local run with neither hosted
    # backend enabled.
    if not (USE_OPENAI or USE_DEEPSEEK or IS_HUGGINGFACE):
        logger.info(f"Ollama ํธ์คํธ: {OLLAMA_HOST}")

    logger.info(f"์๋ฒ ๋ฉ ๋ชจ๋ธ: {EMBEDDING_MODEL}")
    logger.info(f"๋ฆฌ๋ญ์ปค ๋ชจ๋ธ: {RERANKER_MODEL}")
    logger.info(f"TOP_K ๊ฒ์: {TOP_K_RETRIEVAL}, ๋ฆฌ๋ญํน: {TOP_K_RERANK}")
    logger.info("=========================")
# Configuration sanity checks.
def validate_config() -> Dict[str, Any]:
    """
    Check the current settings and log every problem found as a warning.

    Checks performed: the PDF directory exists, the selected LLM backend has
    its API key, the chunk size exceeds the chunk overlap, and (when DeepSeek
    is enabled with a key) the DeepSeek endpoint answers a test request.

    Returns:
        A dictionary with ``status`` ("valid" or "warnings") and ``warnings``
        (the list of warning messages, possibly empty).
    """
    issues = []

    # Directory check.
    if not os.path.exists(PDF_DIRECTORY):
        issues.append(f"PDF ๋๋ ํ ๋ฆฌ({PDF_DIRECTORY})๊ฐ ์กด์ฌํ์ง ์์ต๋๋ค.")

    # API key checks; the wording differs between HuggingFace and local runs.
    deepseek_key_missing = USE_DEEPSEEK and not DEEPSEEK_API_KEY
    if IS_HUGGINGFACE:
        if deepseek_key_missing:
            issues.append("ํ๊น ํ์ด์ค ํ๊ฒฝ์์ DeepSeek ์ฌ์ฉ์ด ์ค์ ๋์์ง๋ง API ํค๊ฐ ์ ๊ณต๋์ง ์์์ต๋๋ค.")
    else:
        if USE_OPENAI and not OPENAI_API_KEY:
            issues.append("OpenAI ์ฌ์ฉ์ด ์ค์ ๋์์ง๋ง API ํค๊ฐ ์ ๊ณต๋์ง ์์์ต๋๋ค.")
        if deepseek_key_missing:
            issues.append("DeepSeek ์ฌ์ฉ์ด ์ค์ ๋์์ง๋ง API ํค๊ฐ ์ ๊ณต๋์ง ์์์ต๋๋ค.")

    # Chunking parameters: the overlap must be smaller than the chunk size.
    if CHUNK_OVERLAP >= CHUNK_SIZE:
        issues.append(f"์ฒญํฌ ํฌ๊ธฐ({CHUNK_SIZE})๊ฐ ์ค๋ฒ๋ฉ({CHUNK_OVERLAP})๋ณด๋ค ์๊ฑฐ๋ ๊ฐ์ต๋๋ค.")

    # Live DeepSeek connectivity check, only when it is enabled and keyed.
    if USE_DEEPSEEK and DEEPSEEK_API_KEY:
        deepseek_test_result = test_deepseek_connection()
        if not deepseek_test_result["success"]:
            issues.append(f"DeepSeek API ์ฐ๊ฒฐ ํ ์คํธ ์คํจ: {deepseek_test_result['message']}")

    # Report everything that was collected.
    for issue in issues:
        logger.warning(issue)

    return {
        "status": "warnings" if issues else "valid",
        "warnings": issues,
    }
# Executed once when the module is loaded: log the resolved configuration,
# then validate it and keep the result for importers.
print_config()
config_status = validate_config()