|
""" |
|
Improved vector store module - optimized Milvus configuration and hardened exception handling |
|
""" |
|
import os |
|
import logging |
|
from typing import List, Dict, Any, Optional |
|
import uuid |
|
from langchain.schema import Document |
|
|
|
|
|
logger = logging.getLogger("VectorStore") |
|
|
|
|
|
class VectorStoreInitError(Exception):
    """Error raised while initializing a vector store."""
|
|
|
class EmbeddingModelError(Exception):
    """Error raised while initializing the embedding model."""
|
|
|
class DocumentIndexError(Exception):
    """Error raised while indexing documents."""
|
|
|
class VectorSearchError(Exception):
    """Error raised while running a vector search."""
|
|
|
class PersistenceError(Exception):
    """Error raised while saving or loading an index."""
|
|
|
|
|
# Prefer the split-out langchain packages; fall back to the legacy
# langchain_community locations when any of them is missing.
# MODERN_IMPORTS records which set of packages ended up being used.
try:
    from langchain_milvus import Milvus
    from langchain_community.vectorstores import FAISS
    from langchain_huggingface import HuggingFaceEmbeddings
    MODERN_IMPORTS = True
    logger.info("최신 langchain 패키지 임포트 성공")
except ImportError:
    try:
        from langchain_community.vectorstores import Milvus, FAISS
        from langchain_community.embeddings import HuggingFaceEmbeddings
        MODERN_IMPORTS = False
        logger.info("레거시 langchain_community 패키지 사용")
    except ImportError as e:
        # Neither package layout is available: the module cannot work at all,
        # so surface a domain error and keep the ImportError as its cause.
        logger.error(f"필수 벡터 스토어 라이브러리를 임포트할 수 없습니다: {e}")
        raise VectorStoreInitError(
            f"필수 벡터 스토어 라이브러리를 임포트할 수 없습니다: {str(e)}"
        ) from e
|
|
|
from config import MILVUS_HOST, MILVUS_PORT, MILVUS_COLLECTION, EMBEDDING_MODEL |
|
|
|
class VectorStore: |
|
def __init__(self, use_milvus: bool = True): |
|
""" |
|
๋ฒกํฐ ์คํ ์ด ์ด๊ธฐํ |
|
|
|
Args: |
|
use_milvus: Milvus ์ฌ์ฉ ์ฌ๋ถ (False์ด๋ฉด FAISS ์ฌ์ฉ) |
|
""" |
|
self.use_milvus = use_milvus |
|
self.vector_store = None |
|
|
|
|
|
logger.info(f"์๋ฒ ๋ฉ ๋ชจ๋ธ ๋ก๋ ์ค: {EMBEDDING_MODEL}") |
|
model_kwargs = { |
|
"device": "cpu", |
|
"trust_remote_code": True |
|
} |
|
encode_kwargs = {"normalize_embeddings": True} |
|
|
|
try: |
|
self.embeddings = HuggingFaceEmbeddings( |
|
model_name=EMBEDDING_MODEL, |
|
model_kwargs=model_kwargs, |
|
encode_kwargs=encode_kwargs |
|
) |
|
logger.info(f"์๋ฒ ๋ฉ ๋ชจ๋ธ ์ด๊ธฐํ ์๋ฃ: {EMBEDDING_MODEL}") |
|
except Exception as e: |
|
logger.error(f"์๋ฒ ๋ฉ ๋ชจ๋ธ ์ด๊ธฐํ ์คํจ: {e}", exc_info=True) |
|
raise EmbeddingModelError(f"์๋ฒ ๋ฉ ๋ชจ๋ธ '{EMBEDDING_MODEL}' ์ด๊ธฐํ ์คํจ: {str(e)}") |
|
|
|
def init_milvus(self) -> Milvus: |
|
""" |
|
Milvus ๋ฒกํฐ ์คํ ์ด ์ด๊ธฐํ |
|
|
|
Returns: |
|
Milvus ๋ฒกํฐ ์คํ ์ด ์ธ์คํด์ค |
|
""" |
|
try: |
|
connection_args = { |
|
"host": MILVUS_HOST, |
|
"port": MILVUS_PORT, |
|
} |
|
|
|
|
|
index_params = { |
|
"index_type": "FLAT", |
|
"metric_type": "COSINE", |
|
"params": {} |
|
} |
|
|
|
logger.info(f"Milvus ์ฐ๊ฒฐ ์๋ ์ค: {MILVUS_HOST}:{MILVUS_PORT}") |
|
milvus_store = Milvus( |
|
embedding_function=self.embeddings, |
|
collection_name=MILVUS_COLLECTION, |
|
connection_args=connection_args, |
|
index_params=index_params |
|
) |
|
logger.info(f"Milvus ์ฐ๊ฒฐ ์ฑ๊ณต: {MILVUS_COLLECTION}") |
|
return milvus_store |
|
except Exception as e: |
|
logger.error(f"Milvus ์ด๊ธฐํ ์คํจ: {e}", exc_info=True) |
|
raise VectorStoreInitError(f"Milvus ๋ฒกํฐ ์คํ ์ด ์ด๊ธฐํ ์คํจ: {str(e)}") |
|
|
|
def init_faiss(self) -> FAISS: |
|
""" |
|
FAISS ๋ฒกํฐ ์คํ ์ด ์ด๊ธฐํ (๋ก์ปฌ ๋์ฒด์ฉ) |
|
|
|
Returns: |
|
FAISS ๋ฒกํฐ ์คํ ์ด ์ธ์คํด์ค |
|
""" |
|
try: |
|
logger.info("FAISS ๋ฒกํฐ ์คํ ์ด ์ด๊ธฐํ ์ค") |
|
faiss_store = FAISS.from_documents([], self.embeddings) |
|
logger.info("FAISS ๋ฒกํฐ ์คํ ์ด ์ด๊ธฐํ ์๋ฃ") |
|
return faiss_store |
|
except Exception as e: |
|
logger.error(f"FAISS ์ด๊ธฐํ ์คํจ: {e}", exc_info=True) |
|
raise VectorStoreInitError(f"FAISS ๋ฒกํฐ ์คํ ์ด ์ด๊ธฐํ ์คํจ: {str(e)}") |
|
|
|
def create_or_load(self, documents: Optional[List[Document]] = None) -> Any: |
|
""" |
|
๋ฒกํฐ ์คํ ์ด ์์ฑ ๋๋ ๋ก๋ |
|
|
|
Args: |
|
documents: ์ ์ฅํ ๋ฌธ์ ๋ฆฌ์คํธ (None์ด๋ฉด ๋น ์คํ ์ด ์์ฑ) |
|
|
|
Returns: |
|
๋ฒกํฐ ์คํ ์ด ์ธ์คํด์ค |
|
""" |
|
if self.use_milvus: |
|
if documents: |
|
|
|
try: |
|
|
|
connection_args = { |
|
"host": MILVUS_HOST, |
|
"port": MILVUS_PORT, |
|
} |
|
|
|
|
|
index_params = { |
|
"index_type": "FLAT", |
|
"metric_type": "COSINE", |
|
"params": {} |
|
} |
|
|
|
logger.info(f"Milvus ์ปฌ๋ ์
์์ฑ ์ค: {MILVUS_COLLECTION} (๊ธฐ์กด ์ปฌ๋ ์
์ญ์ )") |
|
|
|
|
|
self.vector_store = Milvus.from_documents( |
|
documents=documents, |
|
embedding=self.embeddings, |
|
collection_name=MILVUS_COLLECTION, |
|
connection_args=connection_args, |
|
index_params=index_params, |
|
drop_old=True |
|
) |
|
|
|
logger.info(f"Milvus ์ปฌ๋ ์
์์ฑ ์๋ฃ: {len(documents)}๊ฐ ๋ฌธ์ ์ธ๋ฑ์ฑ๋จ") |
|
|
|
except Exception as e: |
|
logger.error(f"Milvus ์ปฌ๋ ์
์์ฑ ์คํจ: {e}", exc_info=True) |
|
|
|
logger.warning("Milvus ์คํจ๋ก FAISS๋ก ๋์ฒดํฉ๋๋ค") |
|
self.use_milvus = False |
|
try: |
|
self.vector_store = FAISS.from_documents(documents, self.embeddings) |
|
logger.info(f"FAISS๋ก ๋์ฒด ์ฑ๊ณต: {len(documents)}๊ฐ ๋ฌธ์ ์ธ๋ฑ์ฑ๋จ") |
|
except Exception as faiss_err: |
|
logger.error(f"FAISS ๋์ฒด ์คํจ: {faiss_err}", exc_info=True) |
|
raise DocumentIndexError(f"๋ฌธ์ ์ธ๋ฑ์ฑ ์คํจ (Milvus ๋ฐ FAISS): {str(e)} / {str(faiss_err)}") |
|
else: |
|
|
|
try: |
|
self.vector_store = self.init_milvus() |
|
except VectorStoreInitError as e: |
|
logger.error(f"Milvus ์ปฌ๋ ์
๋ก๋ ์คํจ: {e}") |
|
|
|
logger.warning("Milvus ์คํจ๋ก FAISS๋ก ๋์ฒดํฉ๋๋ค") |
|
self.use_milvus = False |
|
try: |
|
self.vector_store = self.init_faiss() |
|
except VectorStoreInitError as faiss_err: |
|
logger.error(f"FAISS ๋์ฒด ์คํจ: {faiss_err}", exc_info=True) |
|
raise VectorStoreInitError(f"๋ฒกํฐ ์คํ ์ด ์ด๊ธฐํ ์คํจ (Milvus ๋ฐ FAISS): {str(e)} / {str(faiss_err)}") |
|
else: |
|
|
|
if documents: |
|
try: |
|
logger.info(f"FAISS ์ธ๋ฑ์ค ์์ฑ ์ค: {len(documents)}๊ฐ ๋ฌธ์") |
|
self.vector_store = FAISS.from_documents(documents, self.embeddings) |
|
logger.info("FAISS ์ธ๋ฑ์ค ์์ฑ ์๋ฃ") |
|
except Exception as e: |
|
logger.error(f"FAISS ์ธ๋ฑ์ค ์์ฑ ์คํจ: {e}", exc_info=True) |
|
raise DocumentIndexError(f"FAISS ๋ฌธ์ ์ธ๋ฑ์ฑ ์คํจ: {str(e)}") |
|
else: |
|
try: |
|
self.vector_store = self.init_faiss() |
|
except VectorStoreInitError as e: |
|
|
|
raise |
|
|
|
return self.vector_store |
|
|
|
def add_documents(self, documents: List[Document]) -> None: |
|
""" |
|
๋ฒกํฐ ์คํ ์ด์ ๋ฌธ์ ์ถ๊ฐ |
|
|
|
Args: |
|
documents: ์ถ๊ฐํ ๋ฌธ์ ๋ฆฌ์คํธ |
|
""" |
|
if not documents: |
|
logger.warning("์ถ๊ฐํ ๋ฌธ์๊ฐ ์์ต๋๋ค") |
|
return |
|
|
|
try: |
|
if self.vector_store is None: |
|
logger.info("๋ฒกํฐ ์คํ ์ด๊ฐ ์ด๊ธฐํ๋์ง ์์์ต๋๋ค. ์ ๋ฒกํฐ ์คํ ์ด๋ฅผ ์์ฑํฉ๋๋ค.") |
|
self.create_or_load(documents) |
|
else: |
|
logger.info(f"{len(documents)}๊ฐ ๋ฌธ์๋ฅผ ๊ธฐ์กด ๋ฒกํฐ ์คํ ์ด์ ์ถ๊ฐํฉ๋๋ค") |
|
self.vector_store.add_documents(documents) |
|
logger.info(f"{len(documents)}๊ฐ ๋ฌธ์ ์ถ๊ฐ ์๋ฃ") |
|
except Exception as e: |
|
logger.error(f"๋ฌธ์ ์ถ๊ฐ ์คํจ: {e}", exc_info=True) |
|
raise DocumentIndexError(f"๋ฒกํฐ ์คํ ์ด์ ๋ฌธ์ ์ถ๊ฐ ์คํจ: {str(e)}") |
|
|
|
def similarity_search(self, query: str, k: int = 5) -> List[Document]: |
|
""" |
|
๋ฒกํฐ ์ ์ฌ๋ ๊ฒ์ ์ํ |
|
|
|
Args: |
|
query: ๊ฒ์ ์ฟผ๋ฆฌ |
|
k: ๋ฐํํ ๊ฒฐ๊ณผ ์ |
|
|
|
Returns: |
|
์ ์ฌ๋๊ฐ ๋์ ๋ฌธ์ ๋ฆฌ์คํธ |
|
""" |
|
if not query or not query.strip(): |
|
logger.warning("๋น ์ฟผ๋ฆฌ๋ก ๊ฒ์ ์๋") |
|
return [] |
|
|
|
if self.vector_store is None: |
|
logger.error("๋ฒกํฐ ์คํ ์ด๊ฐ ์ด๊ธฐํ๋์ง ์์์ต๋๋ค") |
|
raise VectorSearchError("๋ฒกํฐ ์คํ ์ด๊ฐ ์ด๊ธฐํ๋์ง ์์์ต๋๋ค") |
|
|
|
try: |
|
logger.info(f"๊ฒ์ ์ฟผ๋ฆฌ ์คํ: '{query[:50]}{'...' if len(query) > 50 else ''}', ์์ {k}๊ฐ ๊ฒฐ๊ณผ ์์ฒญ") |
|
results = self.vector_store.similarity_search(query, k=k) |
|
logger.info(f"๊ฒ์ ์๋ฃ: {len(results)}๊ฐ ๊ฒฐ๊ณผ ์ฐพ์") |
|
return results |
|
except Exception as e: |
|
logger.error(f"๊ฒ์ ์ค ์ค๋ฅ ๋ฐ์: {e}", exc_info=True) |
|
raise VectorSearchError(f"๋ฒกํฐ ๊ฒ์ ์คํจ: {str(e)}") |
|
|
|
def save_local(self, path: str = "faiss_index") -> bool: |
|
""" |
|
FAISS ์ธ๋ฑ์ค ๋ก์ปฌ ์ ์ฅ (Milvus ์ฌ์ฉ ์ ํ ๊ฒฝ์ฐ) |
|
|
|
Args: |
|
path: ์ ์ฅ ๊ฒฝ๋ก |
|
|
|
Returns: |
|
์ ์ฅ ์ฑ๊ณต ์ฌ๋ถ |
|
""" |
|
if self.vector_store is None: |
|
logger.error("์ ์ฅํ ๋ฒกํฐ ์คํ ์ด๊ฐ ์ด๊ธฐํ๋์ง ์์์ต๋๋ค") |
|
raise PersistenceError("์ ์ฅํ ๋ฒกํฐ ์คํ ์ด๊ฐ ์ด๊ธฐํ๋์ง ์์์ต๋๋ค") |
|
|
|
|
|
if not self.use_milvus: |
|
try: |
|
|
|
os.makedirs(os.path.dirname(path) if os.path.dirname(path) else path, exist_ok=True) |
|
|
|
self.vector_store.save_local(path) |
|
logger.info(f"FAISS ์ธ๋ฑ์ค ๋ก์ปฌ ์ ์ฅ ์๋ฃ: {path}") |
|
return True |
|
except Exception as e: |
|
logger.error(f"FAISS ์ธ๋ฑ์ค ์ ์ฅ ์คํจ: {e}", exc_info=True) |
|
raise PersistenceError(f"๋ฒกํฐ ์ธ๋ฑ์ค ์ ์ฅ ์คํจ: {str(e)}") |
|
else: |
|
logger.info("Milvus๋ ๋ก์ปฌ ์ ์ฅ์ด ํ์ํ์ง ์์ต๋๋ค") |
|
return True |
|
|
|
def load_local(self, path: str = "faiss_index") -> bool: |
|
""" |
|
FAISS ์ธ๋ฑ์ค ๋ก์ปฌ ๋ก๋ (Milvus ์ฌ์ฉ ์ ํ ๊ฒฝ์ฐ) |
|
|
|
Args: |
|
path: ๋ก๋ํ ์ธ๋ฑ์ค ๊ฒฝ๋ก |
|
|
|
Returns: |
|
๋ก๋ ์ฑ๊ณต ์ฌ๋ถ |
|
""" |
|
if self.use_milvus: |
|
logger.info("Milvus ์ฌ์ฉ ์ค์ด๋ฏ๋ก ๋ก์ปฌ ๋ก๋๋ฅผ ๊ฑด๋๋๋๋ค") |
|
try: |
|
|
|
self.vector_store = self.init_milvus() |
|
return True |
|
except Exception as e: |
|
logger.error(f"Milvus ์ฐ๊ฒฐ ์คํจ, FAISS๋ก ๋์ฒด: {e}") |
|
self.use_milvus = False |
|
|
|
|
|
if not os.path.exists(path): |
|
logger.warning(f"์ธ๋ฑ์ค ๊ฒฝ๋ก๊ฐ ์กด์ฌํ์ง ์์: {path}") |
|
raise FileNotFoundError(f"๋ฒกํฐ ์ธ๋ฑ์ค ๊ฒฝ๋ก๊ฐ ์กด์ฌํ์ง ์์: {path}") |
|
|
|
try: |
|
logger.info(f"FAISS ์ธ๋ฑ์ค ๋ก๋ ์ค: {path}") |
|
|
|
|
|
self.vector_store = FAISS.load_local( |
|
path, |
|
self.embeddings, |
|
allow_dangerous_deserialization=True |
|
) |
|
logger.info(f"FAISS ์ธ๋ฑ์ค ๋ก๋ ์๋ฃ: {path}") |
|
return True |
|
except FileNotFoundError as e: |
|
logger.error(f"FAISS ์ธ๋ฑ์ค ํ์ผ์ ์ฐพ์ ์ ์์: {e}") |
|
raise PersistenceError(f"๋ฒกํฐ ์ธ๋ฑ์ค ํ์ผ์ ์ฐพ์ ์ ์์: {str(e)}") |
|
except Exception as e: |
|
logger.error(f"FAISS ์ธ๋ฑ์ค ๋ก๋ ์คํจ: {e}", exc_info=True) |
|
|
|
|
|
import traceback |
|
logger.error(f"์์ธ ์ค๋ฅ: {traceback.format_exc()}") |
|
|
|
|
|
logger.warning("์ธ๋ฑ์ค ๋ก๋ ์คํจ๋ก ์ FAISS ์ธ๋ฑ์ค ์ด๊ธฐํ") |
|
self.vector_store = self.init_faiss() |
|
return False |