File size: 1,534 Bytes
6669581 daa3a87 6669581 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_community.chat_models import ChatOllama
from langchain_core.prompts import ChatPromptTemplate
from langchain_pinecone import PineconeVectorStore
from langchain_community.embeddings import SentenceTransformerEmbeddings
import os
from dotenv import load_dotenv
from langchain_community.retrievers import BM25Retriever
from langchain.retrievers import EnsembleRetriever
from kiwipiepy import Kiwi
# Load environment variables (presumably from a local .env file) before
# anything else in this module reads them.
load_dotenv()
# Module-level Kiwi analyzer instance, created once at import time and
# reused by kiwi_tokenize() below.
kiwi = Kiwi()
def kiwi_tokenize(text):
    """Tokenize *text* with the module-level Kiwi analyzer.

    Returns a list holding the ``form`` attribute of every token that
    ``kiwi.tokenize(text)`` yields, in the order they are produced.
    """
    forms = []
    for morpheme in kiwi.tokenize(text):
        forms.append(morpheme.form)
    return forms
# embedding_model = SentenceTransformerEmbeddings(model_name='BM-K/KoSimCSE-roberta-multitask', model_kwargs={"trust_remote_code":True})
def retriever(pc, bm25, *, k=4, weights=(0.3, 0.7)):
    """Build a hybrid retriever combining Kiwi-tokenized BM25 and a vector store.

    Args:
        pc: Vector store exposing ``as_retriever`` (presumably a
            ``PineconeVectorStore`` -- confirm against the caller).
        bm25: Documents used to build the BM25 index.
        k: Number of documents each underlying retriever returns.
        weights: Weights applied to the ``(BM25, vector store)`` results,
            in that order.

    Returns:
        An ``EnsembleRetriever`` over the BM25 and vector-store retrievers.
    """
    # Dense (embedding-based) side of the ensemble.
    dense_retriever = pc.as_retriever(search_kwargs={"k": k})

    # Sparse (keyword) side; Kiwi tokenization replaces the default
    # preprocessing of BM25Retriever.
    sparse_retriever = BM25Retriever.from_documents(
        bm25, preprocess_func=kiwi_tokenize
    )
    sparse_retriever.k = k

    # NOTE: an earlier version passed search_type="mmr" here.
    # EnsembleRetriever declares no such field, so the argument had no
    # effect and has been removed. If MMR is wanted, request it on the
    # vector-store retriever instead, e.g.
    # pc.as_retriever(search_type="mmr", search_kwargs={"k": k}).
    return EnsembleRetriever(
        retrievers=[sparse_retriever, dense_retriever],  # retrievers to combine
        weights=list(weights),  # per-retriever weight, same order as above
    )
|