|
from langchain_core.runnables import RunnablePassthrough |
|
from langchain_core.output_parsers import StrOutputParser |
|
from langchain_community.chat_models import ChatOllama |
|
from langchain_core.prompts import ChatPromptTemplate |
|
from langchain_pinecone import PineconeVectorStore |
|
from langchain_community.embeddings import SentenceTransformerEmbeddings |
|
|
|
import os |
|
from dotenv import load_dotenv |
|
from langchain_community.retrievers import BM25Retriever |
|
from langchain.retrievers import EnsembleRetriever |
|
from kiwipiepy import Kiwi |
|
# Import-time setup: load_dotenv() is called once when this module is imported
# (presumably so API keys/settings kept in a local .env file are available
# to the vector-store client — confirm against deployment).
load_dotenv() |
|
|
|
# Single module-level Kiwi instance; kiwi_tokenize() below reads this global
# rather than constructing a new Kiwi object on every call.
kiwi = Kiwi() |
|
|
|
def kiwi_tokenize(text):
    """Split *text* with the module-level Kiwi instance and return token forms.

    Args:
        text: The string handed to ``kiwi.tokenize``.

    Returns:
        A list holding the ``form`` attribute of every token that
        ``kiwi.tokenize(text)`` yields, in the order they are produced.
    """
    forms = []
    for morpheme in kiwi.tokenize(text):
        forms.append(morpheme.form)
    return forms
|
|
|
|
|
def retriever(pc, bm25, *, k=4, weights=(0.3, 0.7)):
    """Build a hybrid retriever combining BM25 keyword search and vector search.

    Args:
        pc: Vector store exposing ``as_retriever`` (the file imports
            ``PineconeVectorStore``, so presumably an instance of that).
        bm25: Documents to index for BM25; passed unchanged to
            ``BM25Retriever.from_documents``.
        k: Number of results each underlying retriever is asked for.
            Defaults to 4.
        weights: Two ensemble weights in the order (BM25, vector store).
            Defaults to ``(0.3, 0.7)``.

    Returns:
        An ``EnsembleRetriever`` over the BM25 retriever and the vector-store
        retriever, in that order, with the given weights.
    """
    vector_retriever = pc.as_retriever(search_kwargs={"k": k})

    # BM25 needs pre-tokenized text; kiwi_tokenize supplies the tokens.
    keyword_retriever = BM25Retriever.from_documents(
        bm25,
        preprocess_func=kiwi_tokenize,
    )
    keyword_retriever.k = k

    # NOTE: an earlier version passed search_type="mmr" to EnsembleRetriever.
    # EnsembleRetriever declares no such field, so the argument had no effect
    # on retrieval and has been dropped. If MMR is actually wanted, request it
    # on the vector store side instead, e.g.
    # pc.as_retriever(search_type="mmr", search_kwargs={"k": k}).
    return EnsembleRetriever(
        retrievers=[keyword_retriever, vector_retriever],
        weights=list(weights),
    )
|
|
|
|