Upload 5 files
Browse files
- make_answer.py +3 -0
- make_chain_gguh.py +32 -0
- make_chain_model.py +32 -0
- retrieve_docs.py +5 -0
- retriever.py +38 -0
make_answer.py
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
def result(rag_chain, question):
    """Run the chain on a question, print the answer, and return it.

    Args:
        rag_chain: Object exposing ``invoke(question)``.
        question: Value passed unchanged to ``rag_chain.invoke``.

    Returns:
        Whatever ``rag_chain.invoke(question)`` returned. Earlier versions
        returned ``None``; callers that ignore the return value are
        unaffected.
    """
    response = rag_chain.invoke(question)
    # NOTE(review): the bracketed label looks mis-encoded in this copy of the
    # file; it is reproduced unchanged -- confirm against the original source.
    print(f"[ํํ์ด์ ๋ต๋ณ]\n{response}")
    return response
|
make_chain_gguh.py
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain_core.runnables import RunnablePassthrough
|
2 |
+
from langchain_core.output_parsers import StrOutputParser
|
3 |
+
from langchain_community.chat_models import ChatOllama
|
4 |
+
from langchain_core.prompts import ChatPromptTemplate
|
5 |
+
from langchain_pinecone import PineconeVectorStore
|
6 |
+
from langchain_community.embeddings import SentenceTransformerEmbeddings
|
7 |
+
|
8 |
+
def make_chain_ollama(retriever, model="zephyr:latest"):
    """Build a retrieval-augmented chain backed by an Ollama chat model.

    The chain passes the incoming question to ``retriever``, joins the
    ``page_content`` of the returned documents into one context string,
    fills the prompt template with that context and the question, calls
    the chat model, and parses the model output with ``StrOutputParser``.

    Args:
        retriever: Runnable that is piped into ``format_docs``; it must
            yield documents exposing ``page_content``.
        model: Model tag handed to ``ChatOllama``. Defaults to the
            previously hard-coded ``"zephyr:latest"``.

    Returns:
        The composed chain (retrieval -> prompt -> model -> parser).
    """
    def format_docs(docs):
        # Merge the retrieved documents into a single block of text.
        return "\n\n".join(doc.page_content for doc in docs)

    # Chat model served through Ollama; the tag is configurable by the caller.
    llm = ChatOllama(model=model)

    # NOTE(review): the non-ASCII instruction text below looks mis-encoded in
    # this copy of the file; it is reproduced unchanged -- confirm against the
    # original source. Adjacent literals are concatenated with no separator,
    # exactly as before.
    template = (
        "\"```\" Below is an instruction that describes a task. Write a response that appropriately completes the request."
        "์ ์ํ๋ context์์๋ง ๋๋ตํ๊ณ context์ ์๋ ๋ด์ฉ์ ๋ชจ๋ฅด๊ฒ ๋ค๊ณ ๋๋ตํด"
        "make answer in korean. ํ๊ตญ์ด๋ก ๋๋ตํ์ธ์"
        "\n\nContext:\n{context}\n;"
        "Question: {question}"
        "\n\nAnswer:"
    )

    prompt = ChatPromptTemplate.from_template(template)

    rag_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )

    return rag_chain
|
make_chain_model.py
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain_core.runnables import RunnablePassthrough
|
2 |
+
from langchain_core.output_parsers import StrOutputParser
|
3 |
+
from langchain_community.chat_models import ChatOllama
|
4 |
+
from langchain_core.prompts import ChatPromptTemplate
|
5 |
+
from langchain_pinecone import PineconeVectorStore
|
6 |
+
from langchain_community.embeddings import SentenceTransformerEmbeddings
|
7 |
+
|
8 |
+
def make_chain_llm(retriever, llm):
    """Assemble a retrieval-augmented chain around a caller-supplied model.

    Args:
        retriever: Runnable piped into ``format_docs``; it must yield
            documents exposing ``page_content``.
        llm: Chat model (or any runnable) placed after the prompt.

    Returns:
        The composed chain: the question is sent to ``retriever`` and also
        passed through unchanged, both fill the prompt, the prompt feeds
        ``llm``, and the output goes through ``StrOutputParser``.
    """
    def format_docs(docs):
        # Merge the retrieved documents into a single block of text.
        page_texts = [doc.page_content for doc in docs]
        return "\n\n".join(page_texts)

    # NOTE(review): the non-ASCII instruction text looks mis-encoded in this
    # copy of the file; it is reproduced unchanged -- confirm against the
    # original source.
    template_parts = (
        "\"```\" Below is an instruction that describes a task. Write a response that appropriately completes the request.",
        "์ ์ํ๋ context์์๋ง ๋๋ตํ๊ณ context์ ์๋ ๋ด์ฉ์ ์์ฑํ์ง๋ง",
        "make answer in korean. ํ๊ตญ์ด๋ก ๋๋ตํ์ธ์",
        "\n\nContext:\n{context}\n;",
        "Question: {question}",
        "\n\nAnswer:",
    )
    prompt = ChatPromptTemplate.from_template("".join(template_parts))

    # The same question feeds both the retriever branch and the prompt slot.
    chain_inputs = {
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    }

    return chain_inputs | prompt | llm | StrOutputParser()
|
retrieve_docs.py
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
def print_search_results(retrievers, query):
    """Print the query followed by the text of every retrieved document.

    The retriever is invoked exactly once. The previous implementation
    called ``retrievers.invoke(query)`` once to size the loop and again on
    every iteration, repeating the whole search for each printed line.

    Args:
        retrievers: Object exposing ``invoke(query)`` that returns a
            sequence of documents with a ``page_content`` attribute.
        query: Search text, printed and forwarded to ``invoke``.
    """
    print(f"Query: {query}")
    docs = retrievers.invoke(query)
    # NOTE(review): the label text looks mis-encoded in this copy of the
    # file; it is reproduced unchanged -- confirm against the original source.
    for i, doc in enumerate(docs):
        print(f'์ฐพ์ ๋ฌธ์ฅ{i+1}:',
              doc.page_content)
|
retriever.py
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain_core.runnables import RunnablePassthrough
|
2 |
+
from langchain_core.output_parsers import StrOutputParser
|
3 |
+
from langchain_community.chat_models import ChatOllama
|
4 |
+
from langchain_core.prompts import ChatPromptTemplate
|
5 |
+
from langchain_pinecone import PineconeVectorStore
|
6 |
+
from langchain_community.embeddings import SentenceTransformerEmbeddings
|
7 |
+
|
8 |
+
import os
|
9 |
+
from dotenv import load_dotenv
|
10 |
+
from langchain.retrievers import BM25Retriever, EnsembleRetriever
|
11 |
+
from kiwipiepy import Kiwi
|
12 |
+
# Load settings from a .env file (python-dotenv) before anything reads them.
load_dotenv()

# One module-wide Kiwi analyzer, reused by every kiwi_tokenize call.
kiwi = Kiwi()


def kiwi_tokenize(text):
    """Return the ``form`` of each token Kiwi produces for ``text``.

    Args:
        text: String handed to ``kiwi.tokenize``.

    Returns:
        A list holding the ``form`` attribute of every token, in the order
        Kiwi returned them.
    """
    analyzed = kiwi.tokenize(text)
    return [piece.form for piece in analyzed]
|
18 |
+
# embedding_model = SentenceTransformerEmbeddings(model_name='BM-K/KoSimCSE-roberta-multitask', model_kwargs={"trust_remote_code":True})
|
19 |
+
|
20 |
+
def retriever(pc, bm25, k=4, weights=(0.3, 0.7)):
    """Build a hybrid retriever: Kiwi-tokenized BM25 plus a vector store.

    Args:
        pc: Vector store exposing ``as_retriever`` (presumably the Pinecone
            store, judging by the name -- confirm against the caller).
        bm25: Documents handed to ``BM25Retriever.from_documents`` to build
            the keyword index.
        k: Number of documents requested from each underlying retriever.
            Defaults to the previously hard-coded 4.
        weights: ``(bm25_weight, vector_weight)`` applied by the ensemble.
            Defaults to the previously hard-coded ``(0.3, 0.7)``.

    Returns:
        An ``EnsembleRetriever`` combining the BM25 and vector retrievers,
        in that order.
    """
    vector_retriever = pc.as_retriever(search_kwargs={"k": k})

    # BM25 needs pre-split tokens; kiwi_tokenize supplies them.
    keyword_retriever = BM25Retriever.from_documents(
        bm25, preprocess_func=kiwi_tokenize
    )
    keyword_retriever.k = k

    # NOTE(review): an earlier version also passed search_type="mmr" here.
    # EnsembleRetriever declares no such field, so it did not enable MMR and
    # has been dropped. If MMR is wanted, request it from the vector store
    # retriever (pc.as_retriever(search_type="mmr", ...)) -- confirm intent.
    return EnsembleRetriever(
        retrievers=[keyword_retriever, vector_retriever],  # order matches weights
        weights=list(weights),
    )
|
38 |
+
|