mintaeng committed on
Commit f233426 • 1 Parent(s): 1f6cd04

Upload 5 files
Files changed (5)
  1. make_answer.py +3 -0
  2. make_chain_gguh.py +32 -0
  3. make_chain_model.py +32 -0
  4. retrieve_docs.py +5 -0
  5. retriever.py +38 -0
make_answer.py ADDED
@@ -0,0 +1,3 @@
+ def result(rag_chain, question):
+     response = rag_chain.invoke(question)
+     print(f"[풋풋이의 답변]\n{response}")
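For reference, a minimal usage sketch: `result` just runs a question through a chain built by one of the `make_chain_*` helpers below. The `ensemble` retriever is a placeholder (built in retriever.py, further down), not part of this file:

from make_chain_gguh import make_chain_ollama
from make_answer import result

# `ensemble` is a placeholder for the ensemble retriever returned by retriever.py.
rag_chain = make_chain_ollama(ensemble)
result(rag_chain, "your question here")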
make_chain_gguh.py ADDED
@@ -0,0 +1,32 @@
+ from langchain_core.runnables import RunnablePassthrough
+ from langchain_core.output_parsers import StrOutputParser
+ from langchain_community.chat_models import ChatOllama
+ from langchain_core.prompts import ChatPromptTemplate
+ from langchain_pinecone import PineconeVectorStore
+ from langchain_community.embeddings import SentenceTransformerEmbeddings
+
+ def make_chain_ollama(retriever):
+     def format_docs(docs):
+         # Merge the retrieved documents into a single passage.
+         return "\n\n".join(doc.page_content for doc in docs)
+
+     # Any chat model supported by LangChain works here; this chain uses Ollama.
+     llm = ChatOllama(model="zephyr:latest")
+
+     template = "\"```\" Below is an instruction that describes a task. Write a response that appropriately completes the request."\
+                "제시하는 context에서만 대답하고 context에 없는 내용은 모르겠다고 대답해"\
+                "make answer in korean. 한국어로 대답하세요"\
+                "\n\nContext:\n{context}\n;"\
+                "Question: {question}"\
+                "\n\nAnswer:"
+
+     prompt = ChatPromptTemplate.from_template(template)
+
+     rag_chain = (
+         {"context": retriever | format_docs, "question": RunnablePassthrough()}
+         | prompt
+         | llm
+         | StrOutputParser()
+     )
+
+     return rag_chain
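The dict at the head of the chain is LCEL shorthand for RunnableParallel: the incoming question is routed into the retriever (then `format_docs`) as `context`, and passed through unchanged as `question`. A self-contained sketch of that fan-out mechanism, with a stand-in lambda instead of the real retriever:

from langchain_core.runnables import RunnableLambda, RunnableParallel, RunnablePassthrough

# Stand-in for `retriever | format_docs`; a real retriever returns Documents.
fake_context = RunnableLambda(lambda q: f"(docs retrieved for: {q})")

fan_out = RunnableParallel(context=fake_context, question=RunnablePassthrough())
print(fan_out.invoke("What is RAG?"))
# {'context': '(docs retrieved for: What is RAG?)', 'question': 'What is RAG?'}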
make_chain_model.py ADDED
@@ -0,0 +1,32 @@
+ from langchain_core.runnables import RunnablePassthrough
+ from langchain_core.output_parsers import StrOutputParser
+ from langchain_community.chat_models import ChatOllama
+ from langchain_core.prompts import ChatPromptTemplate
+ from langchain_pinecone import PineconeVectorStore
+ from langchain_community.embeddings import SentenceTransformerEmbeddings
+
+ def make_chain_llm(retriever, llm):
+     def format_docs(docs):
+         # Merge the retrieved documents into a single passage.
+         return "\n\n".join(doc.page_content for doc in docs)
+
+     # Any chat model supported by LangChain can be passed in, e.g.:
+     # llm = ChatOllama(model="zephyr:latest")
+
+     template = "\"```\" Below is an instruction that describes a task. Write a response that appropriately completes the request."\
+                "제시하는 context에서만 대답하고 context에 없는 내용은 생성하지 마"\
+                "make answer in korean. 한국어로 대답하세요"\
+                "\n\nContext:\n{context}\n;"\
+                "Question: {question}"\
+                "\n\nAnswer:"
+
+     prompt = ChatPromptTemplate.from_template(template)
+
+     rag_chain = (
+         {"context": retriever | format_docs, "question": RunnablePassthrough()}
+         | prompt
+         | llm
+         | StrOutputParser()
+     )
+
+     return rag_chain
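`make_chain_llm` is the injectable variant: the caller supplies the model, which is presumably how the GGUF path (the "gguh" in the other file name) is served. A hedged sketch using LlamaCpp from langchain_community, assuming llama-cpp-python is installed; the model path is hypothetical and must be adjusted to your environment:

from langchain_community.llms import LlamaCpp
from make_chain_model import make_chain_llm

# Hypothetical local GGUF file; replace with your own path.
llm = LlamaCpp(model_path="./models/zephyr-7b-beta.Q4_K_M.gguf", temperature=0.1, n_ctx=2048)
rag_chain = make_chain_llm(ensemble, llm)  # `ensemble` built by retriever.py below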
retrieve_docs.py ADDED
@@ -0,0 +1,5 @@
+ def print_search_results(retrievers, query):
+     print(f"Query: {query}")
+     docs = retrievers.invoke(query)  # run the retriever once, not once per loop iteration
+     for i, doc in enumerate(docs, start=1):
+         print(f'찾은 문장{i}:', doc.page_content)
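A quick way to eyeball retrieval quality before wiring up the full chain (again assuming `ensemble` from retriever.py):

from retrieve_docs import print_search_results

print_search_results(ensemble, "your query here")
# Query: your query here
# 찾은 문장1: ...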
retriever.py ADDED
@@ -0,0 +1,38 @@
+ from langchain_core.runnables import RunnablePassthrough
+ from langchain_core.output_parsers import StrOutputParser
+ from langchain_community.chat_models import ChatOllama
+ from langchain_core.prompts import ChatPromptTemplate
+ from langchain_pinecone import PineconeVectorStore
+ from langchain_community.embeddings import SentenceTransformerEmbeddings
+
+ import os
+ from dotenv import load_dotenv
+ from langchain.retrievers import BM25Retriever, EnsembleRetriever
+ from kiwipiepy import Kiwi
+ load_dotenv()
+
+ kiwi = Kiwi()
+
+ def kiwi_tokenize(text):
+     return [token.form for token in kiwi.tokenize(text)]
+ # embedding_model = SentenceTransformerEmbeddings(model_name='BM-K/KoSimCSE-roberta-multitask', model_kwargs={"trust_remote_code": True})
+
+ def retriever(pc, bm25):
+     pcretriever = pc.as_retriever(search_kwargs={'k': 4})
+     kiwi_bm25 = BM25Retriever.from_documents(bm25, preprocess_func=kiwi_tokenize)
+     kiwi_bm25.k = 4
+
+     kiwibm25_pc_37 = EnsembleRetriever(
+         retrievers=[kiwi_bm25, pcretriever],  # list of retrievers to combine
+         weights=[0.3, 0.7],  # weight applied to each retriever's results
+         # results are fused with weighted Reciprocal Rank Fusion (RRF)
+     )
+     # Pinecone vector store initialization
+     # vectorstore = PineconeVectorStore(
+     #     index_name=os.getenv("INDEX_NAME"), embedding=embedding_model
+     # )
+
+     # retriever = vectorstore.as_retriever(search_kwargs={'k': 2})
+
+     return kiwibm25_pc_37
+
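Putting the five files together: a hedged end-to-end sketch, assuming PINECONE_API_KEY and INDEX_NAME are set in .env, the Pinecone index is already populated, and `docs` (the same Document list that was indexed) is available for the BM25 side:

import os
from dotenv import load_dotenv
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_pinecone import PineconeVectorStore

from retriever import retriever
from retrieve_docs import print_search_results
from make_chain_gguh import make_chain_ollama
from make_answer import result

load_dotenv()  # expects PINECONE_API_KEY and INDEX_NAME

embedding_model = SentenceTransformerEmbeddings(model_name="BM-K/KoSimCSE-roberta-multitask")
vectorstore = PineconeVectorStore(index_name=os.getenv("INDEX_NAME"), embedding=embedding_model)

ensemble = retriever(vectorstore, docs)       # `docs`: list of Documents for BM25
print_search_results(ensemble, "your query")  # optional: inspect retrieval first
rag_chain = make_chain_ollama(ensemble)       # requires a running Ollama server
result(rag_chain, "your question here")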