Spaces:
Sleeping
Sleeping
from langchain import PromptTemplate | |
#from langchain_core.prompts import PromptTemplate | |
from langchain_community.embeddings import HuggingFaceBgeEmbeddings | |
from langchain_community.vectorstores import FAISS | |
from langchain_community.llms.ctransformers import CTransformers | |
#from langchain.chains import RetrievalQA | |
from langchain.chains.retrieval_qa.base import RetrievalQA | |
import chainlit as cl | |
from transformers import AutoModel | |
# Directory of the persisted FAISS index passed to FAISS.load_local().
DB_FAISS_PATH = 'vectorstores/'

# Prompt for the retrieval QA chain. Both placeholders must be *named*:
# the template is declared with input variables 'context' and 'question',
# and an anonymous `{}` cannot be filled from keyword arguments.
custom_prompt_template = '''
use the following pieces of information to answer the user's questions.
If you don't know the answer, please just say that you don't know the answer, don't try to make up an answer.
Context : {context}
Question : {question}
only return the helpful answer below and nothing else.
'''
def set_custom_prompt():
    """Build the prompt template used for QA retrieval over the vector store.

    Wraps the module-level ``custom_prompt_template`` in a ``PromptTemplate``
    that declares ``context`` and ``question`` as its input variables.
    """
    return PromptTemplate(
        input_variables=['context', 'question'],
        template=custom_prompt_template,
    )
def load_llm():
    """Create the CTransformers-backed Llama-2 chat model.

    Returns:
        A ``CTransformers`` LLM wrapping ``TheBloke/Llama-2-7B-Chat-GGML``
        configured for up to 512 new tokens at temperature 0.5.
    """
    # Generation settings must be passed through `config`. The previous code
    # passed them as top-level keyword arguments (and misspelled
    # `max_new_tokens` as `max_new_token`), so they never reached the model.
    generation_config = {
        'max_new_tokens': 512,
        'temperature': 0.5,
    }
    return CTransformers(
        model='TheBloke/Llama-2-7B-Chat-GGML',
        model_type='llama',
        config=generation_config,
    )
def retrieval_qa_chain(llm, prompt, db):
    """Wire an LLM, a prompt and a vector store into a RetrievalQA chain.

    Args:
        llm: Language model that generates the answer.
        prompt: Prompt template handed to the 'stuff' chain.
        db: Vector store; its retriever is asked for the top 2 matches.

    Returns:
        A ``RetrievalQA`` chain that also returns its source documents.
    """
    top_two_retriever = db.as_retriever(search_kwargs={'k': 2})
    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type='stuff',
        retriever=top_two_retriever,
        chain_type_kwargs={'prompt': prompt},
        return_source_documents=True,
    )
def qa_bot():
    """Assemble the complete question-answering chain.

    Creates CPU embeddings, loads the FAISS index stored at
    ``DB_FAISS_PATH``, and combines it with the LLM and the custom prompt.

    Returns:
        The chain produced by ``retrieval_qa_chain``.
    """
    embedding_model = HuggingFaceBgeEmbeddings(
        model_kwargs={'device': 'cpu'},
        model_name='sentence-transformers/all-MiniLM-L6-v2',
    )
    # NOTE(review): FAISS.load_local unpickles the stored index; recent
    # langchain_community releases reportedly require an explicit
    # allow_dangerous_deserialization=True opt-in -- confirm against the
    # installed version and only load indexes you created yourself.
    vector_store = FAISS.load_local(DB_FAISS_PATH, embedding_model)
    return retrieval_qa_chain(load_llm(), set_custom_prompt(), vector_store)
def final_result(query):
    """Answer one query with a freshly built QA chain.

    Args:
        query: The user's question.

    Returns:
        Whatever the chain returns when called with ``{'query': query}``.
    """
    return qa_bot()({'query': query})
## Chainlit | |
# Registered with Chainlit so it runs when a chat session opens; without the
# decorator the function is defined but never invoked.
@cl.on_chat_start
async def start():
    """Initialise a chat session: build the QA chain and greet the user.

    Sends a placeholder message, replaces its text with the welcome prompt,
    and stores the chain in the user session under the key ``'chain'``.
    """
    chain = qa_bot()
    msg = cl.Message(content='Starting the bot...')
    await msg.send()
    # Previously assigned to the misspelled attribute `conteny`, so the
    # update below re-sent the placeholder instead of the welcome text.
    msg.content = "Hi Welcome to the medical Bot. What is your query?"
    await msg.update()
    cl.user_session.set('chain', chain)
# Registered with Chainlit so it runs for every incoming user message.
@cl.on_message
async def main(message):
    """Answer an incoming chat message using the session's QA chain.

    Args:
        message: The incoming message; either a Chainlit message object
            exposing ``.content`` or a plain string.

    Sends a reply containing the chain's answer followed by the retrieved
    source documents, or a note that none were found.
    """
    # Read the chain stored under 'chain'; the previous code called
    # `set('chain')`, which stores rather than retrieves.
    chain = cl.user_session.get('chain')
    cb = cl.AsyncLangchainCallbackHandler(
        stream_final_answer=True,
        answer_prefix_tokens=['FINAL', 'ANSWER'],
    )
    cb.answer_reached = True

    # The chain needs the question text, not the message wrapper. Falling
    # back to `message` itself keeps plain-string callers working.
    query = getattr(message, 'content', message)
    res = await chain.acall(query, callbacks=[cb])

    answer = res['result']
    # The chain is built with return_source_documents=True, which exposes the
    # documents as 'source_documents' (was misspelled 'sources_documents').
    sources = res.get('source_documents')
    if sources:
        answer += "\nSources :" + str(sources)
    else:
        answer += "\nNo Sources Found"
    await cl.Message(content=answer).send()