Spaces:

sabazo
/

docs-qachat-demo

Sleeping

App Files Files Community

Asaad Almutareb commited on Nov 30, 2023

Commit

93457a9

1 Parent(s): d4bbccc

added caching with sqlite

Browse files

added different retrievers

Files changed (2) hide show

.gitignore +2 -1
app.py +84 -45

.gitignore CHANGED Viewed

@@ -165,4 +165,5 @@ cython_debug/
 *.pickle
 chroma_db/*
 bin
-obj

 *.pickle
 chroma_db/*
 bin
+obj
+.langchain.sqlite

app.py CHANGED Viewed

@@ -1,7 +1,7 @@
 # gradio
 import gradio as gr
 #import random
-#import time
 #boto3 for S3 access
 import boto3
 from botocore import UNSIGNED
@@ -15,25 +15,36 @@ from langchain.llms import HuggingFaceHub
 from langchain.embeddings import HuggingFaceHubEmbeddings
 # vectorestore
 from langchain.vectorstores import Chroma
-from langchain.vectorstores import FAISS
 # retrieval chain
-from langchain.chains import RetrievalQA
 from langchain.chains import RetrievalQAWithSourcesChain
 # prompt template
 from langchain.prompts import PromptTemplate
 from langchain.memory import ConversationBufferMemory
 # logging
 import logging
-import zipfile
-#contextual retriever
-from langchain.retrievers import ContextualCompressionRetriever
-from langchain.retrievers.document_compressors import LLMChainExtractor
-from langchain.retrievers.document_compressors import EmbeddingsFilter
-from langchain.retrievers.multi_query import MultiQueryRetriever
-# streaming
-#from threading import Thread
-#from transformers import TextIteratorStreamer
 # load .env variables
 config = load_dotenv(".env")
@@ -43,22 +54,34 @@ AWS_S3_FILE=os.getenv('AWS_S3_FILE')
 VS_DESTINATION=os.getenv('VS_DESTINATION')
 # initialize Model config
-model_id = HuggingFaceHub(repo_id="HuggingFaceH4/zephyr-7b-beta", model_kwargs={
-    "temperature":0.1,
     "max_new_tokens":1024,
     "repetition_penalty":1.2,
-    "streaming": True,
-    "return_full_text":True
     })
-model_name = "sentence-transformers/multi-qa-mpnet-base-dot-v1"
 embeddings = HuggingFaceHubEmbeddings(repo_id=model_name)
 # retrieve vectorsrore
 s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED))
 ## Chroma DB
 s3.download_file(AWS_S3_LOCATION, AWS_S3_FILE, VS_DESTINATION)
 db = Chroma(persist_directory="./vectorstore", embedding_function=embeddings)
 db.get()
@@ -70,12 +93,18 @@ db.get()
 # FAISS_INDEX_PATH='./chroma_db/faiss_db_ray'
 # db = FAISS.load_local(FAISS_INDEX_PATH, embeddings)
-retriever = db.as_retriever(search_type = "mmr")#, search_kwargs={'k': 5, 'fetch_k': 25})
-compressor = LLMChainExtractor.from_llm(model_id)
-compression_retriever = ContextualCompressionRetriever(base_compressor=compressor, base_retriever=retriever)
-# embeddings_filter = EmbeddingsFilter(embeddings=embeddings, similarity_threshold=0.76)
-# compression_retriever = ContextualCompressionRetriever(base_compressor=embeddings_filter, base_retriever=retriever)
 global qa
 template = """
@@ -101,16 +130,18 @@ memory = ConversationBufferMemory(memory_key="history", input_key="question")
 # logging for the chain
 logging.basicConfig()
-logging.getLogger("langchain.chains").setLevel(logging.INFO)
-# qa = RetrievalQA.from_chain_type(llm=model_id, chain_type="stuff", retriever=compression_retriever, verbose=True, return_source_documents=True, chain_type_kwargs={
 #     "verbose": True,
 #     "memory": memory,
 #     "prompt": prompt
 # }
 #     )
-qa = RetrievalQAWithSourcesChain.from_chain_type(llm=model_id, retriever=compression_retriever, verbose=True, chain_type_kwargs={
     "verbose": True,
     "memory": memory,
     "prompt": prompt,
@@ -118,8 +149,11 @@ qa = RetrievalQAWithSourcesChain.from_chain_type(llm=model_id, retriever=compres
 }
     )
-def pretty_print_docs(docs):
-    print(f"\n{'-' * 100}\n".join([f"Document {i+1}:\n\n" + d.page_content for i, d in enumerate(docs)]))
 def add_text(history, text):
     history = history + [(text, None)]
@@ -127,20 +161,18 @@ def add_text(history, text):
 def bot(history):
     response = infer(history[-1][0], history)
-    print(*response)
-    print(*memory)
-    sources = [doc.metadata.get("source") for doc in response['sources']]
     src_list = '\n'.join(sources)
     print_this = response['answer'] + "\n\n\n Sources: \n\n\n" + src_list
-    #sources = f"`Sources:`\n\n' + response['sources']"
-    #history[-1][1] = ""
-    #for character in response['result']: #print_this:
-    #    history[-1][1] += character
-    #    time.sleep(0.05)
-    #    yield history
-    history[-1][1] = response['answer']
-    return history #, sources
 def infer(question, history):
     query =  question
@@ -152,23 +184,30 @@ css="""
 """
 title = """
-<div style="text-align: center;max-width: 700px;">
     <h1>Chat with your Documentation</h1>
-    <p style="text-align: center;">Chat with Documentation, <br />
-    when everything is ready, you can start asking questions about the docu ;)</p>
 </div>
 """
 with gr.Blocks(css=css) as demo:
-    with gr.Column(elem_id="col-container"):
         gr.HTML(title)
         chatbot = gr.Chatbot([], elem_id="chatbot")
-        clear = gr.Button("Clear")
         with gr.Row():
             question = gr.Textbox(label="Question", placeholder="Type your question and hit Enter ")
     question.submit(add_text, [chatbot, question], [chatbot, question], queue=False).then(
         bot, chatbot, chatbot
     )
-    clear.click(lambda: None, None, chatbot, queue=False)
 demo.queue().launch()

 # gradio
 import gradio as gr
 #import random
+import time
 #boto3 for S3 access
 import boto3
 from botocore import UNSIGNED
 from langchain.embeddings import HuggingFaceHubEmbeddings
 # vectorestore
 from langchain.vectorstores import Chroma
+#from langchain.vectorstores import FAISS
 # retrieval chain
+#from langchain.chains import RetrievalQA
 from langchain.chains import RetrievalQAWithSourcesChain
 # prompt template
 from langchain.prompts import PromptTemplate
 from langchain.memory import ConversationBufferMemory
 # logging
 import logging
+#import zipfile
+# improve results with retriever
+# from langchain.retrievers import ContextualCompressionRetriever
+# from langchain.retrievers.document_compressors import LLMChainExtractor
+# from langchain.retrievers.document_compressors import EmbeddingsFilter
+# from langchain.retrievers.multi_query import MultiQueryRetriever
+from langchain.retrievers import BM25Retriever, EnsembleRetriever
+# reorder retrived documents
+#from langchain.document_transformers import LongContextReorder
+# github issues
+from langchain.document_loaders import GitHubIssuesLoader
+# debugging
+from langchain.globals import set_verbose
+# caching
+from langchain.globals import set_llm_cache
+#from langchain.cache import InMemoryCache
+# We can do the same thing with a SQLite cache
+from langchain.cache import SQLiteCache
+#set_llm_cache(InMemoryCache())
+set_verbose(True)
 # load .env variables
 config = load_dotenv(".env")
 VS_DESTINATION=os.getenv('VS_DESTINATION')
 # initialize Model config
+model_id = HuggingFaceHub(repo_id="mistralai/Mistral-7B-Instruct-v0.1", model_kwargs={
+    # "temperature":0.1,
     "max_new_tokens":1024,
     "repetition_penalty":1.2,
+#    "streaming": True,
+#    "return_full_text":True
     })
+#model_name = "sentence-transformers/multi-qa-mpnet-base-dot-v1"
+model_name = "sentence-transformers/all-mpnet-base-v2"
 embeddings = HuggingFaceHubEmbeddings(repo_id=model_name)
+# remove old vectorstore
+if os.path.exists(VS_DESTINATION):
+    os.remove(VS_DESTINATION)
+# remove old sqlite cache
+if os.path.exists('.langchain.sqlite'):
+    os.remove('.langchain.sqlite')
+set_llm_cache(SQLiteCache(database_path=".langchain.sqlite"))
 # retrieve vectorsrore
 s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED))
 ## Chroma DB
 s3.download_file(AWS_S3_LOCATION, AWS_S3_FILE, VS_DESTINATION)
+# use the cached embeddings instead of embeddings to speed up re-retrival
 db = Chroma(persist_directory="./vectorstore", embedding_function=embeddings)
 db.get()
 # FAISS_INDEX_PATH='./chroma_db/faiss_db_ray'
 # db = FAISS.load_local(FAISS_INDEX_PATH, embeddings)
+# initialize the bm25 retriever and chroma/faiss retriever
+bm25_retriever = BM25Retriever.
+bm25_retriever.k = 2
+retriever = db.as_retriever(search_type="mmr")#, search_kwargs={'k': 3, 'lambda_mult': 0.25})
+# asks LLM to create 3 alternatives baed on user query
+# multi_retriever = MultiQueryRetriever.from_llm(retriever=retriever, llm=model_id)
+# asks LLM to extract relevant parts from retrieved documents
+# compressor = LLMChainExtractor.from_llm(model_id)
+# compression_retriever = ContextualCompressionRetriever(base_compressor=compressor, base_retriever=multi_retriever)
 global qa
 template = """
 # logging for the chain
 logging.basicConfig()
+logging.getLogger("langchain.retrievers").setLevel(logging.INFO)
+logging.getLogger("langchain.chains.qa_with_sources").setLevel(logging.INFO)
+# qa = RetrievalQA.from_chain_type(llm=model_id, retriever=retriever, return_source_documents=True, verbose=True, chain_type_kwargs={
 #     "verbose": True,
 #     "memory": memory,
 #     "prompt": prompt
 # }
 #     )
+qa = RetrievalQAWithSourcesChain.from_chain_type(llm=model_id, retriever=retriever, return_source_documents=True, verbose=True, chain_type_kwargs={
     "verbose": True,
     "memory": memory,
     "prompt": prompt,
 }
     )
+#####
+#
+# Gradio fns
+####
 def add_text(history, text):
     history = history + [(text, None)]
 def bot(history):
     response = infer(history[-1][0], history)
+    sources = [doc.metadata.get("source") for doc in response['source_documents']]
     src_list = '\n'.join(sources)
     print_this = response['answer'] + "\n\n\n Sources: \n\n\n" + src_list
+    # history[-1][1] = ""
+    # for character in response['answer']:
+    #     #print_this:
+    #     history[-1][1] += character
+    #     time.sleep(0.01)
+    #     yield history
+    history[-1][1] = print_this #response['answer']
+    return history
 def infer(question, history):
     query =  question
 """
 title = """
+<div style="text-align: center;max-width: 1920px;">
     <h1>Chat with your Documentation</h1>
+    <p style="text-align: center;">This is a privately hosten Docs AI Buddy, <br />
+    It will help you with any question regarding the documentation of Ray ;)</p>
 </div>
 """
 with gr.Blocks(css=css) as demo:
+    with gr.Column(min_width=900, elem_id="col-container"):
         gr.HTML(title)
         chatbot = gr.Chatbot([], elem_id="chatbot")
+        #with gr.Row():
+        #    clear = gr.Button("Clear")
         with gr.Row():
             question = gr.Textbox(label="Question", placeholder="Type your question and hit Enter ")
+        with gr.Row():
+            clear = gr.ClearButton([chatbot, question])
     question.submit(add_text, [chatbot, question], [chatbot, question], queue=False).then(
         bot, chatbot, chatbot
     )
+    #clear.click(lambda: None, None, chatbot, queue=False)
 demo.queue().launch()