ali121300 committed
Commit c6d68c0
Parent: 2f4d027

Update app.py

Files changed (1)
1. app.py +11 -17
app.py CHANGED
@@ -48,7 +48,7 @@ def get_text_chunks(text: str) -> list:
 
 def get_vectorstore(text_chunks: list) -> FAISS:
     # model = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
-    model = "paraphrase-distilroberta-base-v1"
+    model = "intfloat/multilingual-e5-large"
     encode_kwargs = {
         "normalize_embeddings": True
     }  # set True to compute cosine similarity
@@ -59,24 +59,18 @@ def get_vectorstore(text_chunks: list) -> FAISS:
     return vectorstore
 
 
-def get_conversation_chain(vectorstore):
-    n_gpu_layers = 40  # Change this value based on your model and your GPU VRAM pool.
-    n_batch = 512  # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
-    n_ctx = 2048
-    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
-    # Make sure the model path is correct for your system
-    llm = LlamaCpp(
-        model_path="mostafaamiri/persian-llama-7b-GGUF-Q4",
-        n_gpu_layers=n_gpu_layers, n_batch=n_batch,
-        callback_manager=callback_manager,
-        verbose=True,
-        n_ctx=n_ctx)
+def get_conversation_chain(vectorstore: FAISS) -> ConversationalRetrievalChain:
+    # llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0613")
+    llm = HuggingFaceHub(
+        # repo_id="mistralai/Mistral-7B-Instruct-v0.2",
+        repo_id="google/gemma-1.1-7b-it",
+        # repo_id="TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF",
+        model_kwargs={"temperature": 0.5, "max_length": 2048},
+    )
+
     memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
     conversation_chain = ConversationalRetrievalChain.from_llm(
-        llm=llm,
-        retriever=vectorstore.as_retriever(),
-        memory=memory,
-        # retriever_kwargs={"k": 1},
+        llm=llm, retriever=vectorstore.as_retriever(), memory=memory
     )
     return conversation_chain
 
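For context, here is a minimal sketch of the two functions as they stand after this commit. The diff does not show the file's import block or the middle of get_vectorstore (original lines 55-58), so the legacy langchain 0.0.x import paths and the HuggingFaceEmbeddings / FAISS.from_texts wiring below are assumptions, not the repo's verified code; HuggingFaceHub also expects a HUGGINGFACEHUB_API_TOKEN in the environment.

# Sketch only: the diff omits the import block, so these legacy
# langchain 0.0.x import paths are an assumption.
from langchain.chains import ConversationalRetrievalChain
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFaceHub
from langchain.memory import ConversationBufferMemory
from langchain.vectorstores import FAISS


def get_vectorstore(text_chunks: list) -> FAISS:
    model = "intfloat/multilingual-e5-large"
    encode_kwargs = {"normalize_embeddings": True}  # unit vectors, so inner product = cosine
    # Assumed wiring for the lines the diff does not show (55-58):
    embeddings = HuggingFaceEmbeddings(model_name=model, encode_kwargs=encode_kwargs)
    vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
    return vectorstore


def get_conversation_chain(vectorstore: FAISS) -> ConversationalRetrievalChain:
    # The hosted Inference API replaces the old local LlamaCpp setup, so the
    # GPU tuning knobs (n_gpu_layers, n_batch, n_ctx) and the streaming
    # callback manager disappear. Requires HUGGINGFACEHUB_API_TOKEN.
    llm = HuggingFaceHub(
        repo_id="google/gemma-1.1-7b-it",
        model_kwargs={"temperature": 0.5, "max_length": 2048},
    )
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
    return ConversationalRetrievalChain.from_llm(
        llm=llm, retriever=vectorstore.as_retriever(), memory=memory
    )

The net effect of the commit: a multilingual embedding model replaces an English-centric one (presumably for Persian support, given the previously used persian-llama GGUF), and a hosted gemma-1.1-7b-it replaces the locally run model. With normalize_embeddings set to True, FAISS's inner-product search behaves like cosine similarity, which matches how the E5 family of embedding models is intended to be queried. A caller would then invoke the chain with something like conversation_chain({"question": "..."}).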