rasyosef committed on
Commit
f7e770a
•
1 Parent(s): 39012bc

used multi-qa-MiniLM-L6-cos-v1 model and cosine distance strategy in vectorstore retriever

Browse files
Files changed (1) hide show
  1. app.py +7 -2
app.py CHANGED
@@ -3,6 +3,7 @@ import gradio as gr
3
  from langchain.text_splitter import CharacterTextSplitter
4
  from langchain_community.document_loaders import UnstructuredFileLoader
5
  from langchain.vectorstores.faiss import FAISS
 
6
  from langchain_community.embeddings import HuggingFaceEmbeddings
7
 
8
  from langchain.chains import RetrievalQA
@@ -55,8 +56,12 @@ def prepare_vector_store_retriever(filename):
55
  documents = text_splitter.split_documents(raw_documents)
56
 
57
  # Creating a vectorstore
58
- embeddings = HuggingFaceEmbeddings()
59
- vectorstore = FAISS.from_documents(documents, embeddings)
 
 
 
 
60
 
61
  return VectorStoreRetriever(vectorstore=vectorstore, search_kwargs={"k": 2})
62
 
 
3
  from langchain.text_splitter import CharacterTextSplitter
4
  from langchain_community.document_loaders import UnstructuredFileLoader
5
  from langchain.vectorstores.faiss import FAISS
6
+ from langchain.vectorstores.utils import DistanceStrategy
7
  from langchain_community.embeddings import HuggingFaceEmbeddings
8
 
9
  from langchain.chains import RetrievalQA
 
56
  documents = text_splitter.split_documents(raw_documents)
57
 
58
  # Creating a vectorstore
59
+ embeddings = HuggingFaceEmbeddings(
60
+ model_name="sentence-transformers/multi-qa-MiniLM-L6-cos-v1",
61
+ model_kwargs={'device': 'cpu'},
62
+ encode_kwargs={'normalize_embeddings': False}
63
+ )
64
+ vectorstore = FAISS.from_documents(documents, embeddings, distance_strategy=DistanceStrategy.COSINE)
65
 
66
  return VectorStoreRetriever(vectorstore=vectorstore, search_kwargs={"k": 2})
67