VenkyPas committed on
Commit 51ca168 · 1 Parent(s): f546f2e

Fixing vector store creation process

Files changed (1)
  1. app.py +8 -7
app.py CHANGED
@@ -52,20 +52,21 @@ hf_embeddings = HuggingFaceEndpointEmbeddings(
     task="feature-extraction",
     huggingfacehub_api_token=HF_TOKEN,
 )
+DATA_DIR = "./data"
+VECTOR_STORE_DIR = os.path.join(DATA_DIR, "vectorstore")
+VECTOR_STORE_PATH = os.path.join(VECTOR_STORE_DIR, "index.faiss")
 
-VECTOR_STORE_LOCATION = "./data/vectorstore"
-
-if os.path.exists(VECTOR_STORE_LOCATION):
+if os.path.exists(VECTOR_STORE_PATH):
     vectorstore = FAISS.load_local(
-        VECTOR_STORE_LOCATION,
+        VECTOR_STORE_PATH,
         hf_embeddings,
         allow_dangerous_deserialization=True # this is necessary to load the vectorstore from disk as it's stored as a `.pkl` file.
     )
     hf_retriever = vectorstore.as_retriever()
-    print("Loaded Vectorstore at " + VECTOR_STORE_LOCATION)
+    print("Loaded Vectorstore at " + VECTOR_STORE_DIR)
 else:
     print("Indexing Files")
-    os.makedirs(VECTOR_STORE_LOCATION, exist_ok=True)
+    os.makedirs(VECTOR_STORE_DIR, exist_ok=True)
     ### 4. INDEX FILES
     ### NOTE: REMEMBER TO BATCH THE DOCUMENTS WITH MAXIMUM BATCH SIZE = 32
     for i in range(0, len(split_documents), 32):
@@ -73,7 +74,7 @@ else:
             vectorstore = FAISS.from_documents(split_documents[i:i+32], hf_embeddings)
             continue
         vectorstore.add_documents(split_documents[i:i+32])
-    vectorstore.save_local(VECTOR_STORE_LOCATION)
+    vectorstore.save_local(VECTOR_STORE_DIR)
 
     hf_retriever = vectorstore.as_retriever()
 
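
For readers skimming the diff, here is a minimal sketch of the load-or-build flow the new code settles on. It is not a copy of app.py: the helper name build_or_load_vectorstore is illustrative, split_documents and hf_embeddings are assumed to be defined earlier in the app (as in the diff), and FAISS is assumed to be LangChain's wrapper from langchain_community.vectorstores, whose load_local/save_local take the folder that holds index.faiss and index.pkl. The sketch therefore passes the directory to load_local/save_local and uses the index.faiss file path only for the existence check.

import os

from langchain_community.vectorstores import FAISS

DATA_DIR = "./data"
VECTOR_STORE_DIR = os.path.join(DATA_DIR, "vectorstore")
VECTOR_STORE_PATH = os.path.join(VECTOR_STORE_DIR, "index.faiss")


def build_or_load_vectorstore(split_documents, hf_embeddings):
    """Illustrative helper: load a saved FAISS index if one exists,
    otherwise build it in batches of 32 documents and persist it.
    Assumes split_documents is non-empty."""
    if os.path.exists(VECTOR_STORE_PATH):
        # A store was saved previously; load the folder containing index.faiss / index.pkl.
        return FAISS.load_local(
            VECTOR_STORE_DIR,
            hf_embeddings,
            allow_dangerous_deserialization=True,  # needed because part of the store is pickled
        )

    os.makedirs(VECTOR_STORE_DIR, exist_ok=True)
    vectorstore = None
    for i in range(0, len(split_documents), 32):  # batch size capped at 32, per the note in the diff
        batch = split_documents[i:i + 32]
        if vectorstore is None:
            # First batch creates the index; later batches are appended to it.
            vectorstore = FAISS.from_documents(batch, hf_embeddings)
        else:
            vectorstore.add_documents(batch)
    vectorstore.save_local(VECTOR_STORE_DIR)
    return vectorstore


# split_documents and hf_embeddings come from earlier in app.py (see the diff context).
vectorstore = build_or_load_vectorstore(split_documents, hf_embeddings)
hf_retriever = vectorstore.as_retriever()

Checking for index.faiss rather than the directory means a directory left behind by a failed or interrupted indexing run does not get mistaken for a valid store, and batching in chunks of 32 matches the embedding endpoint's maximum batch size noted in the diff.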