Spaces:

SoumyaJ
/

PdfQnAUsingPinecone

Sleeping

SoumyaJ committed on Feb 26

Commit

8451d71

verified ·

1 Parent(s): 7e780fe

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -35,7 +35,7 @@ app.add_middleware(
 UPLOAD_DIR = "uploads"
 os.makedirs(UPLOAD_DIR, exist_ok=True)
-persist_directory = "./chroma_db"
 load_dotenv()
 os.environ["HF_TOKEN"] = os.getenv("HF_TOKEN")
@@ -64,7 +64,8 @@ def generate_file_id(file_path):
 def delete_existing_embedding(file_id):
     if os.path.exists(persist_directory):
-        shutil.rmtree(persist_directory)
 def tempUploadFile(filePath,file):
     with open(filePath,'wb') as buffer:
@@ -111,7 +112,7 @@ def loadAndSplitPdfFile(filePath):
 def prepare_retriever(filePath = "", load_from_chromadb = False):
     if load_from_chromadb:
-        vector_store = Chroma(persist_directory=persist_directory, embedding_function = embeddings)
         return vector_store.as_retriever(search_kwargs={"k": 5})
     elif filePath:
         doc_chunks = loadAndSplitPdfFile(filePath)
@@ -125,7 +126,7 @@ def prepare_retriever(filePath = "", load_from_chromadb = False):
                     if isinstance(value, (str, int, float, bool, Path))
                 }
-        vector_store = Chroma.from_documents(documents= doc_chunks, persist_directory=persist_directory, embedding= embeddings)
         vector_store.persist()
 def get_retriever_chain(retriever):

 UPLOAD_DIR = "uploads"
 os.makedirs(UPLOAD_DIR, exist_ok=True)
+persist_directory = "/home/user/.cache/chroma_db"
 load_dotenv()
 os.environ["HF_TOKEN"] = os.getenv("HF_TOKEN")
 def delete_existing_embedding(file_id):
     if os.path.exists(persist_directory):
+        shutil.rmtree(persist_directory, ignore_errors = True)
+        os.makedirs(persist_directory, exist_ok=True)
 def tempUploadFile(filePath,file):
     with open(filePath,'wb') as buffer:
 def prepare_retriever(filePath = "", load_from_chromadb = False):
     if load_from_chromadb:
+        vector_store = Chroma(persist_directory=persist_directory, embedding_function = embeddings, client_settings={"allow_reset": True})
         return vector_store.as_retriever(search_kwargs={"k": 5})
     elif filePath:
         doc_chunks = loadAndSplitPdfFile(filePath)
                     if isinstance(value, (str, int, float, bool, Path))
                 }
+        vector_store = Chroma.from_documents(documents= doc_chunks, persist_directory=persist_directory, embedding= embeddings, read)
         vector_store.persist()
 def get_retriever_chain(retriever):