SalehAhmad committed on
Commit
939d2ad
·
verified ·
1 Parent(s): 81b62bf

Upload 3 files

Browse files
Files changed (2) hide show
  1. data_ingester.py +17 -5
  2. data_query.py +1 -13
data_ingester.py CHANGED
@@ -20,10 +20,11 @@ class ChatbotDataIngester:
20
  self.loader = ChatbotDataLoader()
21
  self.vector_store = vector_store
22
  self.embeddings = embeddings
23
- self.text_splitter = SpacyTextSplitter(
24
- separator=["\n\n", "\n", '.'],
25
  chunk_size=1000,
26
- chunk_overlap=200,)
 
 
27
 
28
  def embed_content(self, content):
29
  """
@@ -51,6 +52,8 @@ class ChatbotDataIngester:
51
  for file_path, content in file_contents.items()
52
  ]
53
 
 
 
54
  split_docs = self.text_splitter.split_documents(documents)
55
 
56
  # Generate UUIDs for documents
@@ -66,8 +69,17 @@ class ChatbotDataIngester:
66
  Clear all documents in the vector store.
67
  """
68
  try:
69
- self.vector_store.delete(delete_all=True)
70
- print("Cleared the vector store.")
 
 
 
 
 
 
 
 
 
71
  except Exception as e:
72
  print(f"Failed to clear the vector store: {str(e)}")
73
 
 
20
  self.loader = ChatbotDataLoader()
21
  self.vector_store = vector_store
22
  self.embeddings = embeddings
23
+ self.text_splitter = RecursiveCharacterTextSplitter(
 
24
  chunk_size=1000,
25
+ chunk_overlap=200,
26
+ length_function=len,
27
+ )
28
 
29
  def embed_content(self, content):
30
  """
 
52
  for file_path, content in file_contents.items()
53
  ]
54
 
55
+ print(f'{len(documents)} documents loaded from the database')
56
+
57
  split_docs = self.text_splitter.split_documents(documents)
58
 
59
  # Generate UUIDs for documents
 
69
  Clear all documents in the vector store.
70
  """
71
  try:
72
+ current_index = self.vector_store.get_pinecone_index('test')
73
+ check = False
74
+ for ids in current_index.list(namespace='default'):
75
+ check = True
76
+ break
77
+ if not check:
78
+ print("The vector store is already empty.")
79
+ return
80
+ else:
81
+ self.vector_store.delete(delete_all=True)
82
+ print("Cleared the vector store.")
83
  except Exception as e:
84
  print(f"Failed to clear the vector store: {str(e)}")
85
 
data_query.py CHANGED
@@ -20,19 +20,7 @@ class ChatbotDataQuery:
20
  def __init__(self, vector_store):
21
  self.llm = ChatOpenAI(model="gpt-4o", api_key=os.getenv("OPENAI_API_KEY"))
22
 
23
- self.system_prompt = '''You are Wagner, a highly intelligent and friendly AI assistant. You are an assistant who helps answer queries about Daniel Ringel. WHen asked about you, simply asnwer about yourself and nothing else.
24
- For example:
25
- Input: Who are you?
26
- Answer: I am Wagner, a highly intelligent and friendly AI assistant. I am an assistant who helps answer queries.
27
-
28
- Input: What is your name?
29
- Answer: My name is Wagner.
30
-
31
- Input: How old are you?
32
- Answer: Sorry, I don't have an age as I am an AI assistant.
33
-
34
- Input: What is my name?
35
- Answer: My name is Wagner.'''
36
 
37
  if vector_store is None:
38
  raise ValueError("Vector store cannot be None")
 
20
  def __init__(self, vector_store):
21
  self.llm = ChatOpenAI(model="gpt-4o", api_key=os.getenv("OPENAI_API_KEY"))
22
 
23
+ self.system_prompt = '''You are Wagner, a highly intelligent and friendly AI assistant. More details on you are in a separate file: \'Who-is-Wagner-Chatbot-Response.docx\'.\n'''
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  if vector_store is None:
26
  raise ValueError("Vector store cannot be None")