Spaces:
Runtime error
Runtime error
Upload 3 files
Browse files
- data_ingester.py +17 -5
- data_query.py +1 -13
data_ingester.py
CHANGED
@@ -20,10 +20,11 @@ class ChatbotDataIngester:
|
|
20 |
self.loader = ChatbotDataLoader()
|
21 |
self.vector_store = vector_store
|
22 |
self.embeddings = embeddings
|
23 |
-
self.text_splitter =
|
24 |
-
separator=["\n\n", "\n", '.'],
|
25 |
chunk_size=1000,
|
26 |
-
chunk_overlap=200,
|
|
|
|
|
27 |
|
28 |
def embed_content(self, content):
|
29 |
"""
|
@@ -51,6 +52,8 @@ class ChatbotDataIngester:
|
|
51 |
for file_path, content in file_contents.items()
|
52 |
]
|
53 |
|
|
|
|
|
54 |
split_docs = self.text_splitter.split_documents(documents)
|
55 |
|
56 |
# Generate UUIDs for documents
|
@@ -66,8 +69,17 @@ class ChatbotDataIngester:
|
|
66 |
Clear all documents in the vector store.
|
67 |
"""
|
68 |
try:
|
69 |
-
self.vector_store.
|
70 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
except Exception as e:
|
72 |
print(f"Failed to clear the vector store: {str(e)}")
|
73 |
|
|
|
20 |
self.loader = ChatbotDataLoader()
|
21 |
self.vector_store = vector_store
|
22 |
self.embeddings = embeddings
|
23 |
+
self.text_splitter = RecursiveCharacterTextSplitter(
|
|
|
24 |
chunk_size=1000,
|
25 |
+
chunk_overlap=200,
|
26 |
+
length_function=len,
|
27 |
+
)
|
28 |
|
29 |
def embed_content(self, content):
|
30 |
"""
|
|
|
52 |
for file_path, content in file_contents.items()
|
53 |
]
|
54 |
|
55 |
+
print(f'{len(documents)} documents loaded from the database')
|
56 |
+
|
57 |
split_docs = self.text_splitter.split_documents(documents)
|
58 |
|
59 |
# Generate UUIDs for documents
|
|
|
69 |
Clear all documents in the vector store.
|
70 |
"""
|
71 |
try:
|
72 |
+
current_index = self.vector_store.get_pinecone_index('test')
|
73 |
+
check = False
|
74 |
+
for ids in current_index.list(namespace='default'):
|
75 |
+
check = True
|
76 |
+
break
|
77 |
+
if not check:
|
78 |
+
print("The vector store is already empty.")
|
79 |
+
return
|
80 |
+
else:
|
81 |
+
self.vector_store.delete(delete_all=True)
|
82 |
+
print("Cleared the vector store.")
|
83 |
except Exception as e:
|
84 |
print(f"Failed to clear the vector store: {str(e)}")
|
85 |
|
data_query.py
CHANGED
@@ -20,19 +20,7 @@ class ChatbotDataQuery:
|
|
20 |
def __init__(self, vector_store):
|
21 |
self.llm = ChatOpenAI(model="gpt-4o", api_key=os.getenv("OPENAI_API_KEY"))
|
22 |
|
23 |
-
self.system_prompt = '''You are Wagner, a highly intelligent and friendly AI assistant.
|
24 |
-
For example:
|
25 |
-
Input: Who are you?
|
26 |
-
Answer: I am Wagner, a highly intelligent and friendly AI assistant. I am an assistant who helps answer queries.
|
27 |
-
|
28 |
-
Input: What is your name?
|
29 |
-
Answer: My name is Wagner.
|
30 |
-
|
31 |
-
Input: How old are you?
|
32 |
-
Answer: Sorry, I don't have an age as I am an AI assistant.
|
33 |
-
|
34 |
-
Input: What is my name?
|
35 |
-
Answer: My name is Wagner.'''
|
36 |
|
37 |
if vector_store is None:
|
38 |
raise ValueError("Vector store cannot be None")
|
|
|
20 |
def __init__(self, vector_store):
|
21 |
self.llm = ChatOpenAI(model="gpt-4o", api_key=os.getenv("OPENAI_API_KEY"))
|
22 |
|
23 |
+
self.system_prompt = '''You are Wagner, a highly intelligent and friendly AI assistant. More details on you are in a separate file: \'Who-is-Wagner-Chatbot-Response.docx\'.\n'''
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
|
25 |
if vector_store is None:
|
26 |
raise ValueError("Vector store cannot be None")
|