rchrdgwr commited on
Commit
cbe98ad
1 Parent(s): 34efc41

FEAT: added qdrant

Browse files
Files changed (2) hide show
  1. aimakerspace/vectordatabase.py +69 -0
  2. app.py +29 -7
aimakerspace/vectordatabase.py CHANGED
@@ -52,8 +52,77 @@ class VectorDatabase:
52
  for text, embedding in zip(list_of_text, embeddings):
53
  self.insert(text, np.array(embedding))
54
  return self
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
 
 
56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  if __name__ == "__main__":
58
  list_of_text = [
59
  "I like to eat broccoli and bananas.",
 
52
  for text, embedding in zip(list_of_text, embeddings):
53
  self.insert(text, np.array(embedding))
54
  return self
55
import hashlib

from qdrant_client import QdrantClient
from qdrant_client.http.models import PointStruct


class QdrantDatabase:
    """Vector store backed by a Qdrant collection.

    Mirrors the VectorDatabase interface (insert / search / search_by_text /
    retrieve_from_key / abuild_from_list) so callers can swap one for the
    other without changes.
    """

    def __init__(self, qdrant_client: QdrantClient, collection_name: str, embedding_model=None):
        self.qdrant_client = qdrant_client
        self.collection_name = collection_name
        self.embedding_model = embedding_model or EmbeddingModel()
        # Local copy of inserted vectors so retrieve_from_key() works without
        # a round trip to Qdrant. A plain dict replaces the original
        # defaultdict(np.array), which was a bug: np.array() cannot be called
        # with zero arguments, so the default factory would raise TypeError.
        self.vectors = {}

    def string_to_int_id(self, s: str) -> int:
        """Derive a stable integer point id (< 10**8) from a string key.

        Qdrant point ids must be ints or UUIDs, so we hash the text key.
        NOTE: modulo 10**8 makes collisions possible for very large corpora.
        """
        return int(hashlib.sha256(s.encode('utf-8')).hexdigest(), 16) % (10**8)

    def insert(self, key: str, vector: np.array) -> None:
        """Upsert *vector* into the collection, storing *key* as payload text."""
        point_id = self.string_to_int_id(key)
        payload = {"text": key}  # keep the original text retrievable from a hit
        point = PointStruct(
            id=point_id,
            # Named vector; the name must match the collection's vectors_config.
            vector={"default": vector.tolist()},
            payload=payload,
        )
        self.qdrant_client.upsert(
            collection_name=self.collection_name,
            points=[point],  # Qdrant expects a list of PointStruct
        )
        # Fix: the original never populated the local cache, so
        # retrieve_from_key() always returned None.
        self.vectors[key] = vector

    def search(
        self,
        query_vector: np.array,
        k: int,
        distance_measure: Callable = None,  # unused: Qdrant applies the collection's metric
    ) -> List[Tuple[str, float]]:
        """Return the top *k* (text, score) pairs for *query_vector*.

        Accepts either a list or an np.array; lists are converted first.
        """
        if isinstance(query_vector, list):
            query_vector = np.array(query_vector)
        # Debug print(query_vector) removed — it spammed stdout on every query.
        search_results = self.qdrant_client.search(
            collection_name=self.collection_name,
            query_vector={"name": "default", "vector": query_vector.tolist()},
            limit=k,
        )
        return [(result.payload['text'], result.score) for result in search_results]

    def search_by_text(
        self,
        query_text: str,
        k: int,
        distance_measure: Callable = None,
        return_as_text: bool = False,
    ) -> List[Tuple[str, float]]:
        """Embed *query_text* and search; optionally return bare texts only."""
        query_vector = self.embedding_model.get_embedding(query_text)
        results = self.search(query_vector, k, distance_measure)
        return [result[0] for result in results] if return_as_text else results

    def retrieve_from_key(self, key: str) -> np.array:
        """Return the locally cached vector for *key*, or None if unseen."""
        return self.vectors.get(key, None)

    async def abuild_from_list(self, list_of_text: List[str]) -> "QdrantDatabase":
        """Embed every text and insert it; returns self for fluent use."""
        embeddings = await self.embedding_model.async_get_embeddings(list_of_text)
        for text, embedding in zip(list_of_text, embeddings):
            self.insert(text, np.array(embedding))
        return self
125
+
126
  if __name__ == "__main__":
127
  list_of_text = [
128
  "I like to eat broccoli and bananas.",
app.py CHANGED
@@ -8,7 +8,7 @@ from aimakerspace.openai_utils.prompts import (
8
  AssistantRolePrompt,
9
  )
10
  from aimakerspace.openai_utils.embedding import EmbeddingModel
11
- from aimakerspace.vectordatabase import VectorDatabase
12
  from aimakerspace.openai_utils.chatmodel import ChatOpenAI
13
  import chainlit as cl
14
  import fitz
@@ -80,6 +80,7 @@ def process_text_file(file: AskFileResponse):
80
  return texts
81
 
82
 
 
83
  @cl.on_chat_start
84
  async def on_chat_start():
85
  files = None
@@ -112,19 +113,40 @@ async def on_chat_start():
112
 
113
  # decide whether to use the dict vector store or the Qdrant vector store
114
 
115
- use_qdrant = False
116
-
 
117
  # Create a dict vector store
118
  if use_qdrant:
119
- msg = cl.Message(
120
- content="Sorry, qgrant not implemented yet", disable_human_feedback=True
 
 
121
  )
122
- await msg.send()
123
- raise NotImplemented()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  else:
125
  vector_db = VectorDatabase()
126
  vector_db = await vector_db.abuild_from_list(texts)
127
 
 
 
 
 
128
  chat_openai = ChatOpenAI()
129
 
130
  # Create a chain
 
8
  AssistantRolePrompt,
9
  )
10
  from aimakerspace.openai_utils.embedding import EmbeddingModel
11
+ from aimakerspace.vectordatabase import VectorDatabase, QdrantDatabase
12
  from aimakerspace.openai_utils.chatmodel import ChatOpenAI
13
  import chainlit as cl
14
  import fitz
 
80
  return texts
81
 
82
 
83
+
84
  @cl.on_chat_start
85
  async def on_chat_start():
86
  files = None
 
113
 
114
  # decide whether to use the dict vector store or the Qdrant vector store
115
 
116
+ use_qdrant = True
117
+ from qdrant_client import QdrantClient
118
+ from qdrant_client.http.models import VectorParams, Distance
119
  # Create a dict vector store
120
  if use_qdrant:
121
+ embedding_model = EmbeddingModel()
122
+ qdrant_client = QdrantClient(
123
+ url='https://6b3eac94-adfe-42cb-98f8-9f068538243c.europe-west3-0.gcp.cloud.qdrant.io:6333', # Replace with your cluster URL
124
+ api_key=os.environ["QDRANT_API_KEY"]  # NOTE(review): a live API key was committed here — it must be rotated; load it from the environment, never hard-code it
125
  )
126
+ vectors_config = {
127
+ "default": VectorParams(size=1536, distance="Cosine") # Adjust size as per your model's output
128
+ }
129
+ if not qdrant_client.collection_exists("my_collection"):
130
+ qdrant_client.create_collection(
131
+ collection_name="my_collection",
132
+ vectors_config=vectors_config
133
+ )
134
+
135
+ vector_db = QdrantDatabase(
136
+ qdrant_client=qdrant_client,
137
+ collection_name="my_collection",
138
+ embedding_model=embedding_model # Replace with your embedding model instance
139
+ )
140
+ vector_db = await vector_db.abuild_from_list(texts)
141
+
142
  else:
143
  vector_db = VectorDatabase()
144
  vector_db = await vector_db.abuild_from_list(texts)
145
 
146
+ msg = cl.Message(
147
+ content=f"The Vector store has been created", disable_human_feedback=True
148
+ )
149
+ await msg.send()
150
  chat_openai = ChatOpenAI()
151
 
152
  # Create a chain