# Spaces: Running
from typing import List | |
from embedding_provider import EmbeddingProvider | |
from database.annoydb import AnnoyDB | |
class SearchManager:
    """Prepare a vector index and a keyword index over a set of documents."""

    def __init__(
        self,
        embedding_provider: EmbeddingProvider,
        documents: List[str],
        semantic_weight: float = 0.7,
        keyword_weight: float = 0.3,
    ) -> None:
        """Smart Search Manager

        Embeds the documents, loads them into an AnnoyDB index, builds the
        index, and creates a keyword search provider over the same documents.

        Args:
            embedding_provider (EmbeddingProvider): provider whose
                ``embed_documents`` is used to embed ``documents``.
            documents (List[str]): list of documents to index.
            semantic_weight (float, optional): weight stored for the semantic
                side of hybrid search. Defaults to 0.7.
            keyword_weight (float, optional): weight stored for the keyword
                side of hybrid search. Defaults to 0.3.
        """
        self.embedding_provider = embedding_provider
        self.documents = documents

        # Weights for hybrid search
        self.semantic_weight = semantic_weight
        self.keyword_weight = keyword_weight

        # Vector Database Setup: one entry per (embedding, document) pair.
        embeddings = embedding_provider.embed_documents(documents)
        self.semantic_embeddings = embeddings
        # The second axis of the embeddings is used as the index dimension.
        index = AnnoyDB(embedding_dim=embeddings.shape[1])
        for vector, text in zip(embeddings, documents):
            index.add_item(vector, text)
        index.build()
        self.vector_db = index

        # Keyword Search Setup
        # NOTE(review): KeywordSearchProvider is not imported in the visible
        # part of this file -- confirm it is brought into scope elsewhere.
        self.keyword_search = KeywordSearchProvider(documents)