# Spaces: Running
import os

from pinecone import Pinecone

from text_embedder_encoder import TextEmbedder, encoder_model_name
class Retriever:
    """Look up stored answers in a Pinecone index by embedding similarity.

    A query string is embedded with ``TextEmbedder`` and sent to the
    configured Pinecone index; the ``answer`` metadata value of each match
    is returned.
    """

    def __init__(self, pinecone_api_key=None, index_name=None):
        """Create the Pinecone client and the text embedder.

        Args:
            pinecone_api_key: Pinecone API key. When ``None`` (the default)
                it is read from the ``pinecone_api_key`` environment
                variable at construction time.
            index_name: Name of the Pinecone index to query. When ``None``
                (the default) it is derived from ``encoder_model_name``:
                ``hebrew-dentist-qa-`` followed by the model name with
                ``/`` replaced by ``-``, lower-cased.

        Raises:
            KeyError: If no key is passed and the ``pinecone_api_key``
                environment variable is not set.
        """
        # Defaults are resolved here instead of in the signature: a signature
        # default is evaluated once, when the class is defined, so a missing
        # environment variable would make the module fail to import even for
        # callers that pass the key explicitly.
        if pinecone_api_key is None:
            pinecone_api_key = os.environ["pinecone_api_key"]
        if index_name is None:
            model_slug = encoder_model_name.replace('/', '-')
            index_name = f"hebrew-dentist-qa-{model_slug}".lower()

        # Initialize Pinecone connection
        self.pc = Pinecone(api_key=pinecone_api_key)
        self.index_name = index_name
        self.text_embedder = TextEmbedder()
        # Not read anywhere in the visible code; presumably the embedding
        # width of the encoder model -- TODO confirm.
        self.vector_dim = 768

    def search_similar(self, query_text, top_k=50):
        """Search for similar content using vector similarity in Pinecone.

        Args:
            query_text: Text to embed and use as the query.
            top_k: Number of matches requested from the index.

        Returns:
            A list holding the ``answer`` metadata value of each match, in
            the order the index returned them. Matches that carry no
            ``answer`` are skipped. If the search itself fails, the error is
            printed and an empty list is returned.
        """
        try:
            # Generate embedding for query
            query_vector = self.text_embedder.encode(query_text)

            # Get Pinecone index
            index = self.pc.Index(self.index_name)

            # Execute search
            results = index.query(
                vector=query_vector,
                top_k=top_k,
                include_metadata=True,
            )

            answers = []
            for match in results['matches']:
                try:
                    answers.append(match['metadata']['answer'])
                except (KeyError, TypeError, AttributeError):
                    # A match without usable metadata must not discard the
                    # answers of the other matches. KeyError/TypeError cover
                    # plain dicts and ``None`` metadata; AttributeError is
                    # presumably what the client's model objects raise for a
                    # missing field -- verify against the client version.
                    continue
            return answers
        except Exception as e:
            # Deliberately best-effort: callers receive an empty result
            # instead of an exception when embedding or querying fails.
            print(f"Error performing similarity search: {e}")
            return []