# hebrew-dentsit / retriever.py
# borodache's picture
# Upload 7 files
# c9580eb verified
from pinecone import Pinecone
import os
from text_embedder_encoder import TextEmbedder, encoder_model_name
class Retriever:
    """Fetch stored answers from a Pinecone index by vector similarity.

    A query string is embedded with ``TextEmbedder`` and the resulting vector
    is sent to the configured Pinecone index; the ``answer`` metadata field of
    each returned match is collected and handed back to the caller.
    """

    def __init__(self,
                 pinecone_api_key=None,
                 index_name=f"hebrew-dentist-qa-{encoder_model_name.replace('/', '-')}".lower()):
        """Create the Pinecone client and the text embedder.

        Args:
            pinecone_api_key: API key passed to ``Pinecone``. When ``None``
                (the default) the key is read from the ``pinecone_api_key``
                environment variable.
            index_name: Name of the Pinecone index to query. Defaults to
                ``hebrew-dentist-qa-<encoder_model_name>`` with ``/`` replaced
                by ``-`` and lower-cased.

        Raises:
            KeyError: If no key is passed and the ``pinecone_api_key``
                environment variable is not set.
        """
        # Resolve the key at construction time, not in the signature: a
        # default expression is evaluated once when the ``def`` statement
        # runs, which made importing this module fail whenever the variable
        # was unset, even for callers that pass the key explicitly.
        if pinecone_api_key is None:
            pinecone_api_key = os.environ["pinecone_api_key"]

        # Initialize Pinecone connection
        self.pc = Pinecone(api_key=pinecone_api_key)
        self.index_name = index_name
        self.text_embedder = TextEmbedder()
        # Not read anywhere in this class; presumably the embedding
        # dimensionality of the encoder -- TODO confirm.
        self.vector_dim = 768

    def search_similar(self, query_text, top_k=50):
        """Search for similar content using vector similarity in Pinecone.

        Args:
            query_text: Text to embed and search for.
            top_k: Maximum number of matches requested from the index.

        Returns:
            A list with the ``answer`` metadata value of every match, in the
            order the index returned them. An empty list is returned when any
            step fails; the error is printed rather than raised.
        """
        try:
            # Generate embedding for query
            query_vector = self.text_embedder.encode(query_text)

            # Get Pinecone index
            index = self.pc.Index(self.index_name)

            # Execute search
            results = index.query(
                vector=query_vector,
                top_k=top_k,
                include_metadata=True,
            )

            return [match['metadata']['answer'] for match in results['matches']]
        except Exception as e:
            # Deliberate best-effort boundary: callers always get a list back,
            # so any failure (embedding, network, malformed match) is reported
            # and turned into "no results".
            print(f"Error performing similarity search: {e}")
            return []