import os

from pinecone import Pinecone

from text_embedder_encoder import TextEmbedder, encoder_model_name


class Retriever:
    """Look up stored answers in a Pinecone index by vector similarity.

    The query text is embedded with ``TextEmbedder`` and sent to the
    configured Pinecone index; the ``answer`` metadata field of every match
    is returned.
    """

    def __init__(
        self,
        pinecone_api_key=None,
        index_name=f"hebrew-dentist-qa-{encoder_model_name.replace('/', '-')}".lower(),
    ):
        """Create the Pinecone client and the text embedder.

        Args:
            pinecone_api_key: Pinecone API key. When ``None`` (the default),
                the key is read from the ``pinecone_api_key`` environment
                variable at construction time.
            index_name: Name of the Pinecone index to query. Defaults to a
                lower-cased name derived from ``encoder_model_name`` with
                ``/`` replaced by ``-``.

        Raises:
            KeyError: If no key is passed and the ``pinecone_api_key``
                environment variable is not set.
        """
        # Resolve the key here rather than in the signature: a default of
        # ``os.environ[...]`` is evaluated when the module is imported, which
        # would raise at import time whenever the variable is unset and would
        # keep the secret in the function's defaults.
        if pinecone_api_key is None:
            pinecone_api_key = os.environ["pinecone_api_key"]

        # Initialize Pinecone connection
        self.pc = Pinecone(api_key=pinecone_api_key)
        self.index_name = index_name
        self.text_embedder = TextEmbedder()
        # NOTE(review): presumably the embedding width of the encoder; it is
        # not used anywhere in this class -- confirm against TextEmbedder.
        self.vector_dim = 768

    def search_similar(self, query_text, top_k=50):
        """Search for similar content using vector similarity in Pinecone.

        Args:
            query_text: Text to embed and use as the query.
            top_k: Maximum number of matches requested from the index.

        Returns:
            A list with the ``answer`` metadata value of each match, in the
            order the index returned them. If anything fails (embedding, the
            query itself, or a match without ``answer`` metadata), the error
            is printed and an empty list is returned.
        """
        try:
            # Generate embedding for query
            query_vector = self.text_embedder.encode(query_text)

            # Get Pinecone index
            index = self.pc.Index(self.index_name)

            # Execute search
            results = index.query(
                vector=query_vector,
                top_k=top_k,
                include_metadata=True,
            )

            return [match['metadata']['answer'] for match in results['matches']]
        except Exception as e:
            # Deliberate best-effort: callers get an empty result instead of
            # an exception when the lookup cannot be completed.
            print(f"Error performing similarity search: {e}")
            return []