import os
import uuid

import pinecone
from dotenv import load_dotenv
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from pymongo import MongoClient

# Load configuration from a local .env file into the process environment.
load_dotenv()

FLASH_API = os.getenv("FLASH_API")
PINECONE_API = os.getenv("PINECONE_API")
PINECONE_INDEX = os.getenv("PINECONE_INDEX")

# Embedding client used to turn document descriptions into vectors.
google_embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=FLASH_API,
)

# Pinecone client; the index named by PINECONE_INDEX must already exist.
pc = pinecone.Pinecone(api_key=PINECONE_API)

MONGO_URI = os.getenv("MONGO_URI")
DB_NAME = os.getenv("DB_NAME")
COLLECTION_NAME = os.getenv("COLLECTION_NAME")

mongo_client = MongoClient(MONGO_URI)
db = mongo_client[DB_NAME]
collection = db[COLLECTION_NAME]

index = pc.Index(PINECONE_INDEX)


def _mark_embedding_failed(document):
    """Best-effort: flag *document* in MongoDB as not embedded.

    Does nothing when *document* is None (the lookup never produced a
    document, so there is nothing to update). Errors raised by the status
    write are printed rather than propagated, so the caller can still
    report the original failure.
    """
    if document is None:
        return
    try:
        collection.update_one(
            {"_id": document["_id"]},
            {"$set": {"successfully_embedding_created": False}},
        )
    except Exception as update_error:
        print(f"Error occurred while recording failure status: {update_error}")


def create_embedding(object_url, tags, categories):
    """Embed a stored document's description and upsert it into Pinecone.

    Looks up the MongoDB document whose ``object_url`` field equals
    *object_url*, embeds its ``description`` field, and upserts one vector
    (with a freshly generated UUID as its id) into the Pinecone index. The
    vector metadata carries the description, url, file type, the MongoDB
    ``_id`` as a string, and *tags* / *categories* joined with commas.

    On success the MongoDB document is updated with ``pinecone_id`` and
    ``successfully_embedding_created = True``. On failure the document (if
    one was found) is updated with ``successfully_embedding_created = False``.

    Args:
        object_url: Value matched against the ``object_url`` field.
        tags: Iterable of strings, stored comma-joined in the metadata.
        categories: Iterable of strings, stored comma-joined in the metadata.

    Returns:
        True if the vector was upserted and the document updated, otherwise
        False (including when no document matches *object_url*).
    """
    # Bound before the try block so the except handler can always refer to
    # it, even when find_one itself raises.
    document = None
    try:
        document = collection.find_one({"object_url": object_url})
        if document is None:
            # Nothing to embed and nothing to mark as failed.
            print(f"Error occurred: no document found for {object_url}")
            return False

        content = document.get("description")
        file_type = document.get("type")
        # ObjectId is converted to a string so it can be stored as metadata.
        mongo_id = str(document.get("_id"))

        embedding = google_embeddings.embed_query(content)

        # Each upsert gets its own freshly generated Pinecone vector id.
        pinecone_id = str(uuid.uuid4())

        vectors = [{
            "id": pinecone_id,
            "values": embedding,
            "metadata": {
                "description": content,
                "url": object_url,
                "filetype": file_type,
                "mongo_id": mongo_id,
                "tags": ",".join(tags),
                "categories": ",".join(categories),
            },
        }]

        index.upsert(vectors)
        print(f"Inserted: {object_url} in Pinecone with MongoDB ID in metadata")

        # Record the Pinecone id and success flag on the source document.
        collection.update_one(
            {"_id": document["_id"]},
            {"$set": {
                "pinecone_id": pinecone_id,
                "successfully_embedding_created": True,
            }},
        )
        return True

    except Exception as e:
        print(f"Error occurred: {e}")
        _mark_embedding_failed(document)
        return False