Spaces:

zayeem00
/

AI-SmartShopper

Sleeping

File size: 2,769 Bytes

7d28b70

# utils.py
import openai
from pinecone import Pinecone, ServerlessSpec
import pandas as pd
from typing import List

# Function to get embeddings from OpenAI's model
def get_embedding(text: str, openai_api_key: str, model: str = "text-embedding-ada-002") -> List[float]:
    """
    Get the embedding vector for a piece of text from OpenAI.

    Works with both generations of the ``openai`` package: the client
    interface (``openai.OpenAI``) is used when the installed package provides
    it, otherwise the legacy ``openai.Embedding.create`` call is used.

    Args:
    text (str): Text to be embedded.
    openai_api_key (str): OpenAI API key.
    model (str): Model to be used for embedding. Default is "text-embedding-ada-002".

    Returns:
    List[float]: Embedding vector, or an empty list when the text is missing
    or empty, or when the request fails (the error is printed, not raised).
    """
    # Missing values (None, pandas NaN) and empty strings cannot be embedded;
    # return early instead of spending an API request that can only fail.
    if not isinstance(text, str) or not text:
        return []

    openai.api_key = openai_api_key
    try:
        if hasattr(openai, "OpenAI"):
            # openai>=1.0 removed openai.Embedding in favour of client objects.
            client = openai.OpenAI(api_key=openai_api_key)
            response = client.embeddings.create(model=model, input=text)
            return list(response.data[0].embedding)

        # Legacy (pre-1.0) module-level API.
        response = openai.Embedding.create(
            model=model,
            input=text
        )
        return response['data'][0]['embedding']
    except Exception as e:
        print(f"Error getting embedding: {e}")
        return []

# Function to process the uploaded CSV and store embeddings in Pinecone
def process_csv(file, openai_api_key: str, pinecone_api_key: str, pinecone_env: str) -> str:
    """
    Process the uploaded CSV file and store embeddings in Pinecone.

    The CSV must contain the columns ``product_id``, ``product_name``,
    ``description`` and ``image_url``. Each row's description is embedded via
    ``get_embedding`` and upserted into the "product-recommendations" index
    (created as an AWS serverless index if it does not exist yet), with the
    product name and image URL attached as metadata.

    Args:
    file: Uploaded CSV file. Either a path (str / path-like) or an object
        exposing the path through a ``name`` attribute.
    openai_api_key (str): OpenAI API key.
    pinecone_api_key (str): Pinecone API key.
    pinecone_env (str): Pinecone environment; used as the serverless region
        when the index has to be created.

    Returns:
    str: Status message. Errors are printed and reported through the
    returned message rather than raised.
    """
    required_columns = ("product_id", "product_name", "description", "image_url")
    metadata_columns = ("product_name", "image_url")
    # Pinecone caps the size of a single upsert request, so a large catalog
    # has to be sent in batches rather than in one call.
    upsert_batch_size = 100

    try:
        # Accept a plain path as well as an upload wrapper carrying `.name`.
        # Path-like objects are passed through untouched because their
        # `.name` is only the final path component.
        if isinstance(file, str) or hasattr(file, "__fspath__"):
            csv_source = file
        else:
            csv_source = getattr(file, "name", file)
        df = pd.read_csv(csv_source)

        # Validate the schema before touching any external service.
        missing_columns = [col for col in required_columns if col not in df.columns]
        if missing_columns:
            print(f"Error processing CSV file: missing required columns {missing_columns}")
            return "Failed to process CSV file."

        if df.empty:
            return "No products found in the CSV file; nothing was stored in Pinecone."

        # Initialize Pinecone
        pc = Pinecone(api_key=pinecone_api_key)
        index_name = "product-recommendations"

        # Check if index exists
        if index_name not in pc.list_indexes().names():
            try:
                pc.create_index(
                    name=index_name,
                    dimension=1536,
                    spec=ServerlessSpec(cloud="aws", region=pinecone_env)
                )
            except Exception as e:
                print(f"Error creating Pinecone index: {e}")
                return "Failed to create Pinecone index."

        index = pc.Index(index_name)

        vectors = []
        skipped_rows = 0
        for _, row in df.iterrows():
            description = row['description']
            # A missing description cannot be embedded; skip it without
            # spending an API call.
            embedding = [] if pd.isna(description) else get_embedding(description, openai_api_key)
            if not embedding:
                skipped_rows += 1
                continue
            # Pinecone metadata cannot hold null values, so fields that are
            # missing in the CSV are left out instead of being sent as NaN.
            metadata = {col: row[col] for col in metadata_columns if pd.notna(row[col])}
            vectors.append((str(row['product_id']), embedding, metadata))

        if skipped_rows:
            print(f"Skipped {skipped_rows} row(s) for which no embedding could be generated.")

        if not vectors:
            # Reporting success here would be misleading: nothing was stored.
            return "Failed to generate embeddings; nothing was stored in Pinecone."

        try:
            for start in range(0, len(vectors), upsert_batch_size):
                index.upsert(vectors[start:start + upsert_batch_size])
        except Exception as e:
            print(f"Error upserting embeddings to Pinecone: {e}")
            return "Failed to upsert embeddings."

        return "Product catalog processed and embeddings stored in Pinecone."
    except Exception as e:
        print(f"Error processing CSV file: {e}")
        return "Failed to process CSV file."