File size: 2,769 Bytes
7d28b70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# utils.py
import openai
from pinecone import Pinecone, ServerlessSpec
import pandas as pd
from typing import List

# Function to get embeddings from OpenAI's model
def get_embedding(text: str, openai_api_key: str, model: str = "text-embedding-ada-002") -> List[float]:
    """
    Get the embedding vector for a given text using OpenAI's embedding API.

    Works with both generations of the ``openai`` package: the client-based
    interface (openai>=1.0) is used when available, otherwise the legacy
    ``openai.Embedding.create`` call is used.

    Args:
    text (str): Text to be embedded. Blank or non-string values (e.g. a NaN
        read from an empty CSV cell) are rejected without calling the API.
    openai_api_key (str): OpenAI API key.
    model (str): Model to be used for embedding. Default is "text-embedding-ada-002".

    Returns:
    List[float]: Embedding vector, or an empty list if the input is unusable
        or the API call fails (the error is printed, not raised).
    """
    # Reject unusable input up front: the API would refuse it anyway, so this
    # avoids a wasted network round-trip and yields the same empty result.
    if not isinstance(text, str) or not text.strip():
        print("Error getting embedding: input text is empty or not a string")
        return []

    # Kept as a module-level assignment so callers relying on the globally
    # configured key keep working; the legacy call path below also reads it.
    openai.api_key = openai_api_key
    try:
        if hasattr(openai, "OpenAI"):
            # openai>=1.0: `openai.Embedding` was removed, use a client instance.
            with openai.OpenAI(api_key=openai_api_key) as client:
                response = client.embeddings.create(model=model, input=text)
            return list(response.data[0].embedding)

        # openai<1.0: legacy module-level resource API.
        response = openai.Embedding.create(
            model=model,
            input=text
        )
        return response['data'][0]['embedding']
    except Exception as e:
        # Best-effort contract: callers treat an empty list as "no embedding".
        print(f"Error getting embedding: {e}")
        return []

# Function to process the uploaded CSV and store embeddings in Pinecone
def process_csv(file, openai_api_key: str, pinecone_api_key: str, pinecone_env: str) -> str:
    """
    Process the uploaded CSV file and store embeddings in Pinecone.

    The CSV must contain the columns ``product_id``, ``product_name``,
    ``description`` and ``image_url``. Each row's description is embedded via
    ``get_embedding`` and upserted into the "product-recommendations" index
    (created as a 1536-dimension AWS serverless index if it does not exist).

    Args:
    file: Uploaded CSV file; only its ``name`` attribute (the path) is read.
    openai_api_key (str): OpenAI API key.
    pinecone_api_key (str): Pinecone API key.
    pinecone_env (str): Pinecone environment; passed as the serverless region.

    Returns:
    str: Status message. Errors are printed and reported through the
        returned message rather than raised.
    """
    required_columns = ('product_id', 'product_name', 'description', 'image_url')
    # Keep each upsert request small; a single request carrying the whole
    # catalog can exceed Pinecone's per-request limits.
    upsert_batch_size = 100

    try:
        df = pd.read_csv(file.name)

        # Fail before touching Pinecone/OpenAI if the file cannot be processed.
        missing_columns = [col for col in required_columns if col not in df.columns]
        if missing_columns:
            print(f"Error processing CSV file: missing required columns {missing_columns}")
            return "Failed to process CSV file."

        # Initialize Pinecone
        pc = Pinecone(api_key=pinecone_api_key)
        index_name = "product-recommendations"

        # Check if index exists
        if index_name not in pc.list_indexes().names():
            try:
                pc.create_index(
                    name=index_name,
                    dimension=1536,
                    spec=ServerlessSpec(cloud="aws", region=pinecone_env)
                )
            except Exception as e:
                print(f"Error creating Pinecone index: {e}")
                return "Failed to create Pinecone index."

        index = pc.Index(index_name)

        embeddings = []
        skipped_rows = 0
        for _, row in df.iterrows():
            # A row without an id cannot be addressed in the index.
            if pd.isna(row['product_id']):
                skipped_rows += 1
                continue

            embedding = get_embedding(row['description'], openai_api_key)
            if not embedding:
                skipped_rows += 1
                continue

            # Pinecone does not accept null metadata values, so omit empty cells.
            metadata = {
                key: row[key]
                for key in ('product_name', 'image_url')
                if pd.notna(row[key])
            }
            embeddings.append((str(row['product_id']), embedding, metadata))

        if skipped_rows:
            print(f"Skipped {skipped_rows} row(s) without a usable product_id or embedding.")

        if not embeddings:
            # Do not claim success when nothing was actually stored.
            return "No embeddings were generated; nothing was stored in Pinecone."

        try:
            for start in range(0, len(embeddings), upsert_batch_size):
                index.upsert(embeddings[start:start + upsert_batch_size])
        except Exception as e:
            print(f"Error upserting embeddings to Pinecone: {e}")
            return "Failed to upsert embeddings."

        return "Product catalog processed and embeddings stored in Pinecone."
    except Exception as e:
        print(f"Error processing CSV file: {e}")
        return "Failed to process CSV file."