zayeem00 committed on
Commit
7d28b70
·
verified ·
1 Parent(s): 68bc4b5

Create utils.py

Browse files
Files changed (1) hide show
  1. utils.py +82 -0
utils.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # utils.py
2
+ import openai
3
+ from pinecone import Pinecone, ServerlessSpec
4
+ import pandas as pd
5
+ from typing import List
6
+
7
+ # Function to get embeddings from OpenAI's model
8
def get_embedding(text: str, openai_api_key: str, model: str = "text-embedding-ada-002") -> List[float]:
    """
    Get the embedding vector for a piece of text from OpenAI.

    Works with both generations of the ``openai`` package: the client-based
    API (``openai.OpenAI``, openai>=1.0) is used when available, otherwise
    the legacy module-level ``openai.Embedding.create`` call is used.

    Args:
        text (str): Text to be embedded. Empty or non-string values (for
            example the NaN pandas yields for a blank CSV cell) are rejected
            locally without calling the API.
        openai_api_key (str): OpenAI API key.
        model (str): Model to be used for embedding. Default is "text-embedding-ada-002".

    Returns:
        List[float]: Embedding vector, or an empty list if the input is
        unusable or the request fails (the error is printed, not raised).
    """
    # Nothing to embed: skip the network round trip that could only fail.
    if not isinstance(text, str) or not text:
        print("Error getting embedding: text must be a non-empty string")
        return []

    try:
        # Keep the module-level key in sync, as earlier versions of this
        # helper did, so code relying on that side effect still works.
        openai.api_key = openai_api_key

        if hasattr(openai, "OpenAI"):
            # openai>=1.0: `openai.Embedding` was removed in favour of clients.
            client = openai.OpenAI(api_key=openai_api_key)
            response = client.embeddings.create(model=model, input=text)
            return list(response.data[0].embedding)

        # Legacy openai<1.0 interface.
        response = openai.Embedding.create(
            model=model,
            input=text
        )
        return response['data'][0]['embedding']
    except Exception as e:
        # Best-effort helper: callers treat an empty list as "no embedding".
        print(f"Error getting embedding: {e}")
        return []
30
+
31
+ # Function to process the uploaded CSV and store embeddings in Pinecone
32
def process_csv(file, openai_api_key: str, pinecone_api_key: str, pinecone_env: str) -> str:
    """
    Process the uploaded CSV file and store embeddings in Pinecone.

    The CSV must contain the columns ``product_id``, ``description``,
    ``product_name`` and ``image_url``. Each row's description is embedded
    with ``get_embedding`` and upserted into the "product-recommendations"
    index, which is created (serverless, AWS, 1536 dimensions) if missing.
    Rows whose embedding could not be generated are skipped.

    Args:
        file: Uploaded CSV file. Either a filesystem path (``str`` or
            ``os.PathLike``) or an object whose ``name`` attribute holds the
            path of the file on disk.
        openai_api_key (str): OpenAI API key.
        pinecone_api_key (str): Pinecone API key.
        pinecone_env (str): Pinecone environment; passed as the AWS region
            of the serverless index when it has to be created.

    Returns:
        str: Status message. Errors are printed and reported through the
        message rather than raised.
    """
    import os

    index_name = "product-recommendations"
    required_columns = ("product_id", "description", "product_name", "image_url")
    # Upsert in modest batches so large catalogues stay within Pinecone's
    # per-request size limits.
    upsert_batch_size = 100

    try:
        # Plain paths are used directly; upload wrappers expose the path as
        # `.name`. (Path objects are checked first because `Path.name` is
        # only the final path component.)
        if isinstance(file, (str, os.PathLike)):
            csv_path = file
        else:
            csv_path = file.name
        df = pd.read_csv(csv_path)

        # Validate the schema before touching Pinecone so a malformed file
        # never causes an index to be created.
        missing = [column for column in required_columns if column not in df.columns]
        if missing:
            print(f"Error processing CSV file: missing required column(s): {', '.join(missing)}")
            return "Failed to process CSV file."

        # Initialize Pinecone
        pc = Pinecone(api_key=pinecone_api_key)

        # Check if index exists
        if index_name not in pc.list_indexes().names():
            try:
                pc.create_index(
                    name=index_name,
                    dimension=1536,
                    spec=ServerlessSpec(cloud="aws", region=pinecone_env)
                )
            except Exception as e:
                print(f"Error creating Pinecone index: {e}")
                return "Failed to create Pinecone index."

        index = pc.Index(index_name)

        embeddings = []
        for _, row in df.iterrows():
            embedding = get_embedding(row['description'], openai_api_key)
            if embedding:
                metadata = {'product_name': row['product_name'], 'image_url': row['image_url']}
                embeddings.append((str(row['product_id']), embedding, metadata))

        # Every row failed to embed: do not claim anything was stored.
        if not embeddings and len(df) > 0:
            print("Error processing CSV file: no embeddings could be generated")
            return "Failed to generate embeddings."

        try:
            for start in range(0, len(embeddings), upsert_batch_size):
                index.upsert(embeddings[start:start + upsert_batch_size])
        except Exception as e:
            print(f"Error upserting embeddings to Pinecone: {e}")
            return "Failed to upsert embeddings."

        return "Product catalog processed and embeddings stored in Pinecone."
    except Exception as e:
        print(f"Error processing CSV file: {e}")
        return "Failed to process CSV file."