Spaces:

zayeem00
/

AI-SmartShopper

Sleeping

App Files Files Community

zayeem00 committed on Jul 28, 2024

Commit

7d28b70

verified ·

1 Parent(s): 68bc4b5

Create utils.py

Browse files

Files changed (1) hide show

utils.py +82 -0

utils.py ADDED Viewed

	@@ -0,0 +1,82 @@

+# utils.py
+import openai
+from pinecone import Pinecone, ServerlessSpec
+import pandas as pd
+from typing import List
+# Function to get embeddings from OpenAI's model
+def get_embedding(text: str, openai_api_key: str, model: str = "text-embedding-ada-002") -> List[float]:
+    """
+    Get embeddings for a given text using OpenAI's model.
+    Args:
+    text (str): Text to be embedded.
+    openai_api_key (str): OpenAI API key.
+    model (str): Model to be used for embedding. Default is "text-embedding-ada-002".
+    Returns:
+    List[float]: Embedding vector.
+    """
+    openai.api_key = openai_api_key
+    try:
+        response = openai.Embedding.create(
+            model=model,
+            input=text
+        )
+        return response['data'][0]['embedding']
+    except Exception as e:
+        print(f"Error getting embedding: {e}")
+        return []
+# Function to process the uploaded CSV and store embeddings in Pinecone
+def process_csv(file, openai_api_key: str, pinecone_api_key: str, pinecone_env: str) -> str:
+    """
+    Process the uploaded CSV file and store embeddings in Pinecone.
+    Args:
+    file: Uploaded CSV file.
+    openai_api_key (str): OpenAI API key.
+    pinecone_api_key (str): Pinecone API key.
+    pinecone_env (str): Pinecone environment.
+    Returns:
+    str: Status message.
+    """
+    try:
+        df = pd.read_csv(file.name)
+        # Initialize Pinecone
+        pc = Pinecone(api_key=pinecone_api_key)
+        index_name = "product-recommendations"
+        # Check if index exists
+        if index_name not in pc.list_indexes().names():
+            try:
+                pc.create_index(
+                    name=index_name,
+                    dimension=1536,
+                    spec=ServerlessSpec(cloud="aws", region=pinecone_env)
+                )
+            except Exception as e:
+                print(f"Error creating Pinecone index: {e}")
+                return "Failed to create Pinecone index."
+        index = pc.Index(index_name)
+        embeddings = []
+        for i, row in df.iterrows():
+            embedding = get_embedding(row['description'], openai_api_key)
+            if embedding:
+                embeddings.append((str(row['product_id']), embedding, {'product_name': row['product_name'], 'image_url': row['image_url']}))
+        if embeddings:
+            try:
+                index.upsert(embeddings)
+            except Exception as e:
+                print(f"Error upserting embeddings to Pinecone: {e}")
+                return "Failed to upsert embeddings."
+        return "Product catalog processed and embeddings stored in Pinecone."
+    except Exception as e:
+        print(f"Error processing CSV file: {e}")
+        return "Failed to process CSV file."