Spaces:
Sleeping
Sleeping
# utils.py
import openai | |
from pinecone import Pinecone, ServerlessSpec | |
import pandas as pd | |
from typing import List | |
# Function to get embeddings from OpenAI's model
def get_embedding(text: str, openai_api_key: str, model: str = "text-embedding-ada-002") -> List[float]:
    """
    Get embeddings for a given text using OpenAI's model.

    Args:
        text (str): Text to be embedded. Empty or non-string values (for
            example the NaN pandas produces for a blank CSV cell) are
            rejected without calling the API.
        openai_api_key (str): OpenAI API key. It is assigned to the
            module-level ``openai.api_key``, so it stays in effect for later
            calls made through the ``openai`` module.
        model (str): Model to be used for embedding. Default is "text-embedding-ada-002".

    Returns:
        List[float]: Embedding vector, or an empty list when the input is
        unusable or the request fails (the error is printed, not raised).
    """
    # Nothing to embed: return the same "no result" value as the error path
    # below, without spending a network round-trip on a request that cannot
    # produce a useful vector.
    if not isinstance(text, str) or not text:
        print(f"Error getting embedding: invalid input text {text!r}")
        return []

    openai.api_key = openai_api_key
    try:
        # NOTE(review): openai.Embedding.create is the pre-1.0 openai-python
        # interface; confirm the pinned openai version still provides it.
        response = openai.Embedding.create(
            model=model,
            input=text,
        )
        # A single input string yields a single entry in response['data'].
        return response['data'][0]['embedding']
    except Exception as e:
        # Best-effort by design: callers treat an empty list as "skip this item".
        print(f"Error getting embedding: {e}")
        return []
# Function to process the uploaded CSV and store embeddings in Pinecone
def process_csv(file, openai_api_key: str, pinecone_api_key: str, pinecone_env: str) -> str:
    """
    Process the uploaded CSV file and store embeddings in Pinecone.

    The CSV must contain the columns ``product_id``, ``description``,
    ``product_name`` and ``image_url``. Each row's description is embedded
    and upserted into the "product-recommendations" index (created on first
    use) with the product name and image URL as metadata. Rows whose
    embedding could not be generated are skipped.

    Args:
        file: Uploaded CSV file; only its ``name`` attribute is used and is
            handed to ``pandas.read_csv``.
        openai_api_key (str): OpenAI API key.
        pinecone_api_key (str): Pinecone API key.
        pinecone_env (str): Region passed to the serverless index spec
            (cloud is fixed to "aws").

    Returns:
        str: Status message. Errors are printed and reported through the
        returned message; no exception is propagated to the caller.
    """
    required_columns = ("product_id", "description", "product_name", "image_url")
    index_name = "product-recommendations"
    # Pinecone limits the size of a single upsert request, so vectors are
    # sent in modest batches instead of one request for the whole catalog.
    upsert_batch_size = 100
    nothing_stored = "No embeddings were generated; nothing was stored in Pinecone."

    try:
        df = pd.read_csv(file.name)

        # Validate the schema before touching Pinecone so a malformed file
        # does not leave a freshly created, empty index behind.
        missing = [col for col in required_columns if col not in df.columns]
        if missing:
            print(f"Error processing CSV file: missing columns {missing}")
            return "Failed to process CSV file."
        if df.empty:
            return nothing_stored

        # Initialize Pinecone
        pc = Pinecone(api_key=pinecone_api_key)

        # Create the index on first use
        if index_name not in pc.list_indexes().names():
            try:
                pc.create_index(
                    name=index_name,
                    # Must equal the length of the vectors returned by
                    # get_embedding (1536 for text-embedding-ada-002).
                    dimension=1536,
                    spec=ServerlessSpec(cloud="aws", region=pinecone_env),
                )
            except Exception as e:
                print(f"Error creating Pinecone index: {e}")
                return "Failed to create Pinecone index."

        index = pc.Index(index_name)

        # Build (id, vector, metadata) tuples; get_embedding returns [] on
        # failure, and such rows are skipped.
        embeddings = []
        for _, row in df.iterrows():
            embedding = get_embedding(row['description'], openai_api_key)
            if not embedding:
                continue
            embeddings.append((
                str(row['product_id']),
                embedding,
                {'product_name': row['product_name'], 'image_url': row['image_url']},
            ))

        # Do not report success when every embedding call failed.
        if not embeddings:
            return nothing_stored

        try:
            for start in range(0, len(embeddings), upsert_batch_size):
                index.upsert(embeddings[start:start + upsert_batch_size])
        except Exception as e:
            print(f"Error upserting embeddings to Pinecone: {e}")
            return "Failed to upsert embeddings."

        return "Product catalog processed and embeddings stored in Pinecone."
    except Exception as e:
        # Top-level boundary: unreadable file, unexpected client errors, etc.
        print(f"Error processing CSV file: {e}")
        return "Failed to process CSV file."