Spaces:
Sleeping
Sleeping
import pandas as pd | |
from itertools import combinations | |
from collections import defaultdict | |
from load_data import items | |
# Flatten the items into a single list for vectorization | |
flat_items = [item for sublist in items for item in sublist] | |
# Create a co-occurrence matrix | |
product_co_occurrence = defaultdict(int) | |
for sublist in items: | |
for combination in combinations(sublist, 2): | |
product_co_occurrence[tuple(sorted(combination))] += 1 | |
# Convert to DataFrame | |
co_occurrence_df = pd.DataFrame(list(product_co_occurrence.items()), columns=["Pair", "Frequency"]) | |
# Sort to find the most common co-occurring products | |
co_occurrence_df = co_occurrence_df.sort_values(by="Frequency", ascending=False) | |
# Function to calculate confidence | |
def calculate_confidence(item1, item2, df): | |
item1_transactions = df[df[item1] > 0].shape[0] | |
both_transactions = df[(df[item1] > 0) & (df[item2] > 0)].shape[0] | |
return both_transactions / item1_transactions | |
# Function to get recommendations based on a product | |
def get_recommendations(product_name, co_occurrence_df, df, confidence_threshold=0.1): | |
# Find pairs that include the product name | |
relevant_pairs = co_occurrence_df[co_occurrence_df["Pair"].apply(lambda x: product_name in x)] | |
# Extract the other product in the pair and calculate confidence | |
recommended_products = [] | |
for pair in relevant_pairs["Pair"]: | |
other_product = pair[0] if pair[1] == product_name else pair[1] | |
confidence = calculate_confidence(product_name, other_product, df) | |
if confidence > confidence_threshold: | |
recommended_products.append(other_product) | |
# Return the top recommendations | |
return recommended_products[:3] | |