import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics.pairwise import cosine_similarity
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, concatenate, Dense
from tensorflow.keras.optimizers import Adam

# Check whether a GPU is available for training.
gpu_available = tf.config.list_physical_devices('GPU')
print(gpu_available)

# Load the book metadata and user ratings.
books = pd.read_csv("../data/datasets/books.csv")
ratings = pd.read_csv("../data/datasets/ratings.csv")

# Encode user and book IDs as consecutive integers for the embedding layers.
user_encoder = LabelEncoder()
book_encoder = LabelEncoder()

ratings["user_id"] = user_encoder.fit_transform(ratings["user_id"])
ratings["book_id"] = book_encoder.fit_transform(ratings["book_id"])

# Array of all encoded book indices, used when generating recommendations.
all_books = np.arange(len(books))

def build_model(num_users, num_books, embedding_size=50):
    """
    Build a recommendation model.

    Args:
        num_users (int): The number of users in the dataset.
        num_books (int): The number of books in the dataset.
        embedding_size (int, optional): The size of the embedding vectors. Defaults to 50.

    Returns:
        keras.Model: The compiled recommendation model.
    """
    user_input = Input(shape=(1,))
    book_input = Input(shape=(1,))

    # Learn a dense embedding vector for each user and each book.
    user_embedding = Embedding(input_dim=num_users, output_dim=embedding_size)(user_input)
    book_embedding = Embedding(input_dim=num_books, output_dim=embedding_size)(book_input)

    user_flat = Flatten()(user_embedding)
    book_flat = Flatten()(book_embedding)

    # Concatenate the two embeddings and predict the rating with a small MLP.
    merged = concatenate([user_flat, book_flat])
    dense1 = Dense(128, activation="relu")(merged)
    output = Dense(1)(dense1)

    model = Model(inputs=[user_input, book_input], outputs=output)
    model.compile(loss="mean_squared_error", optimizer=Adam(learning_rate=0.001))

    return model

# Hold out a test set so the model is not evaluated on its own training data.
train_ratings, test_ratings = train_test_split(ratings, test_size=0.2, random_state=42)

device_name = '/GPU:0' if gpu_available else '/CPU:0'
with tf.device(device_name):
    model_cf = build_model(num_users=ratings["user_id"].nunique(),
                           num_books=len(books))
    model_cf.summary()
    history = model_cf.fit([train_ratings["user_id"], train_ratings["book_id"]],
                           train_ratings["rating"],
                           epochs=5,
                           batch_size=128,
                           validation_split=0.1)

model_cf.save("recommendation_model.keras")

# Evaluate the collaborative-filtering model on the held-out test set.
test_loss = model_cf.evaluate([test_ratings["user_id"], test_ratings["book_id"]],
                              test_ratings["rating"])
print(f"Collaborative Filtering Test Loss: {test_loss}")

# Example usage: recommendations for one user and one seed book.
user_id = 0
book_name = "The Great Gatsby"

print("Content-Based Recommendation:")
print(content_based_recommendation(book_name, books))

print("\nModel-Recommended History-Based Recommendation:")
print(history_based_recommendation(user_id, model_cf, ratings))

print("\nHybrid Recommendation:")
print(hybrid_recommendation(user_id, book_name, model_cf, books, ratings))