# Source: Book/src/recommend.py
# Uploaded by anwesh2410 in commit 54862ee ("Upload 39 files", verified).
import os
import re
from collections import Counter
from urllib.parse import quote_plus

import joblib
import pandas as pd
from pymongo import MongoClient
# Module-level resources. Everything below runs once, at import time: it loads
# the serialized model, opens a MongoDB client and reads the vector CSV.

# Load the pre-trained KNN model.
model_path = 'model/book_model.joblib'
model = joblib.load(model_path)

# MongoDB client setup.
# The connection string embeds credentials, so it can be supplied through the
# MONGODB_URI environment variable instead of being edited in source. When the
# variable is unset or empty, the original literal is used so existing setups
# behave exactly as before.
# NOTE(review): the fallback literal contains "[email protected]", which looks
# like e-mail-obfuscation residue rather than a usable password/host -- confirm
# the real URI and move the credentials out of source control.
# If a username or password contains reserved characters (e.g. "?" or "@"),
# encode each with urllib.parse.quote_plus before building the URI.
mongo_uri = (
    os.environ.get("MONGODB_URI")
    or "mongodb+srv://Atharva:[email protected]/"
)
client = MongoClient(mongo_uri)
db = client['book_dataset']
books_collection = db['BOOK']

# Vector table indexed by book title; rows are looked up by title and passed
# to the model's kneighbors() call.
df = pd.read_csv('data/vectors1.csv', index_col='Book-Title')
def search_books(title):
    """Search MongoDB for books whose title contains ``title``.

    The match is case-insensitive and literal: the text is escaped before it
    is used as a ``$regex`` pattern, so titles containing regex
    metacharacters (``C++``, ``What If?``, ``(Book 1)``) match themselves
    instead of being interpreted as a pattern or rejected as an invalid one.

    Args:
        title: Title, or part of a title, to look for. Converted with
            ``str()`` before use.

    Returns:
        A list of at most two dicts with the keys ``"title"`` and
        ``"image_url"``. ``"image_url"`` is ``None`` when the stored document
        has no ``Image-URL-M`` field. An empty list is returned, without
        querying the database, when ``title`` is ``None`` or blank.
    """
    if title is None:
        return []
    text = str(title)
    if not text.strip():
        # An empty pattern would match every document in the collection.
        return []

    query = {"Book-Title": {"$regex": re.escape(text), "$options": "i"}}
    projection = {"Book-Title": 1, "Image-URL-M": 1}
    cursor = books_collection.find(query, projection).limit(2)
    return [
        {"title": doc["Book-Title"], "image_url": doc.get("Image-URL-M")}
        for doc in cursor
    ]
def find_top_common_books(titles, *, n_neighbors=30, per_title=5, top_n=7):
    """Recommend the books that occur most often among the neighbours of ``titles``.

    For every title present in ``df``, the model is asked for its nearest
    neighbours and the ``per_title`` closest ones each receive one vote. The
    ``top_n`` most-voted books are then looked up with ``search_books``.
    Titles missing from ``df`` are reported on stdout and skipped.

    Args:
        titles: Iterable of book titles. A single string is treated as one
            title rather than being iterated character by character.
        n_neighbors: Number of neighbours requested from the model per title.
        per_title: Number of closest neighbours per title that receive a vote.
        top_n: Maximum number of most-voted books to look up.

    Returns:
        A list with one ``search_books`` result (itself a list of dicts) per
        selected book, ordered from most to least votes. Empty when
        ``titles`` is empty or none of the titles is present in ``df``.
    """
    if isinstance(titles, str):
        titles = [titles]

    votes = Counter()
    for title in titles:
        try:
            book = df.loc[title]
        except KeyError as e:
            print('The given book', e, 'does not exist')
            continue  # Skip titles that are not in the vector table.

        # NOTE(review): the queried title is presumably its own nearest
        # neighbour (distance 0) and therefore votes for itself -- confirm
        # whether input titles should be filtered out of the result.
        distance, indice = model.kneighbors([book.values], n_neighbors=n_neighbors)

        # Keep the per_title closest neighbours of this title.
        closest = pd.DataFrame({
            'title': df.iloc[indice[0]].index.values,
            'distance': distance[0],
        }).sort_values(by='distance', ascending=True).head(per_title)['title'].values
        votes.update(closest)

    # Resolve the most frequently recommended titles to their stored records.
    return [search_books(name) for name, _ in votes.most_common(top_n)]