File size: 2,440 Bytes
54862ee |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
import os
import re
from collections import Counter
from urllib.parse import quote_plus

import joblib
import pandas as pd
from pymongo import MongoClient
# Load the pre-trained KNN model once at import time; find_top_common_books
# queries it for nearest neighbours.
model_path = 'model/book_model.joblib'
model = joblib.load(model_path)

# MongoDB client setup.
# The connection string is taken from the MONGODB_URI environment variable so
# that credentials do not have to live in source control. The literal below is
# kept only as a backward-compatible fallback for deployments that have not
# set the variable yet.
# NOTE(review): the fallback literal embeds a credential and looks mangled
# ("[email protected]") -- confirm the real URI, move it to the environment
# and rotate the password.
# When a URI is assembled from a separate username and password, percent-encode
# both parts with urllib.parse.quote_plus before interpolating them.
client = MongoClient(
    os.environ.get(
        "MONGODB_URI",
        "mongodb+srv://Atharva:[email protected]/",
    )
)
db = client['book_dataset']
books_collection = db['BOOK']

# Vector table read from CSV and indexed by the 'Book-Title' column.
# presumably one feature-vector row per book, in the same row order the model
# was fitted on -- TODO confirm against the training script.
df = pd.read_csv('data/vectors1.csv', index_col='Book-Title')
def search_books(title):
    """Search MongoDB for books whose title contains the given text.

    The lookup is a case-insensitive substring match on the ``Book-Title``
    field. ``title`` is escaped before it is used as a pattern, so titles
    containing regex metacharacters (``(``, ``)``, ``+``, ``?``, ``[`` ...)
    are matched literally instead of being interpreted as a regular
    expression.

    Args:
        title: Text to look for; converted with ``str()`` first.

    Returns:
        A list of at most two dicts with the keys ``"title"`` and
        ``"image_url"``. ``"image_url"`` is ``None`` when the matched
        document has no ``Image-URL-M`` field.
    """
    # Escape so that e.g. "Some Title (Book 1)" matches the literal parentheses.
    pattern = re.escape(str(title))
    query = {"Book-Title": {"$regex": pattern, "$options": "i"}}
    projection = {"Book-Title": 1, "Image-URL-M": 1}
    cursor = books_collection.find(query, projection).limit(2)
    return [
        {"title": doc["Book-Title"], "image_url": doc.get("Image-URL-M")}
        for doc in cursor
    ]
def find_top_common_books(titles, *, n_neighbors=30, per_title=5, top_n=7):
    """Recommend books that are close to several of the given titles.

    For every title found in ``df``, the model's nearest neighbours are
    looked up and the ``per_title`` closest ones are recorded. Titles are
    then ranked by how often they were recorded across all inputs, and the
    ``top_n`` most frequent ones are resolved through ``search_books``.

    Args:
        titles: Iterable of book titles. A single string is treated as one
            title rather than being iterated character by character.
        n_neighbors: Number of neighbours requested from the model per title.
        per_title: Number of closest neighbours kept per title.
        top_n: Number of most frequently recommended titles to return.

    Returns:
        A list with one entry per selected title; each entry is the list
        returned by ``search_books`` for that title. The list is empty when
        none of the given titles is present in ``df``.
    """
    if isinstance(titles, str):
        titles = [titles]

    book_counter = Counter()
    for title in titles:
        try:
            book = df.loc[title]
        except KeyError as e:
            print('The given book', e, 'does not exist')
            continue  # Skip titles that have no vector

        # NOTE(review): the queried book is presumably returned as its own
        # nearest neighbour (distance 0) and therefore counted too -- confirm
        # whether input titles should be excluded from the recommendations.
        distance, indice = model.kneighbors(
            [book.values], n_neighbors=n_neighbors
        )

        # Map neighbour row positions back to titles and keep the closest ones.
        closest_titles = pd.DataFrame({
            'title': df.iloc[indice[0]].index.values,
            'distance': distance[0],
        }).sort_values(by='distance', ascending=True).head(per_title)['title'].values

        book_counter.update(closest_titles)

    # Resolve the most frequently recommended titles to title/image records.
    return [search_books(book) for book, _ in book_counter.most_common(top_n)]