anwesh2410
/

Book

Model card Files Files and versions Community

Book / src /recommend.py

anwesh2410's picture

Upload 39 files

54862ee verified 2 months ago

history blame contribute delete

2.44 kB

	import os
	import re
	from collections import Counter
	from urllib.parse import quote_plus

	import joblib
	import pandas as pd
	from pymongo import MongoClient
	# Load the pre-trained KNN model once, at import time.
	model_path = 'model/book_model.joblib'
	model = joblib.load(model_path)

	# MongoDB client setup.
	# The connection URI is taken from the MONGODB_URI environment variable when
	# it is set, so credentials do not have to live in source control. The
	# literal below is kept only as a backward-compatible fallback.
	# NOTE(review): the fallback embeds a username/password; rotate it and drop
	# the literal once every deployment provides MONGODB_URI.
	# When building a URI by hand, percent-encode the username and password with
	# urllib.parse.quote_plus if they contain special characters.
	client = MongoClient(
	    os.environ.get(
	        "MONGODB_URI",
	        "mongodb+srv://Atharva:[email protected]/",
	    )
	)
	db = client['book_dataset']

	# Collection queried by search_books (reads "Book-Title" and "Image-URL-M").
	books_collection = db['BOOK']

	# Vectors indexed by book title; rows are fed to the KNN model.
	df = pd.read_csv('data/vectors1.csv', index_col='Book-Title')

	def search_books(title):
	    """Search MongoDB for books whose title contains the given text.

	    The text is matched literally (regex metacharacters are escaped) and
	    case-insensitively against the ``Book-Title`` field of
	    ``books_collection``.

	    Args:
	        title: Text to look for; converted with ``str()`` before matching.

	    Returns:
	        A list of at most two dicts with the keys ``"title"`` and
	        ``"image_url"``. ``image_url`` is ``None`` for a document that has
	        no ``Image-URL-M`` field.
	    """
	    # Escape the text so that titles containing characters such as "(", "+"
	    # or "?" are matched as written instead of being interpreted as a
	    # regular expression (which can miss the book or be an invalid pattern).
	    pattern = re.escape(str(title))
	    query = {"Book-Title": {"$regex": pattern, "$options": "i"}}
	    projection = {"Book-Title": 1, "Image-URL-M": 1}
	    matched_books = books_collection.find(query, projection).limit(2)
	    return [
	        {"title": book["Book-Title"], "image_url": book.get("Image-URL-M")}
	        for book in matched_books
	    ]

	def find_top_common_books(titles):
	    """Recommend the books that recur most among the neighbours of ``titles``.

	    For every title present in ``df`` the KNN ``model`` is asked for 30
	    nearest neighbours and the 5 closest are tallied. The 7 most frequently
	    tallied titles are then looked up with ``search_books``.

	    Args:
	        titles: Iterable of book titles (index labels of ``df``). A single
	            ``str`` is treated as one title instead of being iterated
	            character by character.

	    Returns:
	        A list with one entry per top title; each entry is the list that
	        ``search_books`` returned for that title. The list is empty when
	        no given title is found in ``df``.
	    """
	    if isinstance(titles, str):
	        titles = [titles]

	    book_counter = Counter()
	    for title in titles:
	        try:
	            book = df.loc[title]
	        except KeyError as e:
	            print('The given book', e, 'does not exist')
	            continue  # Skip titles that are not in the vector table.

	        # Find nearest neighbours of this book's vector.
	        distance, indice = model.kneighbors([book.values], n_neighbors=30)

	        # Keep the 5 closest neighbours for this title.
	        # NOTE(review): if the model was fit on the rows of df, the queried
	        # title is its own nearest neighbour and takes one of the five
	        # slots - confirm whether it should be excluded.
	        closest = (
	            pd.DataFrame({
	                'title': df.iloc[indice[0]].index.values,
	                'distance': distance[0],
	            })
	            .sort_values(by='distance', ascending=True)
	            .head(5)['title']
	            .values
	        )
	        book_counter.update(closest)

	    # Resolve the most frequently recommended titles to catalogue entries.
	    return [search_books(book) for book, _ in book_counter.most_common(7)]