|
import os
import re
from collections import Counter
from urllib.parse import quote_plus

import joblib
import pandas as pd
from pymongo import MongoClient
|
|
|
# Serialized model, loaded once at import time. Its ``kneighbors`` method is
# used by the recommendation code in this module.
# NOTE: joblib.load unpickles the file, so only load model files you trust.
model_path = 'model/book_model.joblib'
model = joblib.load(model_path)

# SECURITY: the fallback URI below embeds credentials in source control.
# Set the MONGODB_URI environment variable to supply the connection string
# instead; the literal is kept only so existing deployments keep working.
# TODO(review): rotate the embedded credentials and drop the fallback.
mongo_uri = os.environ.get(
    "MONGODB_URI",
    "mongodb+srv://Atharva:[email protected]/",
)
client = MongoClient(mongo_uri)
db = client['book_dataset']
books_collection = db['BOOK']

# Table read from CSV and indexed by the 'Book-Title' column; rows are looked
# up by title and by position when computing recommendations.
df = pd.read_csv('data/vectors1.csv', index_col='Book-Title')
|
|
|
def search_books(title):
    """Search MongoDB for books whose title contains the given text.

    The match is a case-insensitive substring match on the ``Book-Title``
    field of ``books_collection``. The title is treated as literal text:
    regex metacharacters in it (``(``, ``)``, ``+``, ``?``, ``.`` ...) are
    escaped so that titles such as "Foo (Book 1)" match themselves instead
    of being interpreted as a pattern.

    Args:
        title: Text to look for; converted with ``str`` before use.

    Returns:
        A list of at most two dicts with keys ``"title"`` and
        ``"image_url"``. ``"image_url"`` is ``None`` when the matched
        document has no ``Image-URL-M`` field.
    """
    query = {"Book-Title": {"$regex": re.escape(str(title)), "$options": "i"}}
    projection = {"Book-Title": 1, "Image-URL-M": 1}
    matched_books = books_collection.find(query, projection).limit(2)
    return [
        {"title": book["Book-Title"], "image_url": book.get("Image-URL-M")}
        for book in matched_books
    ]
|
|
|
def find_top_common_books(titles):
    """Recommend books that are close neighbours of several given titles.

    For every title found in the index of the module-level ``df``, the
    module-level ``model`` is asked for 30 neighbours; the 5 with the
    smallest distance are tallied. The 7 most frequently tallied titles
    are then looked up with ``search_books``.

    Args:
        titles: Iterable of book titles used as seeds. A title missing
            from ``df`` is reported with ``print`` and skipped.

    Returns:
        A list with one entry per top title (at most 7). Each entry is the
        list returned by ``search_books`` for that title, so the result is
        a list of lists. Empty when no seed title was found.
    """
    book_counter = Counter()

    for title in titles:
        try:
            book = df.loc[title]
        except KeyError as e:
            print('The given book', e, 'does not exist')
            continue

        # A title that occurs more than once in the index makes ``.loc``
        # return a DataFrame; use its first row so the query below stays a
        # single vector.
        if isinstance(book, pd.DataFrame):
            book = book.iloc[0]

        # NOTE(review): if the seed's own vector is part of the data the
        # model was fitted on, the seed presumably shows up among its own
        # neighbours - confirm whether it should be excluded.
        distance, indice = model.kneighbors([book.values], n_neighbors=30)

        recommended_books = pd.DataFrame({
            'title': df.iloc[indice[0]].index.values,
            'distance': distance[0],
        }).sort_values(by='distance', ascending=True).head(5)['title'].values

        book_counter.update(recommended_books)

    top_common_books = [book for book, _ in book_counter.most_common(7)]
    return [search_books(book) for book in top_common_books]