File size: 2,440 Bytes
54862ee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import joblib
from pymongo import MongoClient
from collections import Counter
import pandas as pd
from urllib.parse import quote_plus
# Module-level setup: everything below runs at import time and performs I/O
# (reads the model file, creates the MongoDB client, reads the vectors CSV).

# Load the pre-trained KNN model
# NOTE(review): joblib.load unpickles the file, so only load model files from
# a trusted source.
model_path = 'model/book_model.joblib'
model = joblib.load(model_path)

# MongoDB client setup
# NOTE(review): the URI below embeds credentials in source and the text
# "[email protected]" looks like a mangled/redacted password+host segment --
# confirm the real connection string and consider loading it from
# configuration instead of hard-coding it.
client = MongoClient("mongodb+srv://Atharva:[email protected]/")
db = client['book_dataset']
# books_collection = db['BOOK']
# --- Disabled alternative client initialization (kept for reference) ---
# Builds the connection string from URL-encoded credentials via quote_plus.
# Encode credentials
# username = 'debook'
# password = 'debook?'  # Replace with your actual password

# encoded_username = quote_plus(username)
# encoded_password = quote_plus(password)

# # Create the connection string
# connection_string = f"mongodb+srv://{encoded_username}:{encoded_password}@cluster0.chkfn.mongodb.net/book_dataset?retryWrites=true&w=majority"

# # Initialize MongoDB client
# client = MongoClient(connection_string)
# db = client['book_dataset']

# Collection queried by search_books (fields used: Book-Title, Image-URL-M).
books_collection = db['BOOK']
# Table indexed by book title; each row's values are passed to
# model.kneighbors in find_top_common_books.
df = pd.read_csv('data/vectors1.csv', index_col='Book-Title')

def search_books(title):
    """Search MongoDB for books whose title contains the given text.

    The lookup is a case-insensitive substring match on the ``Book-Title``
    field of ``books_collection``. The text is escaped before it is used as
    a ``$regex`` pattern, so characters such as ``(``, ``+`` or ``?`` in a
    title are matched literally instead of being interpreted as regular
    expression syntax (which previously made such titles fail to match
    themselves or produce an invalid pattern).

    Args:
        title: Text to look for; it is converted with ``str()``.

    Returns:
        A list of at most two dicts with the keys ``"title"`` and
        ``"image_url"``. ``"image_url"`` is ``None`` when a matched document
        has no ``Image-URL-M`` field.
    """
    import re  # local import: the module's import block does not provide it

    # Escape regex metacharacters so the title is treated as plain text.
    pattern = re.escape(str(title))
    query = {"Book-Title": {"$regex": pattern, "$options": "i"}}
    projection = {"Book-Title": 1, "Image-URL-M": 1}
    matched_books = books_collection.find(query, projection).limit(2)
    return [
        # .get avoids a KeyError for documents without an image URL field.
        {"title": book["Book-Title"], "image_url": book.get("Image-URL-M")}
        for book in matched_books
    ]

def find_top_common_books(titles):
    """Recommend books that are frequent neighbours of the given titles.

    For every title found in the index of ``df``, 30 nearest neighbours are
    requested from ``model`` and the 5 closest are kept. The kept titles are
    tallied across all input titles, and the 7 most frequently recommended
    ones are looked up with ``search_books``.

    Args:
        titles: Iterable of book titles, matched against the index of ``df``.
            Titles that are not in the index are reported with ``print`` and
            skipped.

    Returns:
        A list with one entry per top title, in order of decreasing
        frequency; each entry is the list returned by ``search_books`` for
        that title (which may be empty). The result is an empty list when
        none of the titles is found.
    """
    book_counter = Counter()

    for title in titles:
        try:
            book = df.loc[title]
        except KeyError as e:
            print('The given book', e, 'does not exist')
            continue  # Skip titles that are not in the index

        # df.loc returns a DataFrame when the title occurs more than once in
        # the index; use the first row so kneighbors gets a single 2-D query.
        if isinstance(book, pd.DataFrame):
            book = book.iloc[0]

        # Find nearest neighbours of this book's vector.
        # NOTE(review): if the model was fitted on these same rows, the
        # queried title is likely to be among its own neighbours -- confirm
        # whether it should be excluded from the recommendations.
        distance, indice = model.kneighbors([book.values], n_neighbors=30)

        # Keep the 5 closest neighbour titles for this input title.
        neighbours = pd.DataFrame({
            'title': df.iloc[indice[0]].index.values,
            'distance': distance[0]
        })
        recommended_books = (
            neighbours.sort_values(by='distance', ascending=True)
            .head(5)['title']
            .values
        )

        # Tally how often each title is recommended across all inputs.
        book_counter.update(recommended_books)

    # Resolve the most frequently recommended titles to catalogue entries.
    return [search_books(book) for book, _ in book_counter.most_common(7)]