Spaces:

bt5153-books
/

README

Running

App Files Files Community

Yew Chong committed on May 4, 2024

Commit

15cf29b

1 Parent(s): 7748238

frontend

Browse files

Files changed (6) hide show

README.md +15 -1
main.py +135 -0
model.py +309 -0
templates/index.html +17 -0
templates/recommended_books.html +88 -0
templates/test_users.html +35 -0

README.md CHANGED Viewed

@@ -16,7 +16,21 @@ Hello, and welcome to our books recommendation project for BT5153!
 # Project Directory
 ## Front-end UI
-**To Add Frontend Here**
 ## Source Code
 Codes are stored under `./Books` as `.ipynb` files, and named according to the order they should be run.

 # Project Directory
 ## Front-end UI
+### Book Recommendation Ensemble Model Interface
+This interface generates recommendations, but only for a list of randomly sampled test users from our dataset.
+This interface was created on Python version 3.11.4, with requirements listed in `requirements.txt`.
+Some requirements may be missing from that list; please install any additional packages as needed.
+All sub-models and the final ensemble classifier model were trained in advance. They are included inside the Data
+folder.
+All data used for live recommendation is in the Data folder. Since the Data folder is too large to be submitted,
+we will submit a representative subset of the data.
+Start the interface with `python -m flask run`.
 ## Source Code
 Codes are stored under `./Books` as `.ipynb` files, and named according to the order they should be run.

main.py ADDED Viewed

	@@ -0,0 +1,135 @@

+import logging
+from flask import Flask, render_template, request
+from model import Model
+import plotly.graph_objects as go
+# Single shared Model instance for all requests; constructing it reads Data/books.csv (see Model.__init__)
+model = Model()
+app = Flask(__name__)
+# NOTE(review): model.py already calls logging.basicConfig at import time, so this call is probably a no-op - confirm
+logging.basicConfig(level=logging.DEBUG)
+# In-process memo of chosen_user -> predictions dataframe (or None when the user has no predictions); never evicted
+PRED_CACHE = dict()
+@app.route('/')
+def index():
+    """Serve the landing page rendered from ``index.html``."""
+    landing_page = render_template('index.html')
+    return landing_page
+@app.route('/test_users')
+def test_users():
+    """Render one card per sampled test user, showing that user's top-rated titles.
+    The first call triggers the full-test prediction run (or loads it from the cache file);
+    later calls reuse what is already held on the shared ``model``.
+    """
+    model.run_predictions_on_full_test()
+    # prepare_user_details() re-reads two CSV files and regroups them. user_details starts out as
+    # None and the loaded dataset does not change afterwards, so build it once instead of per request.
+    if model.user_details is None:
+        model.prepare_user_details()
+    # Mapping of user_id -> list of top book titles, consumed by the template's card loop
+    user_details = model.user_details['top_books'].to_dict()
+    return render_template('test_users.html', user_details=user_details)
+@app.route('/test_users/<chosen_user>')
+def process(chosen_user):
+    """List every scored book for ``chosen_user`` together with the recommend / not-recommend flag.
+    Returns the plain string "No predictions hit!" when the model has nothing for this user.
+    """
+    # Consult the memo first; compute and remember the result (even a None) on a miss
+    try:
+        user_preds = PRED_CACHE[chosen_user]
+    except KeyError:
+        user_preds = model.get_user_predictions(chosen_user)
+        PRED_CACHE[chosen_user] = user_preds
+    if user_preds is None:
+        return "No predictions hit!"
+    # Table indexed by book_id; the template links each row to '/test_users/<user>/<book_id>'
+    display_cols = ['title_without_series', 'target', 'final_score']
+    book_table = user_preds.set_index('book_id')[display_cols]
+    book_table = book_table.assign(is_recommended=book_table['final_score'] >= 0.45)
+    return render_template('recommended_books.html', chosen_user=chosen_user, recommended_books=book_table)
+@app.route('/test_users/<chosen_user>/<int:chosen_book>')
+def explain(chosen_user, chosen_book):
+    """Render the user's book table plus a waterfall breakdown for one chosen book.
+    :param chosen_user: user ID taken from the URL
+    :param chosen_book: integer book ID taken from the URL
+    :return: the rendered page; the string "No predictions hit!" when the user has no predictions;
+             or a 404 response when the book is not among this user's predictions
+    """
+    # This should be a cache hit since we're coming from `process`, but we include the else path just in case
+    if chosen_user in PRED_CACHE:
+        preds_df = PRED_CACHE[chosen_user]
+    else:
+        preds_df = model.get_user_predictions(chosen_user)
+        PRED_CACHE[chosen_user] = preds_df
+    # get_user_predictions returns None for users without predictions; handle it the same way `process` does
+    if preds_df is None:
+        return "No predictions hit!"
+    indexed_preds = preds_df.set_index('book_id')
+    # A hand-typed or stale URL can name a book that was never scored for this user
+    if chosen_book not in indexed_preds.index:
+        return f"No prediction found for book {chosen_book}", 404
+    # Table of all scored books for this user (re-rendered above the explanation)
+    recommended_books = indexed_preds[['title_without_series', 'target', 'final_score']].copy()
+    recommended_books['is_recommended'] = recommended_books['final_score'] >= 0.45
+    logging.info(f"Generating explanation for user:{chosen_user}, book:{chosen_book}")
+    book_df = indexed_preds.loc[chosen_book]
+    # Column order here must line up index-for-index with the display names and the reasons list below
+    waterfall_cols = [
+        'intercept',
+        'clus_score',
+        'gen_score',
+        'desc_score',
+        'rev_score',
+        'user_score',
+        'tit_score',
+        'final_score'
+    ]
+    waterfall_display_cols = [
+        'Intercept',
+        'Book Clustering Similarity',
+        'Genre Similarity',
+        'Description Topic Similarity',
+        'Review Vector Similarity',
+        'User Clustering Similarity',
+        'Title Vector Similarity',
+        'Sum of Sub-Model Scores'
+    ]
+    waterfall_data = book_df[waterfall_cols].tolist()
+    # One 'relative' bar per contribution, then a closing 'total' bar
+    measures = ['relative'] * (len(waterfall_cols) - 1) + ['total']
+    fig = go.Figure(
+        go.Waterfall(
+            name='Recommendation explanation',
+            orientation='h',
+            measure=measures,
+            y=waterfall_display_cols,
+            x=waterfall_data
+        )
+    )
+    fig_html = fig.to_html(full_html=False)
+    # Largest single contribution among the intercept and the six sub-model terms (final_score excluded)
+    top_model_idx = waterfall_cols.index(book_df[waterfall_cols[:-1]].astype(float).idxmax())
+    top_model = waterfall_display_cols[top_model_idx]
+    explanation_str = f"The highest contributing model was {top_model}. "
+    if book_df['final_score'] >= 0.45:
+        reasons = [
+            '-', # intercept
+            'it is similar to books you enjoyed in terms of book statistics like popularity and page count.',
+            'it is similar to books you enjoyed in terms of overlapping genres.',
+            'it is similar to books you enjoyed in terms of description similarity.',
+            'it is similar to books you enjoyed in terms of review similarity.',
+            'other users similar to you in taste enjoyed this book.',
+            'it is similar to books you enjoyed in terms of title similarity.',
+        ]
+        explanation_str += "This means that this book was recommended since "
+        explanation_str += reasons[top_model_idx]
+    else:
+        explanation_str += "However, the confidence score is below the threshold of 0.45, so it is not recommended."
+    # Both numbers are pre-formatted here because the template splices them into a KaTeX formula
+    score_sum = f"{sum(waterfall_data[:-1]):.5f}"
+    final_score = f"{book_df['final_score']:.5f}"
+    return render_template(
+        'recommended_books.html',
+        chosen_user=chosen_user,
+        recommended_books=recommended_books,
+        render_explanation='true',
+        fig=fig_html,
+        score_sum=score_sum,
+        final_score=final_score,
+        explanation_str=explanation_str
+    )
+# Direct-execution entry point (the README starts the app with `python -m flask run` instead)
+if __name__ == '__main__':
+    # NOTE(review): debug=True turns on Flask's debug mode - confirm this path is never used for a public deployment
+    app.run(debug=True)

model.py ADDED Viewed

	@@ -0,0 +1,309 @@

+import numpy as np
+import pandas as pd
+from tqdm import tqdm
+import pickle
+import os
+from collections import defaultdict
+import random
+import warnings
+import logging
+from langchain_community.embeddings import HuggingFaceBgeEmbeddings
+from langchain_community.vectorstores import FAISS
+# Suppress every warning category for the whole process
+warnings.filterwarnings("ignore")
+# Seed the stdlib RNG at import; Model.run_predictions_on_full_test draws its user/book samples from it
+random.seed(5153)
+# Root logger at DEBUG so the logging.info progress messages below are emitted
+logging.basicConfig(level=logging.DEBUG)
+class Model:
+    """Ensemble book recommender used by the Flask front-end.
+    Six sub-models (book-stats cluster, genre, description, review, user similarity, title) each
+    score candidate books per user; a pickled pipeline (``Data/final_model.pkl``) turns those scores
+    into ``final_score``. Results of the full test run are cached in ``Data/cache.pkl``.
+    """
+    def __init__(self):
+        """Set up empty state and load the book catalogue from ``Data/books.csv``."""
+        self.cache_path = "Data/cache.pkl"
+        self.is_loaded = False
+        self.dataset = None
+        self.predictions = None
+        self.user_details = None
+        self.temp_store = None
+        self.pipeline = None
+        self.chosen_books_per_user = None
+        self.all_books = pd.read_csv("Data/books.csv")
+        logging.info("Initialized model")
+    def run_predictions_on_full_test(self):
+        """Populate predictions for a random sample of test users, using the cache file when present.
+        Does nothing if predictions are already loaded in this process.
+        """
+        if self.is_loaded:
+            logging.info("Model is already loaded")
+            return
+        if self.does_cache_exist():
+            logging.info("Retrieving cached full-test predictions")
+            self.retrieve_cache()
+            logging.info("Completed full-test")
+            return
+        logging.info("Generating full-test predictions")
+        reviews_df = pd.read_csv("Data/final_dataset/reviews_test.csv")
+        good_reviews = reviews_df[reviews_df['rating'] > 3]
+        good_user_books_dict = good_reviews.groupby('user_id')['book_id'].unique().apply(list).to_dict()
+        # to further minimize compute time, we only use 20 (randomly sampled) users
+        num_random_users = 20
+        candidate_users = list(good_user_books_dict.keys())
+        # random.sample raises ValueError when asked for more items than exist; a reduced data
+        # subset may hold fewer than 20 users, so cap the sample size at what is available
+        randomly_sampled_users = random.sample(candidate_users, min(len(candidate_users), num_random_users))
+        sampled_good_user_books_dict = {user_id: good_user_books_dict[user_id] for user_id in randomly_sampled_users}
+        # to minimize compute time, we take only 150 random (good) books per user
+        # prepare it in the form of user_id -> list[book_id]
+        num_rand_books_per_user = 150
+        chosen_books_per_user = {
+            user_id: random.sample(books, min(len(books), num_rand_books_per_user))
+            for user_id, books in sampled_good_user_books_dict.items()
+        }
+        # save this for reference
+        self.chosen_books_per_user = chosen_books_per_user
+        # run predictions on all of the above users
+        self.prepare_predictions(chosen_books_per_user)
+        logging.info("Caching full-test predictions")
+        self.cache_results()
+        logging.info("Completed full-test")
+    def run_prediction_on_adhoc_user(self, chosen_book_ids):
+        """Run the ensemble for a single ad-hoc user, stored under the user ID ``'current_user'``."""
+        self.prepare_predictions(
+            {'current_user': chosen_book_ids}
+        )
+    def prepare_predictions(self, target_users_and_books: dict[str, list[str]]):
+        """
+        Given a dictionary of user_id to list[book_id], where the list of book IDs are the books favored by
+        the associated user, this function scores candidate books for each user with every sub-model and
+        with the final ensemble pipeline.
+        Nothing is returned. Results are stored on the instance: ``self.dataset`` (combined sub-model
+        scores), ``self.predictions`` (scores plus ``final_score`` / ``would_recommend``),
+        ``self.temp_store`` (raw per-sub-model frames) and ``self.pipeline`` (the unpickled ensemble).
+        :param target_users_and_books: Dictionary of user ID to favored books (as book IDs)
+        """
+        target_user_list = list(target_users_and_books.keys())
+        file_dict = {}
+        for filename in ['reviews_test', 'users_test', 'reviews_sub']:
+            file_dict[filename] = pd.read_csv(f'Data/final_dataset/{filename}.csv')
+        file_dict['users'] = file_dict['users_test']
+        file_dict['reviews'] = file_dict['reviews_test']
+        file_dict['good_reviews'] = file_dict['reviews'][file_dict['reviews']['rating'] > 3]
+        file_dict['books'] = pd.read_csv('Data/books.csv')
+        #################################################################################
+        # GENRE MODEL; DESCRIPTION MODEL; TITLE MODEL; BOOK STATS CLUSTER MODEL
+        #################################################################################
+        clusterbooks = pd.DataFrame(
+            np.load('Data/Recommended Storage/cluster_books.npy', allow_pickle=True),
+            columns=['target_book', 'recco_book_id', 'similarity_score']).astype(float)  # wasn't saved as float
+        genrebooks = pd.DataFrame(
+            np.load('Data/Recommended Storage/genres_books.npy', allow_pickle=True),
+            columns=['target_book', 'recco_book_id', 'similarity_score'])
+        descbooks = pd.DataFrame(
+            np.load('Data/Recommended Storage/description_books.npy', allow_pickle=True),
+            columns=['target_book', 'recco_book_id', 'similarity_score'])
+        revbooks = pd.DataFrame(
+            np.load('Data/Recommended Storage/reviews_books_new.npy', allow_pickle=True),
+            columns=['target_book', 'recco_book_id', 'similarity_score'])
+        def optimized_converter(simbooks, user_id_list, name, prog_bar_description):
+            # For each user: sum the similarity of every candidate book to the user's favored books,
+            # drop the favored books themselves, average over the number of favored books, keep the top 30
+            user_ratings_list = pd.DataFrame(columns=['user_id', 'recco_book_id', 'similarity_score'])
+            for curr_user_id in tqdm(user_id_list, desc=prog_bar_description):
+                curr_user_books = pd.Series(target_users_and_books[curr_user_id])
+                relevant_simbooks = simbooks[simbooks['target_book'].isin(curr_user_books)]
+                summed_scores = relevant_simbooks.groupby('recco_book_id')['similarity_score'].sum().reset_index()
+                summed_scores['user_id'] = curr_user_id
+                if not curr_user_books.empty:
+                    summed_scores = summed_scores[~summed_scores['recco_book_id'].isin(curr_user_books)]
+                    # TODO: Think about how to adjust this for small number of books
+                    summed_scores['similarity_score'] /= len(curr_user_books)
+                top_30 = summed_scores.nlargest(30, 'similarity_score')
+                user_ratings_list = pd.concat([user_ratings_list, top_30], ignore_index=True)
+            return user_ratings_list.rename(columns={'recco_book_id': 'book_id', 'similarity_score': name})
+        genre_users = optimized_converter(genrebooks, target_user_list, 'gen_score', "Generating recs (genre)")
+        cluster_users = optimized_converter(clusterbooks, target_user_list, 'clus_score',
+                                            "Generating recs (book stats cluster)")
+        description_users = optimized_converter(descbooks, target_user_list, 'desc_score',
+                                                "Generating recs (description)")
+        reviews_users = optimized_converter(revbooks, target_user_list, 'rev_score', "Generating recs (reviews)")
+        #################################################################################
+        # USER SIMILARITY CLUSTERING MODEL
+        #################################################################################
+        def jaccard_similarity_pandas(target_user, reviews_sub, n):
+            # Jaccard index between the target user's favored books and every reviewer in reviews_sub;
+            # returns the n best as [user_id, score] pairs
+            target_user_books = target_users_and_books[target_user]
+            relevant_reviews = reviews_sub[reviews_sub['book_id'].isin(target_user_books)]
+            intersections = relevant_reviews.groupby('user_id').size()
+            user_book_counts = reviews_sub.groupby('user_id')['book_id'].nunique()
+            unions = len(target_user_books) + user_book_counts - intersections
+            jaccard_index = intersections / unions
+            top_n_users = jaccard_index.nlargest(n)
+            return top_n_users.reset_index().values.tolist()
+        def recommend_books(target_user_id, reviews_sub, num_books):
+            # Score each book reviewed by the 20 most similar users by the summed similarity of those
+            # users, skipping the target user's own favored books
+            top_n_similar_users = jaccard_similarity_pandas(target_user_id, reviews_sub, n=20)
+            target_user_books = target_users_and_books[target_user_id]
+            similar_users_reviews = reviews_sub[reviews_sub['user_id'].isin([user[0] for user in top_n_similar_users])]
+            recommended_books = defaultdict(float)
+            for curr_user_id, similarity_score in top_n_similar_users:
+                user_reviews = similar_users_reviews[similar_users_reviews['user_id'] == curr_user_id]
+                for _, row in user_reviews.iterrows():
+                    if row['book_id'] not in target_user_books:
+                        recommended_books[row['book_id']] += similarity_score
+            # Return top recommended books sorted by score
+            sorted_recommended_books = sorted(recommended_books.items(), key=lambda x: x[1], reverse=True)
+            return [(target_user_id, book_id, book_score) for book_id, book_score in
+                    sorted_recommended_books[:num_books]]
+        all_recommendations = []
+        for each_user_id in tqdm(target_user_list, desc="Generating recs (users)"):
+            recommendations = recommend_books(each_user_id, file_dict['reviews_sub'], 30)
+            all_recommendations.extend(recommendations)
+        user_users = pd.DataFrame(all_recommendations, columns=['user_id', 'book_id', 'user_score'])
+        #################################################################################
+        # TITLE SIMILARITY MODEL
+        #################################################################################
+        store = FAISS.load_local(
+            "Data/faiss_store",
+            HuggingFaceBgeEmbeddings(
+                model_kwargs={"device": "cpu"},
+                encode_kwargs={"normalize_embeddings": True}
+            ),
+            allow_dangerous_deserialization=True
+        )
+        title_output = []
+        for user_id, books in tqdm(target_users_and_books.items(), desc="Generating recs (title)"):
+            user_books = file_dict['books'][(file_dict['books']['book_id'].isin(books))]
+            titles = '\n'.join(user_books['title_without_series'])  # Using titles without series for queries
+            results = store.similarity_search_with_score(titles, k=80)
+            # Exclude the user's own favored books. The previous check was `not in user_books`, but
+            # `in` on a DataFrame tests column labels, so it never filtered anything.
+            # NOTE(review): assumes the store's 'book_id' metadata has the same type as these IDs, and
+            # that final_model.pkl was not trained on the unfiltered title scores - confirm both.
+            already_liked = set(books)
+            for result, score in results:
+                rec_book_id = result.metadata.get('book_id')
+                if rec_book_id not in already_liked:
+                    title_output.append([user_id, rec_book_id, 1 - score])
+        # Save formatted
+        title_users = pd.DataFrame(title_output, columns=['user_id', 'book_id', 'tit_score'])
+        #################################################################################
+        # COMBINING MODEL OUTPUTS
+        #################################################################################
+        self.temp_store = {
+            'cluster': cluster_users,
+            'genre': genre_users,
+            'desc': description_users,
+            'reviews': reviews_users,
+            'users': user_users,
+            'title': title_users,
+        }
+        # Outer merges keep a (user, book) pair if any sub-model proposed it; missing scores become 0
+        combined_df = pd.merge(cluster_users, genre_users, on=['user_id', 'book_id'], how='outer')
+        combined_df = pd.merge(combined_df, description_users, on=['user_id', 'book_id'], how='outer')
+        combined_df = pd.merge(combined_df, reviews_users, on=['user_id', 'book_id'], how='outer')
+        combined_df = pd.merge(combined_df, user_users, on=['user_id', 'book_id'], how='outer')
+        combined_df = pd.merge(combined_df, title_users, on=['user_id', 'book_id'], how='outer')
+        combined_df.fillna(0, inplace=True)
+        combined_df['book_id'] = combined_df['book_id'].astype(int)
+        combined_df['tit_score'] = combined_df['tit_score'].astype(float)
+        # Attach the user's actual rating (when one exists) as 'target'; unrated pairs stay NaN
+        reviews_df = file_dict['reviews'][file_dict['reviews']['rating'].isin([1, 2, 3, 4, 5])]
+        reviews_filtered = reviews_df[['user_id', 'book_id', 'rating']]
+        combined_df = combined_df.merge(reviews_filtered, on=['user_id', 'book_id'], how='left')
+        combined_df.rename(columns={'rating': 'target'}, inplace=True)
+        combined_df['binary'] = np.where(combined_df['target'] >= 4, 1, 0)
+        # remove books which are not recommended at all
+        combined_df = combined_df[
+            (combined_df[['clus_score', 'gen_score', 'desc_score', 'rev_score', 'user_score', 'tit_score']] != 0).any(
+                axis=1)]
+        with open("Data/final_model.pkl", 'rb') as file:
+            self.pipeline = pickle.load(file)
+        X_test = combined_df.drop(columns=['user_id', 'book_id', 'target', 'binary'])
+        predictions_df = combined_df[
+            ['user_id', 'book_id', 'clus_score', 'gen_score', 'desc_score', 'rev_score', 'user_score',
+             'tit_score', 'target', 'binary']].copy()
+        predictions_df['final_score'] = self.pipeline.predict_proba(X_test).T[1]
+        predictions_df['would_recommend'] = predictions_df['final_score'] >= 0.45  # peak f2 score at this threshold
+        predictions_df = predictions_df.sort_values(['user_id', 'final_score'], ascending=[True, False])
+        self.dataset = combined_df
+        self.predictions = predictions_df
+    def prepare_user_details(self):
+        """Build ``self.user_details``: one row per predicted user with a ``top_books`` list of up to
+        five highest-rated titles. Requires ``self.dataset`` to be populated first.
+        """
+        users_list = self.dataset['user_id'].unique()
+        users_df = pd.read_csv("Data/final_dataset/users_test.csv")
+        books_df = pd.read_csv("Data/final_dataset/books_test.csv")
+        # filter to keep only relevant users
+        users_df = users_df[users_df['user_id'].isin(users_list)]
+        # merge to get book and review data
+        full_df = users_df.merge(books_df, on="user_id")
+        user_details = pd.DataFrame()
+        top_books_per_user = full_df.groupby("user_id").apply(
+            lambda x: x.sort_values('rating').nlargest(n=5, columns='rating')['title_without_series'].tolist())
+        user_details['top_books'] = top_books_per_user
+        self.user_details = user_details
+    def get_user_predictions(self, chosen_user):
+        """Return the scored, already-rated books for one user with per-sub-model contributions.
+        :param chosen_user: user ID to look up in ``self.predictions``
+        :return: dataframe with book_id, target, final_score, would_recommend, one scaled-and-weighted
+                 column per sub-model score, 'intercept' and 'title_without_series'; or None when the
+                 user has no prediction rows with a known rating
+        """
+        logging.info(f"Generating predictions for user: {chosen_user}")
+        user_predictions = self.predictions[self.predictions['user_id'] == chosen_user]
+        # Only books the user actually rated can be compared against the model's confidence
+        user_predictions = user_predictions.dropna(subset=['target'])
+        if len(user_predictions) == 0:
+            logging.info("No predictions hit! Exiting early")
+            return None
+        # transform model scores using the pipeline (scaler + logistic regression coefficients)
+        # specifically, apply scaler then apply linear layer of logistic regression
+        model_score_cols = [c for c in user_predictions.columns if c.endswith('_score') and c != 'final_score']
+        scaled_model_scores = self.pipeline['scaler'].transform(user_predictions[model_score_cols])
+        multed_model_scores = scaled_model_scores * self.pipeline['classifier'].coef_[0]
+        final_model_scores = pd.DataFrame(multed_model_scores, columns=model_score_cols)
+        final_model_scores['intercept'] = self.pipeline['classifier'].intercept_[0]
+        columns = ['book_id', 'target', 'final_score', 'would_recommend']
+        # reset_index so both frames share a 0..n-1 index and line up row-for-row in the concat
+        predictions_and_score = pd.concat(
+            [user_predictions[columns].reset_index(drop=True), final_model_scores],
+            axis=1
+        )
+        return predictions_and_score.merge(self.all_books[['book_id', 'title_without_series']], on='book_id')
+    def cache_results(self):
+        """Pickle the current prediction state to ``self.cache_path`` and mark the model as loaded."""
+        # The 'chosen_books' key name is kept so cache files written earlier stay readable
+        to_pickle = {
+            'dataset': self.dataset,
+            'predictions': self.predictions,
+            'temp_store': self.temp_store,
+            'pipeline': self.pipeline,
+            'chosen_books': self.chosen_books_per_user,
+        }
+        with open(self.cache_path, 'wb+') as f:
+            pickle.dump(to_pickle, f)
+        self.is_loaded = True
+    def does_cache_exist(self):
+        """Return True when the cache file is present on disk."""
+        return os.path.exists(self.cache_path)
+    def retrieve_cache(self):
+        """Restore the state written by ``cache_results`` and mark the model as loaded."""
+        # NOTE: pickle.load can execute arbitrary code from the file; only load caches this app wrote
+        with open(self.cache_path, 'rb') as f:
+            unpickled = pickle.load(f)
+        # The cache stores the sampled books under 'chosen_books', but the attribute is
+        # chosen_books_per_user; without this mapping the attribute stayed None after a cache load
+        attr_aliases = {'chosen_books': 'chosen_books_per_user'}
+        for key, val in unpickled.items():
+            # setattr instead of exec: same effect, without evaluating key text as code
+            setattr(self, attr_aliases.get(key, key), val)
+        self.is_loaded = True

templates/index.html ADDED Viewed

	@@ -0,0 +1,17 @@

+<!DOCTYPE html>
+<html>
+<head>
+    <title>Book Recommender</title>
+    <!-- Bootstrap CSS -->
+    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-QWTKZyjpPEjISv5WaRU9OFeRpok6YctnYmDr5pNlyT2bRjXh0JMhjY6hW+ALEwIH" crossorigin="anonymous">
+</head>
+<body>
+    <div class="container">
+        <h1>Book Recommendation System</h1>
+        <p>Welcome! You can access the options below:</p>
+        <a href="/test_users" class="btn btn-primary">Go to Test User Set</a>
+    </div>
+    <!-- Bootstrap JS (Optional) -->
+    <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/js/bootstrap.bundle.min.js" integrity="sha384-YvpcrYf0tY3lHB60NNkmXc5s9fDVZLESaAA55NDzOxhy9GkcIdslK1eN7N6jIeHz" crossorigin="anonymous"></script>
+</body>
+</html>

templates/recommended_books.html ADDED Viewed

	@@ -0,0 +1,88 @@

+<!DOCTYPE html>
+<html>
+<head>
+    <title>Book Recommender</title>
+    <!-- The loading of KaTeX is deferred to speed up page rendering -->
+    <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.10/dist/katex.min.css" integrity="sha384-wcIxkf4k558AjM3Yz3BBFQUbk/zgIYC2R0QpeeYb+TwlBVMrlgLqwRjRtGZiK7ww" crossorigin="anonymous">
+    <script src="https://cdn.jsdelivr.net/npm/katex@0.16.10/dist/katex.min.js" integrity="sha384-hIoBPJpTUs74ddyc4bFZSM1TVlQDA60VBbJS0oA934VSz82sBx1X7kSx2ATBDIyd" crossorigin="anonymous"></script>
+    <!-- Bootstrap CSS -->
+    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-QWTKZyjpPEjISv5WaRU9OFeRpok6YctnYmDr5pNlyT2bRjXh0JMhjY6hW+ALEwIH" crossorigin="anonymous">
+</head>
+<body>
+    <div class="container">
+        <h1>User books breakdown</h1>
+        <h2>What is on this page?</h2>
+        <p>
+            The books below are books that have been recommended by at least one sub-model in our ensemble model for
+            the current user.
+            They are sorted by our ensemble model's confidence score. If our model is confident that the user will
+            enjoy the book, then the book is sorted towards the top.
+            <br/><br/>
+            These books have also already been read by this user. We can compare the user's actual rating to the
+            confidence score of our ensemble model to see how accurate our ensemble model's predictions are.
+            <br/><br/>
+            Click on the Explain button in blue to look at a breakdown of each sub-model's scores and how they
+            contributed to the ensemble model's final confidence score, and for a brief explanation of why this book
+            was recommended to the user.
+            <br/><br/>
+            <a href="/test_users">Click here to return to the list of users.</a>
+        </p>
+        <h2>Current user ID: {{ chosen_user }}</h2>
+        <table class="table table-striped" style="margin-top: 3em">
+            <thead class="thead-dark">
+                <tr>
+                    <th>Title</th>
+                    <th>User's actual rating</th>
+                    <th>Prediction confidence score</th>
+                    <th>Recommended?</th>
+                    <th>Show explanation</th>
+                </tr>
+            </thead>
+            <tbody>
+                {% for book_id, book_data in recommended_books.iterrows() %}
+                <tr>
+                    <td>{{ book_data['title_without_series'] }}</td>
+                    <td>{{ book_data['target'] }}</td>
+                    <td>{{ book_data['final_score'] }}</td>
+                    <td>{{ book_data['is_recommended'] }}</td>
+                    <td><a href="/test_users/{{ chosen_user }}/{{ book_id }}" class="btn btn-primary">Explain</a></td>
+                </tr>
+                {% endfor %}
+            </tbody>
+        </table>
+        {{ fig|safe }}
+        <div style="display: flex; flex-direction: column; justify-content: space-around; align-items: center">
+            <div id="ScoreSum" style="font-size: 1.5em"></div>
+            <div id="Formula" style="font-size: 1.5em"></div>
+            <p style="text-align: center; width: 50%; margin-top: 1em">
+                {{ explanation_str }}
+            </p>
+        </div>
+    </div>
+    {% if render_explanation %}
+    <script>
+        // Emitted only by the explanation route. The plain list route passes no score_sum or
+        // final_score, and without this guard the page contained `if () {` and `const score_sum = ;`,
+        // which is a JavaScript syntax error.
+        const scoreSumDiv = document.getElementById("ScoreSum");
+        const formulaDiv = document.getElementById("Formula");
+        // score_sum and final_score arrive as strings pre-formatted to 5 decimals and are spliced in
+        // as numeric literals
+        const score_sum = {{ score_sum }};
+        katex.render("\\text{Sum of sub-model scores}=" + score_sum, scoreSumDiv);
+        // Logistic function applied to the summed sub-model scores
+        const start = "\\text{Confidence Score} = \\frac{1}{1+e^{-(";
+        const end = ")}} = ";
+        const threshold = "0.45";
+        const final_score = {{ final_score }};
+        let conclusion;
+        if (parseFloat(final_score) >= parseFloat(threshold)) {
+            conclusion = "\\ge " + threshold + " \\text{ (Recommended)}";
+        } else {
+            conclusion = "\\lt " + threshold + " \\text{ (Not recommended)}";
+        }
+        const render_str = start + score_sum + end + final_score + conclusion;
+        katex.render(render_str, formulaDiv);
+    </script>
+    {% endif %}
+    <!-- Bootstrap JS (Optional) -->
+    <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/js/bootstrap.bundle.min.js" integrity="sha384-YvpcrYf0tY3lHB60NNkmXc5s9fDVZLESaAA55NDzOxhy9GkcIdslK1eN7N6jIeHz" crossorigin="anonymous"></script>
+</body>
+</html>

templates/test_users.html ADDED Viewed

	@@ -0,0 +1,35 @@

+<!DOCTYPE html>
+<html>
+<head>
+    <title>Book Recommender</title>
+    <!-- Bootstrap CSS -->
+    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-QWTKZyjpPEjISv5WaRU9OFeRpok6YctnYmDr5pNlyT2bRjXh0JMhjY6hW+ALEwIH" crossorigin="anonymous">
+</head>
+<body>
+    <div class="container">
+        <h1>Select a User</h1>
+        <p>
+            We generated recommendations for each of the 20 test users below. In each card, you can see that user's
+            favourite titles (top 5, sorted by user's rating) to give an idea of the user's taste profile. Click
+            on the button to see what we have recommended for this user.
+        </p>
+        <div class="row">
+            {% for user_id, top_books in user_details.items() %}
+            <div class="col-md-4 mb-4">
+                <div class="card">
+                    <div class="card-body">
+                        <h5 class="card-title">User</h5>
+                        {% for title in top_books %}
+                        <h6 class="card-text">{{ title }}</h6>
+                        {% endfor %}
+                        <a href="/test_users/{{ user_id }}" class="btn btn-primary">Select</a>
+                    </div>
+                </div>
+            </div>
+            {% endfor %}
+        </div>
+    </div>
+    <!-- Bootstrap JS (Optional) -->
+    <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/js/bootstrap.bundle.min.js" integrity="sha384-YvpcrYf0tY3lHB60NNkmXc5s9fDVZLESaAA55NDzOxhy9GkcIdslK1eN7N6jIeHz" crossorigin="anonymous"></script>
+</body>
+</html>