conlan committed on
Commit
7957649
·
1 Parent(s): 8189b4d

Update files from private repo

Browse files
Files changed (5) hide show
  1. ai_services.py +32 -0
  2. app.py +183 -25
  3. bookdb.py +94 -25
  4. requirements.txt +10 -1
  5. thumbnail.jpg +0 -0
ai_services.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from openai import OpenAI
2
+
3
+ from dotenv import load_dotenv
4
+ import os
5
+ import streamlit as st
6
+
7
+ if os.getenv("OPENAI_API_KEY") is None:
8
+ load_dotenv()
9
+
10
+ openAIclient = OpenAI()
11
+
12
def get_suggestion_text(closestReadBookData, targetBookData):
    """Ask the chat model why a reader of one book might enjoy another.

    Args:
        closestReadBookData: Mapping with 'title' and 'authors' entries for
            the book the user already liked.
        targetBookData: Mapping with 'title' and 'authors' entries for the
            recommended book.

    Returns:
        The text of the first completion choice, or an empty string when the
        model returned no text content.
    """
    closestReadTitle = closestReadBookData['title']
    closestReadAuthor = closestReadBookData['authors']

    targetBookTitle = targetBookData['title']
    targetBookAuthor = targetBookData['authors']

    # f-strings format non-string values (e.g. a NaN author loaded from CSV)
    # instead of raising TypeError the way "+" concatenation does.
    messageContent = (
        f"In 2 sentences max, please cheerfully explain why I might enjoy "
        f"{targetBookTitle} by {targetBookAuthor} "
        f"if I liked {closestReadTitle} by {closestReadAuthor}."
    )

    response = openAIclient.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are an experienced librarian."},
            {"role": "user", "content": messageContent},
        ],
    )

    # `content` is optional in the API response; never hand None to the UI.
    return response.choices[0].message.content or ""
app.py CHANGED
@@ -1,10 +1,26 @@
1
  import pickle
2
  import streamlit as st
 
 
 
 
 
3
  import numpy as np
4
  import bookdb
5
 
 
 
6
  st.header("My Book Buddy 🐛")
7
 
 
 
 
 
 
 
 
 
 
8
  if "upvoted_book_ids" not in st.session_state:
9
  st.session_state["upvoted_book_ids"] = []
10
 
@@ -28,12 +44,21 @@ def update_display(displayData):
28
  st.session_state["recommendedBooksData"] = displayData["recommendedBooksData"]
29
  st.session_state["topCorrelatedReadersData"] = displayData["topCorrelatedReadersData"]
30
 
 
 
31
  def on_reset_votes():
32
  st.session_state["upvoted_book_ids"] = []
33
  st.session_state["downvoted_book_ids"] = []
34
  st.session_state["numSimilarUsers"] = 0
35
  st.session_state["recommendedBooksData"] = None
36
  st.session_state["topCorrelatedReadersData"] = None
 
 
 
 
 
 
 
37
 
38
  def on_submit_votes():
39
  upvoteBookTitles = st.session_state["multiselect_upvote"]
@@ -68,36 +93,112 @@ def on_submit_votes():
68
 
69
  update_display(bookdb.update_user_ratings(upvotedBookIds, downvotedBookIds))
70
 
71
- with st.form(key='upvote_form'):
72
- col1, col2 = st.columns(2)
73
 
74
- allBookTitles = bookdb.get_all_book_titles()
 
 
 
75
 
76
- myRatedBookTitles = bookdb.get_book_titles(st.session_state["upvoted_book_ids"] + st.session_state["downvoted_book_ids"])
 
77
 
78
- # remove myRatedBookTitles from allBookTitles
79
- remainingBookTitles = [x for x in allBookTitles if x not in myRatedBookTitles]
80
 
81
- col1.multiselect(
82
- 'Upvote Books 👍',
83
- remainingBookTitles,
84
- key='multiselect_upvote'
85
- )
86
- col2.multiselect(
87
- 'Downvote Books 👎',
88
- remainingBookTitles,
89
- key='multiselect_downvote'
90
- )
91
- st.form_submit_button(label='Submit', type="primary", on_click=on_submit_votes)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
 
93
  if st.session_state["recommendedBooksData"] is not None:
94
- df = st.session_state["recommendedBooksData"]
95
 
96
  st.subheader("Recommendations")
97
 
98
- st.dataframe(df, hide_index=True, use_container_width=True)
99
 
100
- st.button('Reset All Ratings', type="secondary", on_click=on_reset_votes)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
 
102
  st.subheader("Your Ratings")
103
 
@@ -119,7 +220,7 @@ if st.session_state["recommendedBooksData"] is not None:
119
  for bookId in downvotedBookIds:
120
  displayCol2.markdown(f' - {bookId}-{bookdb.get_book_title(bookId)}')
121
 
122
- st.divider()
123
 
124
  # st.write(f"Similar User Min Percent Shared Books = {round(bookdb.SIMILAR_USER_MIN_PERCENT_SHARED_BOOKS * 100)}%")
125
  # st.write(f"Similar User Min Correlation = {bookdb.SIMILAR_USER_MIN_CORRELATION}")
@@ -127,9 +228,66 @@ if st.session_state["recommendedBooksData"] is not None:
127
  # if "numSimilarUsers" in st.session_state:
128
  # st.write(f"{st.session_state['numSimilarUsers']} similar users")
129
 
130
- if st.session_state["topCorrelatedReadersData"] is not None:
131
- df = st.session_state["topCorrelatedReadersData"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
 
133
- st.subheader("Top Correlated Readers")
134
 
135
- st.dataframe(df, use_container_width=True)
 
 
 
 
1
  import pickle
2
  import streamlit as st
3
+
4
+ st.set_page_config(layout="wide")
5
+
6
+ from streamlit_modal import Modal
7
+
8
  import numpy as np
9
  import bookdb
10
 
11
+ import ai_services
12
+
13
  st.header("My Book Buddy 🐛")
14
 
15
+ if "clicked_book" not in st.session_state:
16
+ st.session_state["clicked_book"] = {}
17
+
18
+ if "closestReadBook" not in st.session_state:
19
+ st.session_state["closestReadBook"] = None
20
+
21
+ if "suggestedBookText" not in st.session_state:
22
+ st.session_state["suggestedBookText"] = ""
23
+
24
  if "upvoted_book_ids" not in st.session_state:
25
  st.session_state["upvoted_book_ids"] = []
26
 
 
44
  st.session_state["recommendedBooksData"] = displayData["recommendedBooksData"]
45
  st.session_state["topCorrelatedReadersData"] = displayData["topCorrelatedReadersData"]
46
 
47
+ st.session_state["bookByRatingData"] = displayData["bookByRatingData"]
48
+
49
  def on_reset_votes():
50
  st.session_state["upvoted_book_ids"] = []
51
  st.session_state["downvoted_book_ids"] = []
52
  st.session_state["numSimilarUsers"] = 0
53
  st.session_state["recommendedBooksData"] = None
54
  st.session_state["topCorrelatedReadersData"] = None
55
+ st.session_state["bookByRatingData"] = None
56
+
57
def on_test_submit_votes():
    """Submit a fixed set of sample votes and refresh the display.

    Debug helper wired to the form's 'Test' button; it bypasses the
    multiselect widgets entirely.
    """
    # TODO REMOVE: hard-coded sample ratings for manual testing.
    # Each id is listed once (974 used to appear twice in the upvote list).
    upvotedBookIds = [104, 103, 102, 110, 113, 124, 129, 135, 141, 142, 155, 161, 165, 176, 181, 974, 4443, 1496, 1003, 2600]
    downvotedBookIds = [126, 179, 183, 184, 187, 9076, 960, 5895, 777, 6902, 2084, 584]

    update_display(bookdb.update_user_ratings(upvotedBookIds, downvotedBookIds))
62
 
63
  def on_submit_votes():
64
  upvoteBookTitles = st.session_state["multiselect_upvote"]
 
93
 
94
  update_display(bookdb.update_user_ratings(upvotedBookIds, downvotedBookIds))
95
 
96
+ clickedBook = st.session_state["clicked_book"] if "clicked_book" in st.session_state else {}
 
97
 
98
+ modal = Modal(
99
+ f"📖 {clickedBook['title'] if 'title' in clickedBook else ''}",
100
+ key="book-modal"
101
+ )
102
 
103
def on_update_recommendations():
    """Apply the 👍/👎 picks made on the recommendation cards to the user's ratings.

    Reads one radio widget per recommendation (session-state keys
    "update_recommendation_<index>"), clears each widget, merges the picks
    into the stored vote lists and recomputes the recommendations. If nothing
    was selected a warning is shown and the ratings are left untouched.
    """
    recommendations = st.session_state["recommendedBooksData"]

    bookIdsToUpvote = []
    bookIdsToDownvote = []

    for counter, recommendation in enumerate(recommendations):
        radioKey = f"update_recommendation_{counter}"

        # .get() tolerates a radio widget that was never rendered
        radioValue = st.session_state.get(radioKey)

        if radioValue == "👍":
            bookIdsToUpvote.append(recommendation["book_id"])
        elif radioValue == "👎":
            bookIdsToDownvote.append(recommendation["book_id"])

        # reset the radio button
        st.session_state[radioKey] = None

    if not bookIdsToUpvote and not bookIdsToDownvote:
        st.warning("Please select at least one book to upvote or downvote")
        return

    # The newest vote wins: drop every freshly rated book from BOTH stored
    # lists before appending it to the list it now belongs to. This also
    # prevents the same id from being stored twice.
    newlyRatedBookIds = set(bookIdsToUpvote) | set(bookIdsToDownvote)

    upvotedBookIds = [
        bookId for bookId in st.session_state["upvoted_book_ids"]
        if bookId not in newlyRatedBookIds
    ] + bookIdsToUpvote
    downvotedBookIds = [
        bookId for bookId in st.session_state["downvoted_book_ids"]
        if bookId not in newlyRatedBookIds
    ] + bookIdsToDownvote

    # Persist explicitly rather than relying on in-place list mutation.
    st.session_state["upvoted_book_ids"] = upvotedBookIds
    st.session_state["downvoted_book_ids"] = downvotedBookIds

    update_display(bookdb.update_user_ratings(upvotedBookIds, downvotedBookIds))
139
 
140
  if st.session_state["recommendedBooksData"] is not None:
141
+ recommendations = st.session_state["recommendedBooksData"]
142
 
143
  st.subheader("Recommendations")
144
 
145
+ recommendationsCol1, recommendationsCol2, recommendationsCol3 = st.columns(3)
146
 
147
+ for counter in range(len(recommendations)):
148
+ bookData = recommendations[counter]
149
+
150
+ bookMetadata = bookdb.get_book_metadata_by_id(bookData["book_id"])
151
+
152
+ # print(bookMetadata)
153
+
154
+ if counter % 3 == 0:
155
+ container = recommendationsCol1.container(border=True)
156
+ elif counter % 3 == 1:
157
+ container = recommendationsCol2.container(border=True)
158
+ else:
159
+ container = recommendationsCol3.container(border=True)
160
+
161
+ bookAuthor = bookData["authors"]
162
+ bookTitle = bookData["title"]
163
+ bookPubYear = bookData["original_publication_year"]
164
+
165
+ containerCol1, containerCol2, containerCol3 = container.columns([5, 10, 2])
166
+
167
+ containerCol1.html(f"<img width='100%' src='{bookMetadata['thumbnail']}'>")
168
+
169
+ textContainer = containerCol2.container()
170
+
171
+ titleClicked = textContainer.button(f"{bookTitle}", use_container_width=True)
172
+
173
+ if titleClicked:
174
+ bookByRatingData = st.session_state["bookByRatingData"]
175
+ upvotedBookIds = st.session_state["upvoted_book_ids"]
176
+ targetBookId = bookData["book_id"]
177
+
178
+ closestReadBookData = bookdb.find_closest_read_title(bookByRatingData, upvotedBookIds, targetBookId)
179
+
180
+ targetBookData = bookdb.get_book_data_by_id(targetBookId)
181
+
182
+ st.session_state["closestReadBook"] = closestReadBookData
183
+ st.session_state["clicked_book"] = bookData
184
+
185
+ st.session_state["suggestedBookText"] = ai_services.get_suggestion_text(closestReadBookData, targetBookData)
186
+
187
+ modal.open()
188
+
189
+ textContainer.markdown(f"*{bookAuthor}*")
190
+
191
+ textContainer.markdown(f"Published: {int(bookPubYear)}")
192
+
193
+ # containerCol2.markdown(textHTMLcontent, unsafe_allow_html=True)
194
+ # containerCol2.markdown(f"[{bookTitle}](https://streamlit.io)\n\n*{bookAuthor}*\n\nPublished: {int(bookPubYear)}")
195
+ # radioContainer = containerCol3.container()
196
+
197
+ containerCol3.radio(label="Upvote/Downvote", label_visibility="hidden", options=["👍", "👎"], key=f"update_recommendation_{counter}", index=None)
198
+
199
+ # radioContainer.button('Why This Book?', type="secondary", key=f"submit_{counter}")
200
+
201
+ st.button('Update Recommendations', type="primary", on_click=on_update_recommendations)
202
 
203
  st.subheader("Your Ratings")
204
 
 
220
  for bookId in downvotedBookIds:
221
  displayCol2.markdown(f' - {bookId}-{bookdb.get_book_title(bookId)}')
222
 
223
+ st.button('Reset All Ratings', type="secondary", on_click=on_reset_votes)
224
 
225
  # st.write(f"Similar User Min Percent Shared Books = {round(bookdb.SIMILAR_USER_MIN_PERCENT_SHARED_BOOKS * 100)}%")
226
  # st.write(f"Similar User Min Correlation = {bookdb.SIMILAR_USER_MIN_CORRELATION}")
 
228
  # if "numSimilarUsers" in st.session_state:
229
  # st.write(f"{st.session_state['numSimilarUsers']} similar users")
230
 
231
+ with st.form(key='upvote_form'):
232
+ col1, col2 = st.columns(2)
233
+
234
+ allBookTitles = bookdb.get_all_book_titles()
235
+
236
+ myRatedBookTitles = bookdb.get_book_titles(st.session_state["upvoted_book_ids"] + st.session_state["downvoted_book_ids"])
237
+
238
+ # remove myRatedBookTitles from allBookTitles
239
+ remainingBookTitles = [x for x in allBookTitles if x not in myRatedBookTitles]
240
+
241
+ col1.multiselect(
242
+ 'Upvote Books 👍',
243
+ remainingBookTitles,
244
+ key='multiselect_upvote'
245
+ )
246
+ col2.multiselect(
247
+ 'Downvote Books 👎',
248
+ remainingBookTitles,
249
+ key='multiselect_downvote'
250
+ )
251
+ st.form_submit_button(label='Submit', type="primary", on_click=on_submit_votes)
252
+
253
+ st.form_submit_button(label='Test', type="secondary", on_click=on_test_submit_votes)
254
+
255
+ if st.session_state["topCorrelatedReadersData"] is not None:
256
+ df = st.session_state["topCorrelatedReadersData"]
257
+
258
+ st.subheader("Top Correlated Readers")
259
+
260
+ st.dataframe(df, use_container_width=True)
261
+
262
# Render the detail modal for the most recently clicked recommendation:
# description, the AI-generated "why this book" blurb, and outbound shop links.
if modal.is_open():
    with modal.container():
        # Imported here so this block carries its own dependencies.
        from html import escape
        from urllib.parse import quote_plus

        clickedBook = st.session_state["clicked_book"]

        clickedBookMetadata = bookdb.get_book_metadata_by_id(clickedBook["book_id"])

        clickedBookDescription = clickedBookMetadata["description"]

        # NOTE(review): the description is rendered as HTML as-is; this
        # presumably relies on book_metadata.json being trusted — confirm.
        st.html(f"{clickedBookDescription}")

        aiSuggestContainer = st.container(border=True)

        closestReadBook = st.session_state["closestReadBook"]

        suggestedBookText = st.session_state["suggestedBookText"]

        # Titles, author names and model output are plain text, so escape them
        # before interpolating into markup ("<", "&" would otherwise break it).
        closestReadTitleHtml = escape(str(closestReadBook['title']))
        closestReadAuthorHtml = escape(str(closestReadBook['authors']))
        suggestedBookTextHtml = escape(str(suggestedBookText))

        aiSuggestContainer.html(f"<p style=\"color:#DA70D6;\">💫 Because you liked <i><b>{closestReadTitleHtml}</b></i> by <b>{closestReadAuthorHtml}</b>...</p>")

        aiSuggestContainer.html(f"<p style=\"color:#DA70D6;\">{suggestedBookTextHtml}</p>")

        buttonCol1, buttonCol2, buttonCol3, buttonCol4 = st.columns(4)

        clickedBookTitle = clickedBook["title"]
        clickedBookAuthor = clickedBook["authors"]
        clickedBookISBN = clickedBook["isbn"]

        # URL-encode the query: raw titles such as "... (Harry Potter, #1)"
        # contain "#", "&" and spaces that would truncate or corrupt the URL.
        outboundLinkSuffix = quote_plus(f"{clickedBookTitle} {clickedBookAuthor}")

        buttonCol1.link_button('🛒 Bookshop', f'https://bookshop.org/search?keywords={outboundLinkSuffix}')
        buttonCol2.link_button('📚 Biblio', f'https://www.biblio.com/search.php?stage=1&result_type=works&keyisbn={outboundLinkSuffix}')
        buttonCol3.link_button('🎧 Libro', f'https://libro.fm/search?utf8=%E2%9C%93&q={outboundLinkSuffix}')
        buttonCol4.link_button('💬 Hardcover', f'https://hardcover.app/search?q={outboundLinkSuffix}')
bookdb.py CHANGED
@@ -1,17 +1,48 @@
1
  import pandas as pd
 
 
2
 
3
  SIMILAR_USER_MIN_PERCENT_SHARED_BOOKS = 0.30
4
  SIMILAR_USER_MIN_CORRELATION = 0.25
5
 
6
- books = pd.read_csv("./goodreads/books.csv",
 
 
 
 
 
 
7
  usecols=["book_id",
8
- # "original_publication_year",
9
  # "average_rating",
10
- "title",
11
- "average_rating"])
12
- books['book_id'] = range(1, len(books) + 1)
 
 
 
 
 
 
13
 
14
- baseRatings = pd.read_csv("./goodreads/ratings.csv")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  def get_book_ids_by_title(book_titles):
17
  return books[books["title"].isin(book_titles)]["book_id"].values
@@ -19,19 +50,36 @@ def get_book_ids_by_title(book_titles):
19
  def get_all_book_titles():
20
  return books["title"].values
21
 
 
 
 
22
  def get_book_title(book_id):
23
  return books[books["book_id"] == book_id]["title"].values[0]
24
 
25
  def get_book_titles(book_ids):
26
  return books[books["book_id"].isin(book_ids)]["title"].values
27
 
28
- def update_user_ratings(upvotedBookIds, downvotedBookIds):
29
- # upvotedBookIds = [104, 103, 102, 110, 113, 124, 129, 135, 141, 142, 155, 161, 165, 176, 181, 974, 4443, 1496, 1003, 974, 2600] # TODO REMOVE
30
- # downvotedBookIds = [126, 179, 183, 184, 187, 9076, 960, 5895, 777, 6902, 2084, 584] # TODO REMOVE
31
 
32
- # get the max user id in baseRatings
33
- newUserId = baseRatings['user_id'].max() + 1
34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  RATING_FOR_UPVOTE = 5
36
  RATING_FOR_DOWNVOTE = 1
37
 
@@ -42,12 +90,12 @@ def update_user_ratings(upvotedBookIds, downvotedBookIds):
42
  for bookId in upvotedBookIds:
43
  appendBookIds.append(bookId)
44
  appendBookRatings.append(RATING_FOR_UPVOTE)
45
- appendUserIds.append(newUserId)
46
 
47
  for bookId in downvotedBookIds:
48
  appendBookIds.append(bookId)
49
  appendBookRatings.append(RATING_FOR_DOWNVOTE)
50
- appendUserIds.append(newUserId)
51
 
52
  newUserData = {
53
  'book_id': appendBookIds,
@@ -59,11 +107,11 @@ def update_user_ratings(upvotedBookIds, downvotedBookIds):
59
 
60
  ratings = pd.concat([baseRatings, newRows], ignore_index=True)
61
 
62
- df = pd.merge(books, ratings, on="book_id", how="inner")
63
 
64
- user_df = df.groupby(["user_id","title"])["rating"].mean().unstack()
65
 
66
- targetUserDf = user_df[user_df.index == newUserId]
67
 
68
  targetBooksRead = targetUserDf.dropna(axis=1).columns.tolist()
69
 
@@ -78,9 +126,11 @@ def update_user_ratings(upvotedBookIds, downvotedBookIds):
78
  print(userBookCount)
79
 
80
  # from there get users who've read at least X percent of the main user
81
- minBookCount = book_read_df.shape[1] * SIMILAR_USER_MIN_PERCENT_SHARED_BOOKS
 
 
82
 
83
- # print(minBookCount)
84
 
85
  usersSameBooks = userBookCount[userBookCount > minBookCount].index
86
 
@@ -94,15 +144,25 @@ def update_user_ratings(upvotedBookIds, downvotedBookIds):
94
 
95
  corr_df = filted_df.T.corr().unstack()
96
 
97
- top_readers = pd.DataFrame(corr_df[newUserId][corr_df[newUserId] > SIMILAR_USER_MIN_CORRELATION], columns=["corr"])
98
 
 
 
 
 
 
 
 
 
 
 
99
  print(top_readers)
100
 
101
- if (newUserId in top_readers.index):
102
- top_readers = top_readers.drop(newUserId)
103
 
104
  # get the ratings for the top readers
105
- top_readers_ratings = pd.merge(top_readers, df[["user_id", "book_id", "rating"]], how='inner', on="user_id")
106
 
107
  # weight their ratings by how correlated they are with the user
108
  top_readers_ratings['weighted_rating'] = top_readers_ratings['corr'] * top_readers_ratings['rating']
@@ -118,15 +178,24 @@ def update_user_ratings(upvotedBookIds, downvotedBookIds):
118
  books_recommend = recommendation_df[recommendation_df["weighted_rating"] > 1].sort_values(by="weighted_rating", ascending=False).head(20)
119
 
120
  # get the recommended books (and sort by average_rating)
121
- recommendedBooks = books[books["book_id"].isin(books_recommend.index)].sort_values(by="average_rating", ascending=False)
 
 
 
 
122
  # drop book_id column
123
- recommendedBooks = recommendedBooks.drop(columns=["book_id"])
 
 
 
124
 
125
  return {
126
  "upvotedBookIds": upvotedBookIds,
127
  "downvotedBookIds": downvotedBookIds,
128
  "numSimilarUsers" : len(usersSameBooks),
129
- "recommendedBooksData": recommendedBooks,
 
 
130
  # sort by correlation
131
  "topCorrelatedReadersData" : top_readers.sort_values(by="corr", ascending=False)
132
  }
 
1
  import pandas as pd
2
+ import json
3
+ import streamlit as st
4
 
5
  SIMILAR_USER_MIN_PERCENT_SHARED_BOOKS = 0.30
6
  SIMILAR_USER_MIN_CORRELATION = 0.25
7
 
8
+ MAX_MIN_BOOK_COUNT_FOR_SIMILAR_USER = 10 # the maximum amount of books required for a user to be considered a similar user
9
+
10
+ DEFAULT_BOOK_COVER_URL = "https://m.media-amazon.com/images/I/81QPHl7zgbL._AC_UF1000,1000_QL80_.jpg"
11
+
12
# st.cache is deprecated; st.cache_data is its documented replacement for
# functions that return data.
@st.cache_data
def get_dataframes():
    """Load the book table, the ratings table and the per-book metadata from disk.

    Returns:
        A tuple ``(booksDf, baseRatingsDf, bookMetadataJSON)``: the selected
        columns of ./goodreads/books.csv, the full ./goodreads/ratings.csv,
        and the parsed contents of ./goodreads/book_metadata.json.
    """
    booksDf = pd.read_csv(
        "./goodreads/books.csv",
        usecols=[
            "book_id",
            "original_publication_year",
            "isbn",
            "authors",
            "title",
            "average_rating",
        ],
    )
    # The CSV's book_id values are replaced with the 1-based row position.
    booksDf['book_id'] = range(1, len(booksDf) + 1)

    baseRatingsDf = pd.read_csv("./goodreads/ratings.csv")

    # Context manager closes the handle that json.load(open(...)) leaked.
    with open("./goodreads/book_metadata.json", encoding="utf-8") as metadataFile:
        bookMetadataJSON = json.load(metadataFile)

    return booksDf, baseRatingsDf, bookMetadataJSON
29
+
30
+ books, baseRatings, bookMetadata = get_dataframes()
31
+
32
+ targetUserId = baseRatings['user_id'].max() + 1
33
+
34
def get_book_metadata_by_id(book_id):
    """Return the description/thumbnail metadata for a book.

    Args:
        book_id: Book identifier; converted to ``str`` because the metadata
            is looked up by string key.

    Returns:
        A new dict that always contains "description" and "thumbnail".
        Values stored for the book take precedence; anything missing falls
        back to "n/a" and DEFAULT_BOOK_COVER_URL respectively.
    """
    defaults = {
        "description": "n/a",
        "thumbnail": DEFAULT_BOOK_COVER_URL,
    }

    storedMetadata = bookMetadata.get(str(book_id), {})

    # Merge rather than return the stored entry directly, so a partial entry
    # (e.g. one without "thumbnail") cannot make callers raise KeyError.
    return {**defaults, **storedMetadata}
46
 
47
  def get_book_ids_by_title(book_titles):
48
  return books[books["title"].isin(book_titles)]["book_id"].values
 
50
  def get_all_book_titles():
51
  return books["title"].values
52
 
53
def get_book_data_by_id(book_id):
    """Return the first `books` row whose book_id matches, as a column->value dict.

    Raises IndexError when no row matches.
    """
    matchingRows = books.loc[books["book_id"] == book_id]
    rowsAsDicts = matchingRows.to_dict(orient="records")
    return rowsAsDicts[0]
55
+
56
  def get_book_title(book_id):
57
  return books[books["book_id"] == book_id]["title"].values[0]
58
 
59
  def get_book_titles(book_ids):
60
  return books[books["book_id"].isin(book_ids)]["title"].values
61
 
62
def find_closest_read_title(bookByRatingData, upvotedBookIds, targetBookId):
    """Find the upvoted book whose ratings correlate best with the target book.

    Args:
        bookByRatingData: DataFrame with at least "user_id", "book_id" and
            "rating" columns.
        upvotedBookIds: Iterable of book ids the user upvoted (a list or a
            NumPy array).
        targetBookId: Id of the recommended book to explain.

    Returns:
        The book record (as returned by get_book_data_by_id) of the upvoted
        book with the highest rating correlation to the target book.

    Raises:
        ValueError: If there is no upvoted book other than the target, or no
            correlation can be computed.
    """
    # Build a plain, de-duplicated list without the target itself. list-style
    # handling matters: `ndarray + [x]` would add x to every id instead of
    # appending it, and duplicate ids would yield duplicate columns.
    candidateBookIds = [
        bookId for bookId in dict.fromkeys(upvotedBookIds)
        if bookId != targetBookId
    ]
    if not candidateBookIds:
        raise ValueError("No upvoted books to compare against the target book")

    relevantBookIds = candidateBookIds + [targetBookId]

    # Only pivot the ratings of the books being compared, instead of building
    # the user x book matrix for every book in the data set.
    relevantRatings = bookByRatingData[bookByRatingData["book_id"].isin(relevantBookIds)]
    user_df = relevantRatings.groupby(["user_id", "book_id"])["rating"].mean().unstack()

    # Re-add users who rated none of these books (all-zero rows after fillna),
    # so the correlations match those computed over the full matrix.
    user_df = user_df.reindex(
        index=bookByRatingData["user_id"].unique(),
        columns=relevantBookIds,
    )

    # replace NaNs with 0
    book_read_df = user_df.fillna(0)

    # Correlate each candidate with the target. The target is excluded by
    # label, not by "corr < 1", so a perfectly correlated candidate is kept.
    correlations = book_read_df[candidateBookIds].corrwith(book_read_df[targetBookId]).dropna()
    if correlations.empty:
        raise ValueError("Could not compute a correlation with the target book")

    closestBookId = correlations.idxmax()

    return get_book_data_by_id(closestBookId)
81
+
82
+ def update_user_ratings(upvotedBookIds, downvotedBookIds):
83
  RATING_FOR_UPVOTE = 5
84
  RATING_FOR_DOWNVOTE = 1
85
 
 
90
  for bookId in upvotedBookIds:
91
  appendBookIds.append(bookId)
92
  appendBookRatings.append(RATING_FOR_UPVOTE)
93
+ appendUserIds.append(targetUserId)
94
 
95
  for bookId in downvotedBookIds:
96
  appendBookIds.append(bookId)
97
  appendBookRatings.append(RATING_FOR_DOWNVOTE)
98
+ appendUserIds.append(targetUserId)
99
 
100
  newUserData = {
101
  'book_id': appendBookIds,
 
107
 
108
  ratings = pd.concat([baseRatings, newRows], ignore_index=True)
109
 
110
+ book_by_rating_df = pd.merge(books, ratings, on="book_id", how="inner")
111
 
112
+ user_df = book_by_rating_df.groupby(["user_id","title"])["rating"].mean().unstack()
113
 
114
+ targetUserDf = user_df[user_df.index == targetUserId]
115
 
116
  targetBooksRead = targetUserDf.dropna(axis=1).columns.tolist()
117
 
 
126
  print(userBookCount)
127
 
128
  # from there get users who've read at least X percent of the main user
129
+ minBookCount = int(book_read_df.shape[1] * SIMILAR_USER_MIN_PERCENT_SHARED_BOOKS)
130
+
131
+ minBookCount = min(minBookCount, MAX_MIN_BOOK_COUNT_FOR_SIMILAR_USER)
132
 
133
+ print(f'Min book count for Similar User: {minBookCount}')
134
 
135
  usersSameBooks = userBookCount[userBookCount > minBookCount].index
136
 
 
144
 
145
  corr_df = filted_df.T.corr().unstack()
146
 
147
+ top_readers = None
148
 
149
+ minTopReaderCorrelation = SIMILAR_USER_MIN_CORRELATION
150
+
151
+ while top_readers is None:
152
+ top_readers = pd.DataFrame(corr_df[targetUserId][corr_df[targetUserId] > minTopReaderCorrelation], columns=["corr"])
153
+
154
+ # if top_readers only has 1 row, then we need to lower the correlation threshold
155
+ if len(top_readers) <= 1:
156
+ minTopReaderCorrelation -= 0.05
157
+ top_readers = None
158
+
159
  print(top_readers)
160
 
161
+ if (targetUserId in top_readers.index):
162
+ top_readers = top_readers.drop(targetUserId)
163
 
164
  # get the ratings for the top readers
165
+ top_readers_ratings = pd.merge(top_readers, book_by_rating_df[["user_id", "book_id", "rating"]], how='inner', on="user_id")
166
 
167
  # weight their ratings by how correlated they are with the user
168
  top_readers_ratings['weighted_rating'] = top_readers_ratings['corr'] * top_readers_ratings['rating']
 
178
  books_recommend = recommendation_df[recommendation_df["weighted_rating"] > 1].sort_values(by="weighted_rating", ascending=False).head(20)
179
 
180
  # get the recommended books
181
+ recommendedBooks = books[books["book_id"].isin(books_recommend.index)]
182
+
183
+ # sort recommended books by the weighted_rating in books_recommend
184
+ recommendedBooks = recommendedBooks.merge(books_recommend, on="book_id").sort_values(by="weighted_rating", ascending=False)
185
+
186
  # drop the average_rating column (book_id is kept for later lookups)
187
+ recommendedBooks = recommendedBooks.drop(columns=["average_rating"])
188
+
189
+ # get each row in the recommendedBooks dataframe as a dictionary
190
+ recommendedBooksRowsAsDicts = recommendedBooks.to_dict(orient="records")
191
 
192
  return {
193
  "upvotedBookIds": upvotedBookIds,
194
  "downvotedBookIds": downvotedBookIds,
195
  "numSimilarUsers" : len(usersSameBooks),
196
+ "recommendedBooksData": recommendedBooksRowsAsDicts,
197
+ "bookByRatingData": book_by_rating_df,
198
+
199
  # sort by correlation
200
  "topCorrelatedReadersData" : top_readers.sort_values(by="corr", ascending=False)
201
  }
requirements.txt CHANGED
@@ -1 +1,10 @@
1
- scikit-learn
 
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ numpy
3
+ scikit-learn
4
+ openai
5
+ tqdm
6
+ streamlit_modal
7
+ python-dotenv
8
+
9
+ # local packages
10
+ -e .
thumbnail.jpg ADDED