Spaces:
Sleeping
Sleeping
Update files from private repo
Browse files- ai_services.py +32 -0
- app.py +183 -25
- bookdb.py +94 -25
- requirements.txt +10 -1
- thumbnail.jpg +0 -0
ai_services.py
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from openai import OpenAI
|
2 |
+
|
3 |
+
from dotenv import load_dotenv
|
4 |
+
import os
|
5 |
+
import streamlit as st
|
6 |
+
|
7 |
+
# Only consult a local .env file when the key is not already in the environment.
if "OPENAI_API_KEY" not in os.environ:
    load_dotenv()

# Module-level client shared by the functions in this file. It is constructed
# with no arguments, so it relies on the SDK's default credential lookup.
openAIclient = OpenAI()
|
11 |
+
|
12 |
+
def get_suggestion_text(closestReadBookData, targetBookData):
    """Ask the chat model why a reader of one book might enjoy another.

    Args:
        closestReadBookData: Mapping with ``'title'`` and ``'authors'`` keys
            for the book the reader already liked.
        targetBookData: Mapping with ``'title'`` and ``'authors'`` keys for
            the book being recommended.

    Returns:
        The text of the model's reply. When the reply carries no text
        content, a fixed sentence built from the same titles and authors is
        returned instead, so the caller always receives a non-empty string.
    """
    closestReadTitle = closestReadBookData['title']
    closestReadAuthor = closestReadBookData['authors']

    targetBookTitle = targetBookData['title']
    targetBookAuthor = targetBookData['authors']

    # An f-string formats any value, whereas ``+`` concatenation raises
    # TypeError as soon as a title or author is not a str.
    messageContent = (
        f"In 2 sentences max, please cheerfully explain why I might enjoy "
        f"{targetBookTitle} by {targetBookAuthor} if I liked "
        f"{closestReadTitle} by {closestReadAuthor}."
    )

    response = openAIclient.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are an experienced librarian."},
            {"role": "user", "content": messageContent},
        ],
    )

    suggestionText = response.choices[0].message.content
    if suggestionText:
        return suggestionText

    # The message content may be absent; fall back to a deterministic sentence
    # rather than handing None to the UI.
    return (
        f"Because you liked {closestReadTitle} by {closestReadAuthor}, "
        f"we think you might like {targetBookTitle} by {targetBookAuthor}."
    )
|
app.py
CHANGED
@@ -1,10 +1,26 @@
|
|
1 |
import pickle
|
2 |
import streamlit as st
|
|
|
|
|
|
|
|
|
|
|
3 |
import numpy as np
|
4 |
import bookdb
|
5 |
|
|
|
|
|
6 |
st.header("My Book Buddy 🐛")
|
7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
if "upvoted_book_ids" not in st.session_state:
|
9 |
st.session_state["upvoted_book_ids"] = []
|
10 |
|
@@ -28,12 +44,21 @@ def update_display(displayData):
|
|
28 |
st.session_state["recommendedBooksData"] = displayData["recommendedBooksData"]
|
29 |
st.session_state["topCorrelatedReadersData"] = displayData["topCorrelatedReadersData"]
|
30 |
|
|
|
|
|
31 |
def on_reset_votes():
|
32 |
st.session_state["upvoted_book_ids"] = []
|
33 |
st.session_state["downvoted_book_ids"] = []
|
34 |
st.session_state["numSimilarUsers"] = 0
|
35 |
st.session_state["recommendedBooksData"] = None
|
36 |
st.session_state["topCorrelatedReadersData"] = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
|
38 |
def on_submit_votes():
|
39 |
upvoteBookTitles = st.session_state["multiselect_upvote"]
|
@@ -68,36 +93,112 @@ def on_submit_votes():
|
|
68 |
|
69 |
update_display(bookdb.update_user_ratings(upvotedBookIds, downvotedBookIds))
|
70 |
|
71 |
-
|
72 |
-
col1, col2 = st.columns(2)
|
73 |
|
74 |
-
|
|
|
|
|
|
|
75 |
|
76 |
-
|
|
|
77 |
|
78 |
-
|
79 |
-
remainingBookTitles = [x for x in allBookTitles if x not in myRatedBookTitles]
|
80 |
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
|
93 |
if st.session_state["recommendedBooksData"] is not None:
|
94 |
-
|
95 |
|
96 |
st.subheader("Recommendations")
|
97 |
|
98 |
-
|
99 |
|
100 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
101 |
|
102 |
st.subheader("Your Ratings")
|
103 |
|
@@ -119,7 +220,7 @@ if st.session_state["recommendedBooksData"] is not None:
|
|
119 |
for bookId in downvotedBookIds:
|
120 |
displayCol2.markdown(f' - {bookId}-{bookdb.get_book_title(bookId)}')
|
121 |
|
122 |
-
st.
|
123 |
|
124 |
# st.write(f"Similar User Min Percent Shared Books = {round(bookdb.SIMILAR_USER_MIN_PERCENT_SHARED_BOOKS * 100)}%")
|
125 |
# st.write(f"Similar User Min Correlation = {bookdb.SIMILAR_USER_MIN_CORRELATION}")
|
@@ -127,9 +228,66 @@ if st.session_state["recommendedBooksData"] is not None:
|
|
127 |
# if "numSimilarUsers" in st.session_state:
|
128 |
# st.write(f"{st.session_state['numSimilarUsers']} similar users")
|
129 |
|
130 |
-
|
131 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
132 |
|
133 |
-
|
134 |
|
135 |
-
|
|
|
|
|
|
|
|
1 |
import pickle
|
2 |
import streamlit as st
|
3 |
+
|
4 |
+
st.set_page_config(layout="wide")
|
5 |
+
|
6 |
+
from streamlit_modal import Modal
|
7 |
+
|
8 |
import numpy as np
|
9 |
import bookdb
|
10 |
|
11 |
+
import ai_services
|
12 |
+
|
13 |
st.header("My Book Buddy 🐛")
|
14 |
|
15 |
+
if "clicked_book" not in st.session_state:
|
16 |
+
st.session_state["clicked_book"] = {}
|
17 |
+
|
18 |
+
if "closestReadBook" not in st.session_state:
|
19 |
+
st.session_state["closestReadBook"] = None
|
20 |
+
|
21 |
+
if "suggestedBookText" not in st.session_state:
|
22 |
+
st.session_state["suggestedBookText"] = ""
|
23 |
+
|
24 |
if "upvoted_book_ids" not in st.session_state:
|
25 |
st.session_state["upvoted_book_ids"] = []
|
26 |
|
|
|
44 |
st.session_state["recommendedBooksData"] = displayData["recommendedBooksData"]
|
45 |
st.session_state["topCorrelatedReadersData"] = displayData["topCorrelatedReadersData"]
|
46 |
|
47 |
+
st.session_state["bookByRatingData"] = displayData["bookByRatingData"]
|
48 |
+
|
49 |
def on_reset_votes():
    """Callback: put every vote-related session entry back to its empty value."""
    clearedState = {
        "upvoted_book_ids": [],
        "downvoted_book_ids": [],
        "numSimilarUsers": 0,
        "recommendedBooksData": None,
        "topCorrelatedReadersData": None,
        "bookByRatingData": None,
    }

    # Assign key by key, in the same order as the entries are listed above.
    for stateKey, emptyValue in clearedState.items():
        st.session_state[stateKey] = emptyValue
|
56 |
+
|
57 |
+
def on_test_submit_votes():
    """Callback: submit a fixed, hard-coded set of votes and refresh the display."""
    # TODO REMOVE: canned book ids used to exercise the recommender by hand.
    sampleUpvotedIds = [
        104, 103, 102, 110, 113, 124, 129, 135, 141, 142, 155,
        161, 165, 176, 181, 974, 4443, 1496, 1003, 974, 2600,
    ]
    # TODO REMOVE
    sampleDownvotedIds = [
        126, 179, 183, 184, 187, 9076, 960, 5895, 777, 6902, 2084, 584,
    ]

    displayData = bookdb.update_user_ratings(sampleUpvotedIds, sampleDownvotedIds)
    update_display(displayData)
|
62 |
|
63 |
def on_submit_votes():
|
64 |
upvoteBookTitles = st.session_state["multiselect_upvote"]
|
|
|
93 |
|
94 |
update_display(bookdb.update_user_ratings(upvotedBookIds, downvotedBookIds))
|
95 |
|
96 |
+
# Book stored under "clicked_book" in session state; an empty dict when the
# key has not been set.
clickedBook = st.session_state.get("clicked_book", {})

# The modal's heading shows the clicked book's title, or nothing when the
# stored book has no 'title' entry.
modal = Modal(f"📖 {clickedBook.get('title', '')}", key="book-modal")
|
102 |
|
103 |
+
def on_update_recommendations():
    """Callback: fold the thumbs picked on the recommendation cards into the votes.

    Reads the radio stored under ``update_recommendation_<position>`` for each
    recommended book, clears that radio, and, when at least one thumb was
    picked, appends the chosen book ids to the stored vote lists and refreshes
    the display. Shows a warning and changes no votes when nothing was picked.
    """
    recommendations = st.session_state["recommendedBooksData"]

    # Bucket the picked book ids by the emoji selected on each card's radio.
    picksByVote = {"👍": [], "👎": []}
    for position, recommendation in enumerate(recommendations):
        radioKey = f"update_recommendation_{position}"
        choice = st.session_state[radioKey]

        if choice in picksByVote:
            picksByVote[choice].append(recommendation["book_id"])

        # Clear the radio whether or not it was used.
        st.session_state[radioKey] = None

    newUpvotes = picksByVote["👍"]
    newDownvotes = picksByVote["👎"]

    if not newUpvotes and not newDownvotes:
        st.warning("Please select at least one book to upvote or downvote")
        return

    upvotedBookIds = st.session_state["upvoted_book_ids"]
    downvotedBookIds = st.session_state["downvoted_book_ids"]

    # extend() grows the lists held in session state in place.
    upvotedBookIds.extend(newUpvotes)
    downvotedBookIds.extend(newDownvotes)

    # A book that is upvoted must not also be passed on as a downvote.
    keptDownvotes = [
        bookId for bookId in downvotedBookIds if bookId not in upvotedBookIds
    ]

    displayData = bookdb.update_user_ratings(upvotedBookIds, keptDownvotes)
    update_display(displayData)
|
139 |
|
140 |
if st.session_state["recommendedBooksData"] is not None:
|
141 |
+
recommendations = st.session_state["recommendedBooksData"]
|
142 |
|
143 |
st.subheader("Recommendations")
|
144 |
|
145 |
+
recommendationsCol1, recommendationsCol2, recommendationsCol3 = st.columns(3)
|
146 |
|
147 |
+
for counter in range(len(recommendations)):
|
148 |
+
bookData = recommendations[counter]
|
149 |
+
|
150 |
+
bookMetadata = bookdb.get_book_metadata_by_id(bookData["book_id"])
|
151 |
+
|
152 |
+
# print(bookMetadata)
|
153 |
+
|
154 |
+
if counter % 3 == 0:
|
155 |
+
container = recommendationsCol1.container(border=True)
|
156 |
+
elif counter % 3 == 1:
|
157 |
+
container = recommendationsCol2.container(border=True)
|
158 |
+
else:
|
159 |
+
container = recommendationsCol3.container(border=True)
|
160 |
+
|
161 |
+
bookAuthor = bookData["authors"]
|
162 |
+
bookTitle = bookData["title"]
|
163 |
+
bookPubYear = bookData["original_publication_year"]
|
164 |
+
|
165 |
+
containerCol1, containerCol2, containerCol3 = container.columns([5, 10, 2])
|
166 |
+
|
167 |
+
containerCol1.html(f"<img width='100%' src='{bookMetadata['thumbnail']}'>")
|
168 |
+
|
169 |
+
textContainer = containerCol2.container()
|
170 |
+
|
171 |
+
titleClicked = textContainer.button(f"{bookTitle}", use_container_width=True)
|
172 |
+
|
173 |
+
if titleClicked:
|
174 |
+
bookByRatingData = st.session_state["bookByRatingData"]
|
175 |
+
upvotedBookIds = st.session_state["upvoted_book_ids"]
|
176 |
+
targetBookId = bookData["book_id"]
|
177 |
+
|
178 |
+
closestReadBookData = bookdb.find_closest_read_title(bookByRatingData, upvotedBookIds, targetBookId)
|
179 |
+
|
180 |
+
targetBookData = bookdb.get_book_data_by_id(targetBookId)
|
181 |
+
|
182 |
+
st.session_state["closestReadBook"] = closestReadBookData
|
183 |
+
st.session_state["clicked_book"] = bookData
|
184 |
+
|
185 |
+
st.session_state["suggestedBookText"] = ai_services.get_suggestion_text(closestReadBookData, targetBookData)
|
186 |
+
|
187 |
+
modal.open()
|
188 |
+
|
189 |
+
textContainer.markdown(f"*{bookAuthor}*")
|
190 |
+
|
191 |
+
textContainer.markdown(f"Published: {int(bookPubYear)}")
|
192 |
+
|
193 |
+
# containerCol2.markdown(textHTMLcontent, unsafe_allow_html=True)
|
194 |
+
# containerCol2.markdown(f"[{bookTitle}](https://streamlit.io)\n\n*{bookAuthor}*\n\nPublished: {int(bookPubYear)}")
|
195 |
+
# radioContainer = containerCol3.container()
|
196 |
+
|
197 |
+
containerCol3.radio(label="Upvote/Downvote", label_visibility="hidden", options=["👍", "👎"], key=f"update_recommendation_{counter}", index=None)
|
198 |
+
|
199 |
+
# radioContainer.button('Why This Book?', type="secondary", key=f"submit_{counter}")
|
200 |
+
|
201 |
+
st.button('Update Recommendations', type="primary", on_click=on_update_recommendations)
|
202 |
|
203 |
st.subheader("Your Ratings")
|
204 |
|
|
|
220 |
for bookId in downvotedBookIds:
|
221 |
displayCol2.markdown(f' - {bookId}-{bookdb.get_book_title(bookId)}')
|
222 |
|
223 |
+
st.button('Reset All Ratings', type="secondary", on_click=on_reset_votes)
|
224 |
|
225 |
# st.write(f"Similar User Min Percent Shared Books = {round(bookdb.SIMILAR_USER_MIN_PERCENT_SHARED_BOOKS * 100)}%")
|
226 |
# st.write(f"Similar User Min Correlation = {bookdb.SIMILAR_USER_MIN_CORRELATION}")
|
|
|
228 |
# if "numSimilarUsers" in st.session_state:
|
229 |
# st.write(f"{st.session_state['numSimilarUsers']} similar users")
|
230 |
|
231 |
+
# Form with two multiselects for voting on books that have not been rated yet.
with st.form(key='upvote_form'):
    col1, col2 = st.columns(2)

    allBookTitles = bookdb.get_all_book_titles()

    # Build the rated titles as a set once, so each membership test below is a
    # hash lookup rather than a scan of the whole array of rated titles.
    myRatedBookTitles = set(
        bookdb.get_book_titles(
            st.session_state["upvoted_book_ids"] + st.session_state["downvoted_book_ids"]
        )
    )

    # Offer only the titles the user has not voted on.
    remainingBookTitles = [x for x in allBookTitles if x not in myRatedBookTitles]

    col1.multiselect(
        'Upvote Books 👍',
        remainingBookTitles,
        key='multiselect_upvote'
    )
    col2.multiselect(
        'Downvote Books 👎',
        remainingBookTitles,
        key='multiselect_downvote'
    )

    st.form_submit_button(label='Submit', type="primary", on_click=on_submit_votes)

    # Second submit button wired to the hard-coded test votes.
    st.form_submit_button(label='Test', type="secondary", on_click=on_test_submit_votes)
|
254 |
+
|
255 |
+
# Show the correlated-readers table once a result has been stored.
df = st.session_state["topCorrelatedReadersData"]

if df is not None:
    st.subheader("Top Correlated Readers")
    st.dataframe(df, use_container_width=True)
|
261 |
+
|
262 |
+
if modal.is_open():
    # Standard-library helpers, imported here so this block brings its own
    # dependencies into scope.
    from html import escape
    from urllib.parse import quote_plus

    with modal.container():
        clickedBook = st.session_state["clicked_book"]

        clickedBookMetadata = bookdb.get_book_metadata_by_id(clickedBook["book_id"])
        clickedBookDescription = clickedBookMetadata["description"]

        # The stored description is handed to st.html unchanged.
        st.html(f"{clickedBookDescription}")

        aiSuggestContainer = st.container(border=True)

        closestReadBook = st.session_state["closestReadBook"]
        suggestedBookText = st.session_state["suggestedBookText"]

        # Escape every value interpolated into the markup so characters such
        # as '<' or '&' in a title, an author list or the generated text are
        # shown literally instead of being parsed as HTML.
        closestTitleHtml = escape(str(closestReadBook['title']))
        closestAuthorsHtml = escape(str(closestReadBook['authors']))
        suggestedTextHtml = escape(str(suggestedBookText))

        aiSuggestContainer.html(f"<p style=\"color:#DA70D6;\">💫 Because you liked <i><b>{closestTitleHtml}</b></i> by <b>{closestAuthorsHtml}</b>...</p>")

        aiSuggestContainer.html(f"<p style=\"color:#DA70D6;\">{suggestedTextHtml}</p>")

        buttonCol1, buttonCol2, buttonCol3, buttonCol4 = st.columns(4)

        clickedBookTitle = clickedBook["title"]
        clickedBookAuthor = clickedBook["authors"]

        # Percent-encode the search terms: a raw '#', '&' or '?' in a title
        # would otherwise end or split the query string of the outbound URL.
        outboundLinkSuffix = quote_plus(f"{clickedBookTitle} {clickedBookAuthor}")

        buttonCol1.link_button('🛒 Bookshop', f'https://bookshop.org/search?keywords={outboundLinkSuffix}')
        buttonCol2.link_button('📚 Biblio', f'https://www.biblio.com/search.php?stage=1&result_type=works&keyisbn={outboundLinkSuffix}')
        buttonCol3.link_button('🎧 Libro', f'https://libro.fm/search?utf8=%E2%9C%93&q={outboundLinkSuffix}')
        buttonCol4.link_button('💬 Hardcover', f'https://hardcover.app/search?q={outboundLinkSuffix}')
|
bookdb.py
CHANGED
@@ -1,17 +1,48 @@
|
|
1 |
import pandas as pd
|
|
|
|
|
2 |
|
3 |
SIMILAR_USER_MIN_PERCENT_SHARED_BOOKS = 0.30
|
4 |
SIMILAR_USER_MIN_CORRELATION = 0.25
|
5 |
|
6 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
usecols=["book_id",
|
8 |
-
|
9 |
# "average_rating",
|
10 |
-
|
11 |
-
|
12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
|
14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
def get_book_ids_by_title(book_titles):
|
17 |
return books[books["title"].isin(book_titles)]["book_id"].values
|
@@ -19,19 +50,36 @@ def get_book_ids_by_title(book_titles):
|
|
19 |
def get_all_book_titles():
|
20 |
return books["title"].values
|
21 |
|
|
|
|
|
|
|
22 |
def get_book_title(book_id):
|
23 |
return books[books["book_id"] == book_id]["title"].values[0]
|
24 |
|
25 |
def get_book_titles(book_ids):
|
26 |
return books[books["book_id"].isin(book_ids)]["title"].values
|
27 |
|
28 |
-
def
|
29 |
-
|
30 |
-
# downvotedBookIds = [126, 179, 183, 184, 187, 9076, 960, 5895, 777, 6902, 2084, 584] # TODO REMOVE
|
31 |
|
32 |
-
#
|
33 |
-
|
34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
RATING_FOR_UPVOTE = 5
|
36 |
RATING_FOR_DOWNVOTE = 1
|
37 |
|
@@ -42,12 +90,12 @@ def update_user_ratings(upvotedBookIds, downvotedBookIds):
|
|
42 |
for bookId in upvotedBookIds:
|
43 |
appendBookIds.append(bookId)
|
44 |
appendBookRatings.append(RATING_FOR_UPVOTE)
|
45 |
-
appendUserIds.append(
|
46 |
|
47 |
for bookId in downvotedBookIds:
|
48 |
appendBookIds.append(bookId)
|
49 |
appendBookRatings.append(RATING_FOR_DOWNVOTE)
|
50 |
-
appendUserIds.append(
|
51 |
|
52 |
newUserData = {
|
53 |
'book_id': appendBookIds,
|
@@ -59,11 +107,11 @@ def update_user_ratings(upvotedBookIds, downvotedBookIds):
|
|
59 |
|
60 |
ratings = pd.concat([baseRatings, newRows], ignore_index=True)
|
61 |
|
62 |
-
|
63 |
|
64 |
-
user_df =
|
65 |
|
66 |
-
targetUserDf = user_df[user_df.index ==
|
67 |
|
68 |
targetBooksRead = targetUserDf.dropna(axis=1).columns.tolist()
|
69 |
|
@@ -78,9 +126,11 @@ def update_user_ratings(upvotedBookIds, downvotedBookIds):
|
|
78 |
print(userBookCount)
|
79 |
|
80 |
# from there get users who've read at least X percent of the main user
|
81 |
-
minBookCount = book_read_df.shape[1] * SIMILAR_USER_MIN_PERCENT_SHARED_BOOKS
|
|
|
|
|
82 |
|
83 |
-
|
84 |
|
85 |
usersSameBooks = userBookCount[userBookCount > minBookCount].index
|
86 |
|
@@ -94,15 +144,25 @@ def update_user_ratings(upvotedBookIds, downvotedBookIds):
|
|
94 |
|
95 |
corr_df = filted_df.T.corr().unstack()
|
96 |
|
97 |
-
top_readers =
|
98 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
99 |
print(top_readers)
|
100 |
|
101 |
-
if (
|
102 |
-
top_readers = top_readers.drop(
|
103 |
|
104 |
# get the ratings for the top readers
|
105 |
-
top_readers_ratings = pd.merge(top_readers,
|
106 |
|
107 |
# weight their ratings by how correlated they are with the user
|
108 |
top_readers_ratings['weighted_rating'] = top_readers_ratings['corr'] * top_readers_ratings['rating']
|
@@ -118,15 +178,24 @@ def update_user_ratings(upvotedBookIds, downvotedBookIds):
|
|
118 |
books_recommend = recommendation_df[recommendation_df["weighted_rating"] > 1].sort_values(by="weighted_rating", ascending=False).head(20)
|
119 |
|
120 |
# get the recommended books (and sort by average_rating)
|
121 |
-
recommendedBooks = books[books["book_id"].isin(books_recommend.index)]
|
|
|
|
|
|
|
|
|
122 |
# drop book_id column
|
123 |
-
recommendedBooks = recommendedBooks.drop(columns=["
|
|
|
|
|
|
|
124 |
|
125 |
return {
|
126 |
"upvotedBookIds": upvotedBookIds,
|
127 |
"downvotedBookIds": downvotedBookIds,
|
128 |
"numSimilarUsers" : len(usersSameBooks),
|
129 |
-
"recommendedBooksData":
|
|
|
|
|
130 |
# sort by correlation
|
131 |
"topCorrelatedReadersData" : top_readers.sort_values(by="corr", ascending=False)
|
132 |
}
|
|
|
1 |
import pandas as pd
|
2 |
+
import json
|
3 |
+
import streamlit as st
|
4 |
|
5 |
SIMILAR_USER_MIN_PERCENT_SHARED_BOOKS = 0.30
|
6 |
SIMILAR_USER_MIN_CORRELATION = 0.25
|
7 |
|
8 |
+
MAX_MIN_BOOK_COUNT_FOR_SIMILAR_USER = 10 # the maximum amount of books required for a user to be considered a similar user
|
9 |
+
|
10 |
+
DEFAULT_BOOK_COVER_URL = "https://m.media-amazon.com/images/I/81QPHl7zgbL._AC_UF1000,1000_QL80_.jpg"
|
11 |
+
|
12 |
+
# st.cache is deprecated; st.cache_data is its documented replacement for
# functions that return data such as DataFrames.
@st.cache_data
def get_dataframes():
    """Load the book catalogue, the base ratings and the book metadata from disk.

    Returns:
        A ``(booksDf, baseRatingsDf, bookMetadataJSON)`` tuple: the selected
        columns of ``books.csv`` with ``book_id`` renumbered from 1 in row
        order, the full contents of ``ratings.csv``, and the parsed contents
        of ``book_metadata.json``.
    """
    booksDf = pd.read_csv(
        "./goodreads/books.csv",
        usecols=[
            "book_id",
            "original_publication_year",
            "isbn",
            "authors",
            "title",
            "average_rating",
        ],
    )
    # Overwrite book_id with the 1-based row position.
    booksDf['book_id'] = range(1, len(booksDf) + 1)

    baseRatingsDf = pd.read_csv("./goodreads/ratings.csv")

    # Use a context manager so the file handle is closed deterministically,
    # and read as UTF-8 instead of the platform's default encoding.
    with open("./goodreads/book_metadata.json", encoding="utf-8") as metadataFile:
        bookMetadataJSON = json.load(metadataFile)

    return booksDf, baseRatingsDf, bookMetadataJSON
|
29 |
+
|
30 |
+
books, baseRatings, bookMetadata = get_dataframes()
|
31 |
+
|
32 |
+
targetUserId = baseRatings['user_id'].max() + 1
|
33 |
+
|
34 |
+
def get_book_metadata_by_id(book_id):
    """Return the metadata stored for *book_id*, with defaults for missing fields.

    Args:
        book_id: Book identifier; it is converted to ``str`` before the
            lookup in ``bookMetadata``.

    Returns:
        A new dict that always contains ``"description"`` and
        ``"thumbnail"``. Values from the stored entry win; a field the entry
        lacks falls back to ``"n/a"`` or ``DEFAULT_BOOK_COVER_URL``. When
        there is no usable entry the defaults alone are returned.
    """
    defaults = {
        "description": "n/a",
        "thumbnail": DEFAULT_BOOK_COVER_URL,
    }

    storedEntry = bookMetadata.get(str(book_id))
    if not isinstance(storedEntry, dict):
        return defaults

    # Merge rather than return the stored entry directly, so an entry that
    # lacks one of the two fields cannot make callers raise KeyError.
    return {**defaults, **storedEntry}
|
46 |
|
47 |
def get_book_ids_by_title(book_titles):
    """Return the ``book_id`` values of the rows of ``books`` whose title is in *book_titles*."""
    titleMatches = books["title"].isin(book_titles)
    return books.loc[titleMatches, "book_id"].values
|
|
|
50 |
def get_all_book_titles():
    """Return the values of the ``title`` column of ``books``."""
    titleColumn = books["title"]
    return titleColumn.values
|
52 |
|
53 |
+
def get_book_data_by_id(book_id):
    """Return the first row of ``books`` whose ``book_id`` equals *book_id*, as a dict.

    Raises:
        IndexError: If no row has that ``book_id``.
    """
    matchingRows = books.loc[books["book_id"] == book_id]
    rowsAsDicts = matchingRows.to_dict(orient="records")
    return rowsAsDicts[0]
|
55 |
+
|
56 |
def get_book_title(book_id):
    """Return the title of the first row of ``books`` whose ``book_id`` equals *book_id*.

    Raises:
        IndexError: If no row has that ``book_id``.
    """
    idMatches = books["book_id"] == book_id
    return books.loc[idMatches, "title"].values[0]
|
58 |
|
59 |
def get_book_titles(book_ids):
    """Return the titles of the rows of ``books`` whose ``book_id`` is in *book_ids*."""
    idMatches = books["book_id"].isin(book_ids)
    return books.loc[idMatches, "title"].values
|
61 |
|
62 |
+
def find_closest_read_title(bookByRatingData, upvotedBookIds, targetBookId):
    """Return the upvoted book whose ratings correlate best with the target book's.

    Every user in *bookByRatingData* contributes one observation per book:
    that user's mean rating of the book, or 0 when the user has not rated it.
    The upvoted book whose observations have the highest correlation with the
    target book's observations is returned.

    Args:
        bookByRatingData: DataFrame with ``user_id``, ``book_id`` and
            ``rating`` columns.
        upvotedBookIds: Iterable of book ids the user upvoted (a list or an
            array). Repeated ids and the target id itself are ignored.
        targetBookId: Id of the book to compare the upvoted books against.

    Returns:
        The dict produced by ``get_book_data_by_id`` for the closest book.

    Raises:
        ValueError: If there is no upvoted book other than the target, or no
            correlation with the target could be computed.
    """
    # Build a plain list first: ``+`` on a NumPy array adds element-wise
    # instead of concatenating. dict.fromkeys drops repeated ids while keeping
    # their order, and the target is excluded by id rather than by relying on
    # its self-correlation being exactly 1.0.
    candidateIds = [
        bookId
        for bookId in dict.fromkeys(upvotedBookIds)
        if bookId != targetBookId
    ]
    if not candidateIds:
        raise ValueError("no upvoted book other than the target to compare against")

    wantedIds = candidateIds + [targetBookId]

    # Pivot only the ratings of the books being compared; pivoting every book
    # builds a users x all-books table just to discard most of its columns.
    relevantRatings = bookByRatingData[bookByRatingData["book_id"].isin(wantedIds)]
    ratingsByUser = (
        relevantRatings.groupby(["user_id", "book_id"])["rating"].mean().unstack()
    )

    # Keep one row for every user in the data, including users who rated none
    # of these books, so the observations match a pivot of the full table.
    allUserIds = bookByRatingData["user_id"].dropna().unique()
    book_read_df = ratingsByUser.reindex(index=allUserIds, columns=wantedIds)

    # replace NaNs with 0
    book_read_df = book_read_df.fillna(0)

    # Correlate each candidate column with the target column only.
    correlations = book_read_df[candidateIds].corrwith(book_read_df[targetBookId])
    correlations = correlations.dropna()
    if correlations.empty:
        raise ValueError("no correlation with the target book could be computed")

    closestBookId = correlations.idxmax()

    return get_book_data_by_id(closestBookId)
|
81 |
+
|
82 |
+
def update_user_ratings(upvotedBookIds, downvotedBookIds):
|
83 |
RATING_FOR_UPVOTE = 5
|
84 |
RATING_FOR_DOWNVOTE = 1
|
85 |
|
|
|
90 |
for bookId in upvotedBookIds:
|
91 |
appendBookIds.append(bookId)
|
92 |
appendBookRatings.append(RATING_FOR_UPVOTE)
|
93 |
+
appendUserIds.append(targetUserId)
|
94 |
|
95 |
for bookId in downvotedBookIds:
|
96 |
appendBookIds.append(bookId)
|
97 |
appendBookRatings.append(RATING_FOR_DOWNVOTE)
|
98 |
+
appendUserIds.append(targetUserId)
|
99 |
|
100 |
newUserData = {
|
101 |
'book_id': appendBookIds,
|
|
|
107 |
|
108 |
ratings = pd.concat([baseRatings, newRows], ignore_index=True)
|
109 |
|
110 |
+
book_by_rating_df = pd.merge(books, ratings, on="book_id", how="inner")
|
111 |
|
112 |
+
user_df = book_by_rating_df.groupby(["user_id","title"])["rating"].mean().unstack()
|
113 |
|
114 |
+
targetUserDf = user_df[user_df.index == targetUserId]
|
115 |
|
116 |
targetBooksRead = targetUserDf.dropna(axis=1).columns.tolist()
|
117 |
|
|
|
126 |
print(userBookCount)
|
127 |
|
128 |
# from there get users who've read at least X percent of the main user
|
129 |
+
minBookCount = int(book_read_df.shape[1] * SIMILAR_USER_MIN_PERCENT_SHARED_BOOKS)
|
130 |
+
|
131 |
+
minBookCount = min(minBookCount, MAX_MIN_BOOK_COUNT_FOR_SIMILAR_USER)
|
132 |
|
133 |
+
print(f'Min book count for Similar User: {minBookCount}')
|
134 |
|
135 |
usersSameBooks = userBookCount[userBookCount > minBookCount].index
|
136 |
|
|
|
144 |
|
145 |
corr_df = filted_df.T.corr().unstack()
|
146 |
|
147 |
+
top_readers = None
|
148 |
|
149 |
+
minTopReaderCorrelation = SIMILAR_USER_MIN_CORRELATION
|
150 |
+
|
151 |
+
while top_readers is None:
|
152 |
+
top_readers = pd.DataFrame(corr_df[targetUserId][corr_df[targetUserId] > minTopReaderCorrelation], columns=["corr"])
|
153 |
+
|
154 |
+
# if top_readers only has 1 row, then we need to lower the correlation threshold
|
155 |
+
if len(top_readers) <= 1:
|
156 |
+
minTopReaderCorrelation -= 0.05
|
157 |
+
top_readers = None
|
158 |
+
|
159 |
print(top_readers)
|
160 |
|
161 |
+
if (targetUserId in top_readers.index):
|
162 |
+
top_readers = top_readers.drop(targetUserId)
|
163 |
|
164 |
# get the ratings for the top readers
|
165 |
+
top_readers_ratings = pd.merge(top_readers, book_by_rating_df[["user_id", "book_id", "rating"]], how='inner', on="user_id")
|
166 |
|
167 |
# weight their ratings by how correlated they are with the user
|
168 |
top_readers_ratings['weighted_rating'] = top_readers_ratings['corr'] * top_readers_ratings['rating']
|
|
|
178 |
books_recommend = recommendation_df[recommendation_df["weighted_rating"] > 1].sort_values(by="weighted_rating", ascending=False).head(20)
|
179 |
|
180 |
# get the recommended books (and sort by average_rating)
|
181 |
+
recommendedBooks = books[books["book_id"].isin(books_recommend.index)]
|
182 |
+
|
183 |
+
# sort recommended books by the weighted_rating in books_recommend
|
184 |
+
recommendedBooks = recommendedBooks.merge(books_recommend, on="book_id").sort_values(by="weighted_rating", ascending=False)
|
185 |
+
|
186 |
# drop book_id column
|
187 |
+
recommendedBooks = recommendedBooks.drop(columns=["average_rating"])
|
188 |
+
|
189 |
+
# get each row in the recommendedBooks dataframe as a dictionary
|
190 |
+
recommendedBooksRowsAsDicts = recommendedBooks.to_dict(orient="records")
|
191 |
|
192 |
return {
|
193 |
"upvotedBookIds": upvotedBookIds,
|
194 |
"downvotedBookIds": downvotedBookIds,
|
195 |
"numSimilarUsers" : len(usersSameBooks),
|
196 |
+
"recommendedBooksData": recommendedBooksRowsAsDicts,
|
197 |
+
"bookByRatingData": book_by_rating_df,
|
198 |
+
|
199 |
# sort by correlation
|
200 |
"topCorrelatedReadersData" : top_readers.sort_values(by="corr", ascending=False)
|
201 |
}
|
requirements.txt
CHANGED
@@ -1 +1,10 @@
|
|
1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
numpy
|
3 |
+
scikit-learn
|
4 |
+
openai
|
5 |
+
tqdm
|
6 |
+
streamlit_modal
|
7 |
+
python-dotenv
|
8 |
+
|
9 |
+
# local packages
|
10 |
+
-e .
|
thumbnail.jpg
ADDED
![]() |