Commit e12639d (1 parent: 6010e28)
Simple Recommender complete

Files changed:
- App/app.py  +0 -0
- App/tfidfrecommender.py  +4 -1
- app.py  +26 -5
App/app.py
DELETED
File without changes

App/tfidfrecommender.py
CHANGED

@@ -82,7 +82,7 @@ class TfidfRecommender:
                 lambda x: self.__clean_text(x, for_BERT)
             )
 
-    def tokenize_text(self, ngram_range=(1, 3), min_df=0):
+    def tokenize_text(self, ngram_range=(1, 3), min_df=0.0):
         """Tokenize the input text.
 
         Args:
@@ -181,11 +181,14 @@ class TfidfRecommender:
         return self.stop_words
 
     def recommend_k_items(self, title, k):
+        print("jjj")
         idx = self.df[self.df['title'] == title].index[0]
+        print("ppp")
         cosine_sim = cosine_similarity(self.tfidf_matrix[int(idx)], self.tfidf_matrix)
         similarity_scores = list(enumerate(cosine_sim[0]))
         similarity_scores = sorted(similarity_scores, key=lambda x: x[1], reverse=True)
         similarity_scores = similarity_scores[1: k + 1]
+        print("lol")
         movie_indices = [i[0] for i in similarity_scores]
         return self.df.iloc[movie_indices]['id']
 
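In tokenize_text, the only substantive change is the min_df default moving from 0 to 0.0. Assuming the value is forwarded to scikit-learn's TfidfVectorizer, the distinction matters on recent scikit-learn releases, whose parameter validation accepts 0.0 as a document-frequency proportion but rejects integer values below 1.

The recommend_k_items hunk only adds debug prints around the existing cosine-similarity lookup. For reference, here is a minimal standalone sketch of that lookup pattern; the toy DataFrame, the column names, and the recommend_k helper are illustrative stand-ins, not the repo's TfidfRecommender API.

    # Minimal sketch of TF-IDF + cosine-similarity top-k lookup (assumed setup,
    # not the repo's TfidfRecommender implementation).
    import pandas as pd
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.metrics.pairwise import cosine_similarity

    df = pd.DataFrame({
        'id': [1, 2, 3, 4],
        'title': ['A', 'B', 'C', 'D'],
        'description': ['space opera epic', 'space western adventure',
                        'romantic comedy', 'space mining documentary'],
    })

    # Vectorize descriptions; as a float, min_df=0.0 is a proportion and keeps every term.
    vectorizer = TfidfVectorizer(ngram_range=(1, 3), min_df=0.0)
    tfidf_matrix = vectorizer.fit_transform(df['description'])

    def recommend_k(title, k):
        # Row index of the query title, then cosine similarity against every row.
        idx = df[df['title'] == title].index[0]
        sims = cosine_similarity(tfidf_matrix[int(idx)], tfidf_matrix)[0]
        scores = sorted(enumerate(sims), key=lambda x: x[1], reverse=True)
        top = scores[1:k + 1]  # skip position 0, normally the query item itself
        return df.iloc[[i for i, _ in top]]['id']

    print(recommend_k('A', 2))

The [1:k + 1] slice is what keeps the query title out of its own recommendations: its self-similarity of 1.0 normally sorts it to position 0.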

app.py
CHANGED

@@ -10,24 +10,45 @@ desc = pd.read_csv('App/data/descriptions.csv')
 
 rec = TfidfRecommender(desc, 'id', 'description', "none")
 def initialize_and_tokenize(tokenizer):
+    print("tok")
     rec.tokenization_method = tokenizer
     rec.tokenize_text()
 
 names = []
 def recommend(movies, tok):
-
+    rec.tokenization_method = tok
+    tf, vecs = rec.tokenize_text()
+    rec.fit(tf, vecs)
+    print("rec")
     pool = concurrent.futures.ThreadPoolExecutor(max_workers=10)
     futures = [pool.submit(rec.recommend_k_items, movie, 5) for movie in movies]
-    idss = [
+    idss = []
+    print("after submit")
+    for i in range(len(futures)):
+        print("res")
+        idss.append(futures[i].result())
+    print("shutdown")
+    pool.shutdown(wait=True)
     ids = [id for ids in idss for id in ids]
     ids = list(set(ids))
     names = desc[desc['id'].isin(ids)]['title'].to_list()
     return ', '.join(names)
 
-
-
+def recom(movies, tok):
+    rec.tokenization_method = tok
+    tf, vecs = rec.tokenize_text()
+    rec.fit(tf, vecs)
+    print(movies[0])
+    ids = rec.recommend_k_items(movies[0], 5)
+    print("reccc")
+    # ids = list(set(ids))
+    names = desc[desc['id'].isin(ids)]['title'].to_list()
+    return ', '.join(names)
+
+demo = gr.Interface(fn=recom,
+                    inputs=[gr.Dropdown(choices = list(desc['title'][:20]), multiselect=True, max_choices=3, label="Movies"),
                             gr.Radio(["bert", "scibert", "nltk", "none"], value="none", label="Tokenization and text preprocess")],
-    outputs=gr.Textbox())
+                    outputs=gr.Textbox(label="Recommended"))
 demo.launch()
 
 
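In the new app.py, recommend() fans each selected movie out to a ThreadPoolExecutor, collects futures[i].result() in submission order, and then shuts the pool down manually. The same fan-out/fan-in can be written more compactly with the executor as a context manager; the sketch below is illustrative only: recommend_many, its rec parameter, and k are assumed names, and rec.recommend_k_items is taken to return an iterable of ids as in the class above.

    import concurrent.futures

    def recommend_many(rec, movies, k=5):
        # Fan out: one recommend_k_items call per selected movie, run on worker threads.
        with concurrent.futures.ThreadPoolExecutor(max_workers=10) as pool:
            futures = [pool.submit(rec.recommend_k_items, movie, k) for movie in movies]
            # Fan in: gather each result; the pool is shut down when the block exits.
            idss = [f.result() for f in futures]
        # Flatten the per-movie id lists and drop duplicates.
        return list({i for ids in idss for i in ids})

As committed, though, the Gradio Interface is wired to recom, which calls recommend_k_items only for movies[0]; the extra selections allowed by the multiselect Dropdown and the threaded recommend() path are left unused.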