amirhosseinkarami committed on
Commit
e12639d
·
1 Parent(s): 6010e28

Simple Recommender complete

Browse files
Files changed (3) hide show
  1. App/app.py +0 -0
  2. App/tfidfrecommender.py +4 -1
  3. app.py +26 -5
App/app.py DELETED
File without changes
App/tfidfrecommender.py CHANGED
@@ -82,7 +82,7 @@ class TfidfRecommender :
82
  lambda x: self.__clean_text(x, for_BERT)
83
  )
84
 
85
- def tokenize_text (self, ngram_range=(1, 3), min_df=0) :
86
  """Tokenize the input text.
87
 
88
  Args:
@@ -181,11 +181,14 @@ class TfidfRecommender :
181
  return self.stop_words
182
 
183
  def recommend_k_items (self, title, k) :
 
184
  idx = self.df[self.df['title'] == title].index[0]
 
185
  cosine_sim = cosine_similarity(self.tfidf_matrix[int(idx)], self.tfidf_matrix)
186
  similarity_scores = list(enumerate(cosine_sim[0]))
187
  similarity_scores = sorted(similarity_scores, key=lambda x: x[1], reverse=True)
188
  similarity_scores = similarity_scores[1: k + 1]
 
189
  movie_indices = [i[0] for i in similarity_scores]
190
  return self.df.iloc[movie_indices]['id']
191
 
 
82
  lambda x: self.__clean_text(x, for_BERT)
83
  )
84
 
85
+ def tokenize_text (self, ngram_range=(1, 3), min_df=0.0) :
86
  """Tokenize the input text.
87
 
88
  Args:
 
181
  return self.stop_words
182
 
183
  def recommend_k_items (self, title, k) :
184
+ print("jjj")
185
  idx = self.df[self.df['title'] == title].index[0]
186
+ print("ppp")
187
  cosine_sim = cosine_similarity(self.tfidf_matrix[int(idx)], self.tfidf_matrix)
188
  similarity_scores = list(enumerate(cosine_sim[0]))
189
  similarity_scores = sorted(similarity_scores, key=lambda x: x[1], reverse=True)
190
  similarity_scores = similarity_scores[1: k + 1]
191
+ print("lol")
192
  movie_indices = [i[0] for i in similarity_scores]
193
  return self.df.iloc[movie_indices]['id']
194
 
app.py CHANGED
@@ -10,24 +10,45 @@ desc = pd.read_csv('App/data/descriptions.csv')
10
 
11
  rec = TfidfRecommender(desc, 'id', 'description' , "none")
12
  def initialize_and_tokenize(tokenizer):
 
13
  rec.tokenization_method = tokenizer
14
  rec.tokenize_text()
15
 
16
  names = []
17
  def recommend (movies, tok) :
18
- initialize_and_tokenize(tok)
 
 
 
19
  pool = concurrent.futures.ThreadPoolExecutor(max_workers=10)
20
  futures = [pool.submit(rec.recommend_k_items, movie, 5) for movie in movies]
21
- idss = [f.result() for f in futures]
 
 
 
 
 
 
22
  ids = [id for ids in idss for id in ids]
23
  ids = list(set(ids))
24
  names = desc[desc['id'].isin(ids)]['title'].to_list()
25
  return ', '.join(names)
26
 
27
- demo = gr.Interface(fn=recommend,
28
- inputs=[gr.Dropdown(choices = list(desc['title']), multiselect=True, max_choices=3, label="Movies"),
 
 
 
 
 
 
 
 
 
 
 
29
  gr.Radio(["bert", "scibert", "nltk" , "none"], value="none", label="Tokenization and text preprocess")],
30
- outputs=gr.Textbox())
31
  demo.launch()
32
 
33
 
 
10
 
11
  rec = TfidfRecommender(desc, 'id', 'description' , "none")
12
  def initialize_and_tokenize(tokenizer):
13
+ print("tok")
14
  rec.tokenization_method = tokenizer
15
  rec.tokenize_text()
16
 
17
  names = []
18
  def recommend (movies, tok) :
19
+ rec.tokenization_method = tok
20
+ tf, vecs = rec.tokenize_text()
21
+ rec.fit(tf, vecs)
22
+ print("rec")
23
  pool = concurrent.futures.ThreadPoolExecutor(max_workers=10)
24
  futures = [pool.submit(rec.recommend_k_items, movie, 5) for movie in movies]
25
+ idss = []
26
+ print("after submit")
27
+ for i in range(len(futures)):
28
+ print("res")
29
+ idss.append(futures[i].result())
30
+ print("shutdown")
31
+ pool.shutdown(wait=True)
32
  ids = [id for ids in idss for id in ids]
33
  ids = list(set(ids))
34
  names = desc[desc['id'].isin(ids)]['title'].to_list()
35
  return ', '.join(names)
36
 
37
+ def recom(movies, tok):
38
+ rec.tokenization_method = tok
39
+ tf, vecs = rec.tokenize_text()
40
+ rec.fit(tf, vecs)
41
+ print(movies[0])
42
+ ids = rec.recommend_k_items(movies[0], 5)
43
+ print("reccc")
44
+ # ids = list(set(ids))
45
+ names = desc[desc['id'].isin(ids)]['title'].to_list()
46
+ return ', '.join(names)
47
+
48
+ demo = gr.Interface(fn=recom,
49
+ inputs=[gr.Dropdown(choices = list(desc['title'][:20]), multiselect=True, max_choices=3, label="Movies"),
50
  gr.Radio(["bert", "scibert", "nltk" , "none"], value="none", label="Tokenization and text preprocess")],
51
+ outputs=gr.Textbox(label="Recommended"))
52
  demo.launch()
53
 
54