valeriedaash committed on
Commit
2d3924f
·
1 Parent(s): 99c7207
Files changed (1) hide show
  1. app.py +101 -100
app.py CHANGED
@@ -109,71 +109,13 @@
109
  # st.write(f'Автор: {df["author"][i]}')
110
  # st.write(f'Ссылка: {df["page_url"][i]}')
111
  # st.write(f'Аннотация: {df["annotation"][i]}')
112
- # import streamlit as st
113
- # import pandas as pd
114
- # import numpy as np
115
- # import torch
116
- # from transformers import AutoTokenizer, AutoModel
117
- # from sklearn.metrics.pairwise import pairwise_distances, cosine_similarity
118
- # import faiss
119
-
120
- # tokenizer = AutoTokenizer.from_pretrained("cointegrated/rubert-tiny2")
121
- # model = AutoModel.from_pretrained("cointegrated/rubert-tiny2")
122
-
123
- # df = pd.read_csv('data_final.csv')
124
-
125
- # MAX_LEN = 300
126
-
127
- # # @st.cache(hash_funcs={tokenizers.Tokenizer: my_hash_func})
128
- # def embed_bert_cls(text, model, tokenizer):
129
- # t = tokenizer(text, padding=True, truncation=True, return_tensors='pt', max_length=MAX_LEN)
130
- # with torch.no_grad():
131
- # model_output = model(**{k: v.to(model.device) for k, v in t.items()})
132
- # embeddings = model_output.last_hidden_state[:, 0, :]
133
- # embeddings = torch.nn.functional.normalize(embeddings)
134
- # return embeddings[0].cpu().numpy()
135
-
136
- # @st.cache_data
137
- # def load_faiss_index():
138
- # books_embs = np.loadtxt('vectors.txt')
139
- # index = faiss.IndexFlatIP(books_embs.shape[1])
140
- # index.add(books_embs)
141
- # return index
142
-
143
- # st.title('Приложение для рекомендации книг')
144
-
145
- # category_filter = st.selectbox('Выберите категорию книги (необязательно)', ['Все'] + list(df['category'].unique()))
146
-
147
- # text = st.text_input('Введите запрос:')
148
- # top_n = st.number_input('Введите количество рекомендаций:', min_value=1, max_value=50, value=1)
149
-
150
- # recommend_button = st.button('Найти')
151
-
152
- # if text and recommend_button:
153
- # query_emb = embed_bert_cls(text, model, tokenizer)
154
- # index = load_faiss_index()
155
- # D, I = index.search(query_emb.reshape(1, -1), top_n)
156
-
157
- # st.subheader('Топ рекомендуемых книг:')
158
-
159
- # for i, j in zip(I[0], D[0]):
160
- # if category_filter == 'Все' or df['category'][i] == category_filter:
161
- # col_1, col_2 = st.columns([1, 3])
162
-
163
- # with col_1:
164
- # st.image(df['image_url'][i], use_column_width=True)
165
- # st.write(round(j, 2))
166
- # with col_2:
167
- # st.write(f'Название книги: **{df["title"][i]}**')
168
- # st.write(f'Автор: {df["author"][i]}')
169
- # st.write(f'Ссылка: {df["page_url"][i]}')
170
- # st.write(f'Аннотация: {df["annotation"][i]}')
171
 
172
  import streamlit as st
173
  import pandas as pd
174
  import numpy as np
175
  import torch
176
  from transformers import AutoTokenizer, AutoModel
 
177
  import faiss
178
 
179
  tokenizer = AutoTokenizer.from_pretrained("cointegrated/rubert-tiny2")
@@ -182,13 +124,8 @@ model = AutoModel.from_pretrained("cointegrated/rubert-tiny2")
182
  df = pd.read_csv('data_final.csv')
183
 
184
  MAX_LEN = 300
185
- @st.cache_data
186
- def load_faiss_index():
187
- books_embs = np.loadtxt('vectors.txt')
188
- index = faiss.IndexFlatIP(books_embs.shape[1])
189
- index.add(books_embs)
190
- return index
191
 
 
192
  def embed_bert_cls(text, model, tokenizer):
193
  t = tokenizer(text, padding=True, truncation=True, return_tensors='pt', max_length=MAX_LEN)
194
  with torch.no_grad():
@@ -197,23 +134,12 @@ def embed_bert_cls(text, model, tokenizer):
197
  embeddings = torch.nn.functional.normalize(embeddings)
198
  return embeddings[0].cpu().numpy()
199
 
200
- @st.cache_data()
201
- def get_recommendations(query_emb, top_n):
202
- index = load_faiss_index()
203
- D, I = index.search(query_emb.reshape(1, -1), top_n)
204
- recommendations = []
205
- for i, j in zip(I[0], D[0]):
206
- recommendation = {
207
- 'image_url': df['image_url'][i],
208
- 'title': df['title'][i],
209
- 'author': df['author'][i],
210
- 'page_url': df['page_url'][i],
211
- 'annotation': df['annotation'][i],
212
- 'category': df['category'][i],
213
- 'similarity_score': round(j, 2)
214
- }
215
- recommendations.append(recommendation)
216
- return recommendations
217
 
218
  st.title('Приложение для рекомендации книг')
219
 
@@ -226,22 +152,97 @@ recommend_button = st.button('Найти')
226
 
227
  if text and recommend_button:
228
  query_emb = embed_bert_cls(text, model, tokenizer)
229
- recommendations = get_recommendations(query_emb, top_n)
230
-
231
- if not recommendations: # Если рекомендации не найдены
232
- st.write('По вашему запросу ничего не найдено.')
233
- else:
234
- st.subheader('Топ рекомендуемых книг:')
235
- for recommendation in recommendations:
236
- if category_filter == 'Все' or recommendation['category'] == category_filter:
237
- col_1, col_2 = st.columns([1, 3])
238
- with col_1:
239
- st.image(recommendation['image_url'], use_column_width=True)
240
- st.write(recommendation['similarity_score'])
241
- with col_2:
242
- st.write(f'Название книги: **{recommendation["title"]}**')
243
- st.write(f'Автор: {recommendation["author"]}')
244
- st.write(f'Ссылка: {recommendation["page_url"]}')
245
- st.write(f'Аннотация: {recommendation["annotation"]}')
246
- st.write(f'Категория: {recommendation["category"]}')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
247
 
 
109
  # st.write(f'Автор: {df["author"][i]}')
110
  # st.write(f'Ссылка: {df["page_url"][i]}')
111
  # st.write(f'Аннотация: {df["annotation"][i]}')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
 
113
  import streamlit as st
114
  import pandas as pd
115
  import numpy as np
116
  import torch
117
  from transformers import AutoTokenizer, AutoModel
118
+ from sklearn.metrics.pairwise import pairwise_distances, cosine_similarity
119
  import faiss
120
 
121
  tokenizer = AutoTokenizer.from_pretrained("cointegrated/rubert-tiny2")
 
124
  df = pd.read_csv('data_final.csv')
125
 
126
  MAX_LEN = 300
 
 
 
 
 
 
127
 
128
+ # @st.cache(hash_funcs={tokenizers.Tokenizer: my_hash_func})
129
  def embed_bert_cls(text, model, tokenizer):
130
  t = tokenizer(text, padding=True, truncation=True, return_tensors='pt', max_length=MAX_LEN)
131
  with torch.no_grad():
 
134
  embeddings = torch.nn.functional.normalize(embeddings)
135
  return embeddings[0].cpu().numpy()
136
 
137
+ @st.cache_data
138
+ def load_faiss_index():
139
+ books_embs = np.loadtxt('vectors.txt')
140
+ index = faiss.IndexFlatIP(books_embs.shape[1])
141
+ index.add(books_embs)
142
+ return index
 
 
 
 
 
 
 
 
 
 
 
143
 
144
  st.title('Приложение для рекомендации книг')
145
 
 
152
 
153
  if text and recommend_button:
154
  query_emb = embed_bert_cls(text, model, tokenizer)
155
+ index = load_faiss_index()
156
+ D, I = index.search(query_emb.reshape(1, -1), top_n)
157
+
158
+ st.subheader('Топ рекомендуемых книг:')
159
+
160
+ for i, j in zip(I[0], D[0]):
161
+ if category_filter == 'Все' or df['category'][i] == category_filter:
162
+ col_1, col_2 = st.columns([1, 3])
163
+
164
+ with col_1:
165
+ st.image(df['image_url'][i], use_column_width=True)
166
+ st.write(round(j, 2))
167
+ with col_2:
168
+ st.write(f'Название книги: **{df["title"][i]}**')
169
+ st.write(f'Автор: {df["author"][i]}')
170
+ st.write(f'Ссылка: {df["page_url"][i]}')
171
+ st.write(f'Аннотация: {df["annotation"][i]}')
172
+
173
+ # import streamlit as st
174
+ # import pandas as pd
175
+ # import numpy as np
176
+ # import torch
177
+ # from transformers import AutoTokenizer, AutoModel
178
+ # import faiss
179
+
180
+ # tokenizer = AutoTokenizer.from_pretrained("cointegrated/rubert-tiny2")
181
+ # model = AutoModel.from_pretrained("cointegrated/rubert-tiny2")
182
+
183
+ # df = pd.read_csv('data_final.csv')
184
+
185
+ # MAX_LEN = 300
186
+ # @st.cache_data
187
+ # def load_faiss_index():
188
+ # books_embs = np.loadtxt('vectors.txt')
189
+ # index = faiss.IndexFlatIP(books_embs.shape[1])
190
+ # index.add(books_embs)
191
+ # return index
192
+
193
+ # def embed_bert_cls(text, model, tokenizer):
194
+ # t = tokenizer(text, padding=True, truncation=True, return_tensors='pt', max_length=MAX_LEN)
195
+ # with torch.no_grad():
196
+ # model_output = model(**{k: v.to(model.device) for k, v in t.items()})
197
+ # embeddings = model_output.last_hidden_state[:, 0, :]
198
+ # embeddings = torch.nn.functional.normalize(embeddings)
199
+ # return embeddings[0].cpu().numpy()
200
+
201
+ # @st.cache_data()
202
+ # def get_recommendations(query_emb, top_n):
203
+ # index = load_faiss_index()
204
+ # D, I = index.search(query_emb.reshape(1, -1), top_n)
205
+ # recommendations = []
206
+ # for i, j in zip(I[0], D[0]):
207
+ # recommendation = {
208
+ # 'image_url': df['image_url'][i],
209
+ # 'title': df['title'][i],
210
+ # 'author': df['author'][i],
211
+ # 'page_url': df['page_url'][i],
212
+ # 'annotation': df['annotation'][i],
213
+ # 'category': df['category'][i],
214
+ # 'similarity_score': round(j, 2)
215
+ # }
216
+ # recommendations.append(recommendation)
217
+ # return recommendations
218
+
219
+ # st.title('Приложение для рекомендации книг')
220
+
221
+ # category_filter = st.selectbox('Выберите категорию книги (необязательно)', ['Все'] + list(df['category'].unique()))
222
+
223
+ # text = st.text_input('Введите запрос:')
224
+ # top_n = st.number_input('Введите количество рекомендаций:', min_value=1, max_value=50, value=1)
225
+
226
+ # recommend_button = st.button('Найти')
227
+
228
+ # if text and recommend_button:
229
+ # query_emb = embed_bert_cls(text, model, tokenizer)
230
+ # recommendations = get_recommendations(query_emb, top_n)
231
+
232
+ # if not recommendations: # Если рекомендации не найдены
233
+ # st.write('По вашему запросу ничего не найдено.')
234
+ # else:
235
+ # st.subheader('Топ рекомендуемых книг:')
236
+ # for recommendation in recommendations:
237
+ # if category_filter == 'Все' or recommendation['category'] == category_filter:
238
+ # col_1, col_2 = st.columns([1, 3])
239
+ # with col_1:
240
+ # st.image(recommendation['image_url'], use_column_width=True)
241
+ # st.write(recommendation['similarity_score'])
242
+ # with col_2:
243
+ # st.write(f'Название книги: **{recommendation["title"]}**')
244
+ # st.write(f'Автор: {recommendation["author"]}')
245
+ # st.write(f'Ссылка: {recommendation["page_url"]}')
246
+ # st.write(f'Аннотация: {recommendation["annotation"]}')
247
+ # st.write(f'Категория: {recommendation["category"]}')
248