Spaces:
Sleeping
Sleeping
Commit
·
2d3924f
1
Parent(s):
99c7207
updated
Browse files
app.py
CHANGED
@@ -109,71 +109,13 @@
|
|
109 |
# st.write(f'Автор: {df["author"][i]}')
|
110 |
# st.write(f'Ссылка: {df["page_url"][i]}')
|
111 |
# st.write(f'Аннотация: {df["annotation"][i]}')
|
112 |
-
# import streamlit as st
|
113 |
-
# import pandas as pd
|
114 |
-
# import numpy as np
|
115 |
-
# import torch
|
116 |
-
# from transformers import AutoTokenizer, AutoModel
|
117 |
-
# from sklearn.metrics.pairwise import pairwise_distances, cosine_similarity
|
118 |
-
# import faiss
|
119 |
-
|
120 |
-
# tokenizer = AutoTokenizer.from_pretrained("cointegrated/rubert-tiny2")
|
121 |
-
# model = AutoModel.from_pretrained("cointegrated/rubert-tiny2")
|
122 |
-
|
123 |
-
# df = pd.read_csv('data_final.csv')
|
124 |
-
|
125 |
-
# MAX_LEN = 300
|
126 |
-
|
127 |
-
# # @st.cache(hash_funcs={tokenizers.Tokenizer: my_hash_func})
|
128 |
-
# def embed_bert_cls(text, model, tokenizer):
|
129 |
-
# t = tokenizer(text, padding=True, truncation=True, return_tensors='pt', max_length=MAX_LEN)
|
130 |
-
# with torch.no_grad():
|
131 |
-
# model_output = model(**{k: v.to(model.device) for k, v in t.items()})
|
132 |
-
# embeddings = model_output.last_hidden_state[:, 0, :]
|
133 |
-
# embeddings = torch.nn.functional.normalize(embeddings)
|
134 |
-
# return embeddings[0].cpu().numpy()
|
135 |
-
|
136 |
-
# @st.cache_data
|
137 |
-
# def load_faiss_index():
|
138 |
-
# books_embs = np.loadtxt('vectors.txt')
|
139 |
-
# index = faiss.IndexFlatIP(books_embs.shape[1])
|
140 |
-
# index.add(books_embs)
|
141 |
-
# return index
|
142 |
-
|
143 |
-
# st.title('Приложение для рекомендации книг')
|
144 |
-
|
145 |
-
# category_filter = st.selectbox('Выберите категорию книги (необязательно)', ['Все'] + list(df['category'].unique()))
|
146 |
-
|
147 |
-
# text = st.text_input('Введите запрос:')
|
148 |
-
# top_n = st.number_input('Введите количество рекомендаций:', min_value=1, max_value=50, value=1)
|
149 |
-
|
150 |
-
# recommend_button = st.button('Найти')
|
151 |
-
|
152 |
-
# if text and recommend_button:
|
153 |
-
# query_emb = embed_bert_cls(text, model, tokenizer)
|
154 |
-
# index = load_faiss_index()
|
155 |
-
# D, I = index.search(query_emb.reshape(1, -1), top_n)
|
156 |
-
|
157 |
-
# st.subheader('Топ рекомендуемых книг:')
|
158 |
-
|
159 |
-
# for i, j in zip(I[0], D[0]):
|
160 |
-
# if category_filter == 'Все' or df['category'][i] == category_filter:
|
161 |
-
# col_1, col_2 = st.columns([1, 3])
|
162 |
-
|
163 |
-
# with col_1:
|
164 |
-
# st.image(df['image_url'][i], use_column_width=True)
|
165 |
-
# st.write(round(j, 2))
|
166 |
-
# with col_2:
|
167 |
-
# st.write(f'Название книги: **{df["title"][i]}**')
|
168 |
-
# st.write(f'Автор: {df["author"][i]}')
|
169 |
-
# st.write(f'Ссылка: {df["page_url"][i]}')
|
170 |
-
# st.write(f'Аннотация: {df["annotation"][i]}')
|
171 |
|
172 |
import streamlit as st
|
173 |
import pandas as pd
|
174 |
import numpy as np
|
175 |
import torch
|
176 |
from transformers import AutoTokenizer, AutoModel
|
|
|
177 |
import faiss
|
178 |
|
179 |
tokenizer = AutoTokenizer.from_pretrained("cointegrated/rubert-tiny2")
|
@@ -182,13 +124,8 @@ model = AutoModel.from_pretrained("cointegrated/rubert-tiny2")
|
|
182 |
df = pd.read_csv('data_final.csv')
|
183 |
|
184 |
MAX_LEN = 300
|
185 |
-
@st.cache_data
|
186 |
-
def load_faiss_index():
|
187 |
-
books_embs = np.loadtxt('vectors.txt')
|
188 |
-
index = faiss.IndexFlatIP(books_embs.shape[1])
|
189 |
-
index.add(books_embs)
|
190 |
-
return index
|
191 |
|
|
|
192 |
def embed_bert_cls(text, model, tokenizer):
|
193 |
t = tokenizer(text, padding=True, truncation=True, return_tensors='pt', max_length=MAX_LEN)
|
194 |
with torch.no_grad():
|
@@ -197,23 +134,12 @@ def embed_bert_cls(text, model, tokenizer):
|
|
197 |
embeddings = torch.nn.functional.normalize(embeddings)
|
198 |
return embeddings[0].cpu().numpy()
|
199 |
|
200 |
-
@st.cache_data
|
201 |
-
def
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
recommendation = {
|
207 |
-
'image_url': df['image_url'][i],
|
208 |
-
'title': df['title'][i],
|
209 |
-
'author': df['author'][i],
|
210 |
-
'page_url': df['page_url'][i],
|
211 |
-
'annotation': df['annotation'][i],
|
212 |
-
'category': df['category'][i],
|
213 |
-
'similarity_score': round(j, 2)
|
214 |
-
}
|
215 |
-
recommendations.append(recommendation)
|
216 |
-
return recommendations
|
217 |
|
218 |
st.title('Приложение для рекомендации книг')
|
219 |
|
@@ -226,22 +152,97 @@ recommend_button = st.button('Найти')
|
|
226 |
|
227 |
if text and recommend_button:
|
228 |
query_emb = embed_bert_cls(text, model, tokenizer)
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
247 |
|
|
|
109 |
# st.write(f'Автор: {df["author"][i]}')
|
110 |
# st.write(f'Ссылка: {df["page_url"][i]}')
|
111 |
# st.write(f'Аннотация: {df["annotation"][i]}')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
112 |
|
113 |
import streamlit as st
|
114 |
import pandas as pd
|
115 |
import numpy as np
|
116 |
import torch
|
117 |
from transformers import AutoTokenizer, AutoModel
|
118 |
+
from sklearn.metrics.pairwise import pairwise_distances, cosine_similarity
|
119 |
import faiss
|
120 |
|
121 |
tokenizer = AutoTokenizer.from_pretrained("cointegrated/rubert-tiny2")
|
|
|
124 |
df = pd.read_csv('data_final.csv')
|
125 |
|
126 |
MAX_LEN = 300
|
|
|
|
|
|
|
|
|
|
|
|
|
127 |
|
128 |
+
# @st.cache(hash_funcs={tokenizers.Tokenizer: my_hash_func})
|
129 |
def embed_bert_cls(text, model, tokenizer):
|
130 |
t = tokenizer(text, padding=True, truncation=True, return_tensors='pt', max_length=MAX_LEN)
|
131 |
with torch.no_grad():
|
|
|
134 |
embeddings = torch.nn.functional.normalize(embeddings)
|
135 |
return embeddings[0].cpu().numpy()
|
136 |
|
137 |
+
@st.cache_data
|
138 |
+
def load_faiss_index():
|
139 |
+
books_embs = np.loadtxt('vectors.txt')
|
140 |
+
index = faiss.IndexFlatIP(books_embs.shape[1])
|
141 |
+
index.add(books_embs)
|
142 |
+
return index
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
143 |
|
144 |
st.title('Приложение для рекомендации книг')
|
145 |
|
|
|
152 |
|
153 |
if text and recommend_button:
|
154 |
query_emb = embed_bert_cls(text, model, tokenizer)
|
155 |
+
index = load_faiss_index()
|
156 |
+
D, I = index.search(query_emb.reshape(1, -1), top_n)
|
157 |
+
|
158 |
+
st.subheader('Топ рекомендуемых книг:')
|
159 |
+
|
160 |
+
for i, j in zip(I[0], D[0]):
|
161 |
+
if category_filter == 'Все' or df['category'][i] == category_filter:
|
162 |
+
col_1, col_2 = st.columns([1, 3])
|
163 |
+
|
164 |
+
with col_1:
|
165 |
+
st.image(df['image_url'][i], use_column_width=True)
|
166 |
+
st.write(round(j, 2))
|
167 |
+
with col_2:
|
168 |
+
st.write(f'Название книги: **{df["title"][i]}**')
|
169 |
+
st.write(f'Автор: {df["author"][i]}')
|
170 |
+
st.write(f'Ссылка: {df["page_url"][i]}')
|
171 |
+
st.write(f'Аннотация: {df["annotation"][i]}')
|
172 |
+
|
173 |
+
# import streamlit as st
|
174 |
+
# import pandas as pd
|
175 |
+
# import numpy as np
|
176 |
+
# import torch
|
177 |
+
# from transformers import AutoTokenizer, AutoModel
|
178 |
+
# import faiss
|
179 |
+
|
180 |
+
# tokenizer = AutoTokenizer.from_pretrained("cointegrated/rubert-tiny2")
|
181 |
+
# model = AutoModel.from_pretrained("cointegrated/rubert-tiny2")
|
182 |
+
|
183 |
+
# df = pd.read_csv('data_final.csv')
|
184 |
+
|
185 |
+
# MAX_LEN = 300
|
186 |
+
# @st.cache_data
|
187 |
+
# def load_faiss_index():
|
188 |
+
# books_embs = np.loadtxt('vectors.txt')
|
189 |
+
# index = faiss.IndexFlatIP(books_embs.shape[1])
|
190 |
+
# index.add(books_embs)
|
191 |
+
# return index
|
192 |
+
|
193 |
+
# def embed_bert_cls(text, model, tokenizer):
|
194 |
+
# t = tokenizer(text, padding=True, truncation=True, return_tensors='pt', max_length=MAX_LEN)
|
195 |
+
# with torch.no_grad():
|
196 |
+
# model_output = model(**{k: v.to(model.device) for k, v in t.items()})
|
197 |
+
# embeddings = model_output.last_hidden_state[:, 0, :]
|
198 |
+
# embeddings = torch.nn.functional.normalize(embeddings)
|
199 |
+
# return embeddings[0].cpu().numpy()
|
200 |
+
|
201 |
+
# @st.cache_data()
|
202 |
+
# def get_recommendations(query_emb, top_n):
|
203 |
+
# index = load_faiss_index()
|
204 |
+
# D, I = index.search(query_emb.reshape(1, -1), top_n)
|
205 |
+
# recommendations = []
|
206 |
+
# for i, j in zip(I[0], D[0]):
|
207 |
+
# recommendation = {
|
208 |
+
# 'image_url': df['image_url'][i],
|
209 |
+
# 'title': df['title'][i],
|
210 |
+
# 'author': df['author'][i],
|
211 |
+
# 'page_url': df['page_url'][i],
|
212 |
+
# 'annotation': df['annotation'][i],
|
213 |
+
# 'category': df['category'][i],
|
214 |
+
# 'similarity_score': round(j, 2)
|
215 |
+
# }
|
216 |
+
# recommendations.append(recommendation)
|
217 |
+
# return recommendations
|
218 |
+
|
219 |
+
# st.title('Приложение для рекомендации книг')
|
220 |
+
|
221 |
+
# category_filter = st.selectbox('Выберите категорию книги (необязательно)', ['Все'] + list(df['category'].unique()))
|
222 |
+
|
223 |
+
# text = st.text_input('Введите запрос:')
|
224 |
+
# top_n = st.number_input('Введите количество рекомендаций:', min_value=1, max_value=50, value=1)
|
225 |
+
|
226 |
+
# recommend_button = st.button('Найти')
|
227 |
+
|
228 |
+
# if text and recommend_button:
|
229 |
+
# query_emb = embed_bert_cls(text, model, tokenizer)
|
230 |
+
# recommendations = get_recommendations(query_emb, top_n)
|
231 |
+
|
232 |
+
# if not recommendations: # Если рекомендации не найдены
|
233 |
+
# st.write('По вашему запросу ничего не найдено.')
|
234 |
+
# else:
|
235 |
+
# st.subheader('Топ рекомендуемых книг:')
|
236 |
+
# for recommendation in recommendations:
|
237 |
+
# if category_filter == 'Все' or recommendation['category'] == category_filter:
|
238 |
+
# col_1, col_2 = st.columns([1, 3])
|
239 |
+
# with col_1:
|
240 |
+
# st.image(recommendation['image_url'], use_column_width=True)
|
241 |
+
# st.write(recommendation['similarity_score'])
|
242 |
+
# with col_2:
|
243 |
+
# st.write(f'Название книги: **{recommendation["title"]}**')
|
244 |
+
# st.write(f'Автор: {recommendation["author"]}')
|
245 |
+
# st.write(f'Ссылка: {recommendation["page_url"]}')
|
246 |
+
# st.write(f'Аннотация: {recommendation["annotation"]}')
|
247 |
+
# st.write(f'Категория: {recommendation["category"]}')
|
248 |
|