# Source: find_my_book / app.py
# Author avatar: valeriedaash's picture
# Commit message: filtering added
# Commit: 99c7207
# (page controls: raw / history / blame)
# File size: 10.1 kB
# БЕЗ ФИЛЬТРА КАТЕГОРИЙ (earlier version: no category filter)
# import streamlit as st
# import pandas as pd
# import numpy as np
# import torch
# from transformers import AutoTokenizer, AutoModel
# from sklearn.metrics.pairwise import pairwise_distances, cosine_similarity
# import faiss
# tokenizer = AutoTokenizer.from_pretrained("cointegrated/rubert-tiny2")
# model = AutoModel.from_pretrained("cointegrated/rubert-tiny2")
# df = pd.read_csv('data_final.csv')
# MAX_LEN = 300
# def embed_bert_cls(text, model, tokenizer):
# t = tokenizer(text, padding=True, truncation=True, return_tensors='pt', max_length=MAX_LEN)
# with torch.no_grad():
# model_output = model(**{k: v.to(model.device) for k, v in t.items()})
# embeddings = model_output.last_hidden_state[:, 0, :]
# embeddings = torch.nn.functional.normalize(embeddings)
# return embeddings[0].cpu().numpy()
# books_embs = np.loadtxt('vectors.txt')
# index = faiss.IndexFlatIP(books_embs.shape[1])
# index.add(books_embs)
# st.title('Приложение для рекомендации книг')
# text = st.text_input('Введите запрос:')
# top_n = st.number_input('Введите количество рекомендаций:', min_value=1, max_value=50, value=1)
# recommend_button = st.button('Найти')
# if text and recommend_button:
# query_emb = embed_bert_cls(text, model, tokenizer)
# D, I = index.search(query_emb.reshape(1, -1), top_n)
# st.subheader('Топ рекомендуемых книг:')
# for i, j in zip(I[0], D[0]):
# col_1, col_2 = st.columns([1, 3])
# with col_1:
# st.image(df['image_url'][i], use_column_width=True)
# st.write(round(j* 100, 2))
# with col_2:
# st.write(f'Название книги: **{df["title"][i]}**')
# st.write(f'Автор: {df["author"][i]}')
# st.write(f'Ссылка: {df["page_url"][i]}')
# st.write(f'Аннотация: {df["annotation"][i]}')
# БЕЗ КЭШИРОВАНИЯ (earlier version: category filter, no caching)
# import streamlit as st
# import pandas as pd
# import numpy as np
# import torch
# from transformers import AutoTokenizer, AutoModel
# from sklearn.metrics.pairwise import pairwise_distances, cosine_similarity
# import faiss
# tokenizer = AutoTokenizer.from_pretrained("cointegrated/rubert-tiny2")
# model = AutoModel.from_pretrained("cointegrated/rubert-tiny2")
# df = pd.read_csv('data_final.csv')
# MAX_LEN = 300
# def embed_bert_cls(text, model, tokenizer):
# t = tokenizer(text, padding=True, truncation=True, return_tensors='pt', max_length=MAX_LEN)
# with torch.no_grad():
# model_output = model(**{k: v.to(model.device) for k, v in t.items()})
# embeddings = model_output.last_hidden_state[:, 0, :]
# embeddings = torch.nn.functional.normalize(embeddings)
# return embeddings[0].cpu().numpy()
# books_embs = np.loadtxt('vectors.txt')
# index = faiss.IndexFlatIP(books_embs.shape[1])
# index.add(books_embs)
# st.title('Приложение для рекомендации книг')
# # Добавляем опциональный фильтр для выбора категории книги
# category_filter = st.selectbox('Выберите категорию книги (необязательно)', ['Все'] + list(df['category'].unique()))
# text = st.text_input('Введите запрос:')
# top_n = st.number_input('Введите количество рекомендаций:', min_value=1, max_value=50, value=1)
# recommend_button = st.button('Найти')
# if text and recommend_button:
# query_emb = embed_bert_cls(text, model, tokenizer)
# D, I = index.search(query_emb.reshape(1, -1), top_n)
# st.subheader('Топ рекомендуемых книг:')
# for i, j in zip(I[0], D[0]):
# # Добавляем фильтрацию по выбранной категории книги, если выбрана конкретная категория
# if category_filter == 'Все' or df['category'][i] == category_filter:
# col_1, col_2 = st.columns([1, 3])
# with col_1:
# st.image(df['image_url'][i], use_column_width=True)
# st.write(round(j* 100, 2))
# with col_2:
# st.write(f'Название книги: **{df["title"][i]}**')
# st.write(f'Автор: {df["author"][i]}')
# st.write(f'Ссылка: {df["page_url"][i]}')
# st.write(f'Аннотация: {df["annotation"][i]}')
# import streamlit as st
# import pandas as pd
# import numpy as np
# import torch
# from transformers import AutoTokenizer, AutoModel
# from sklearn.metrics.pairwise import pairwise_distances, cosine_similarity
# import faiss
# tokenizer = AutoTokenizer.from_pretrained("cointegrated/rubert-tiny2")
# model = AutoModel.from_pretrained("cointegrated/rubert-tiny2")
# df = pd.read_csv('data_final.csv')
# MAX_LEN = 300
# # @st.cache(hash_funcs={tokenizers.Tokenizer: my_hash_func})
# def embed_bert_cls(text, model, tokenizer):
# t = tokenizer(text, padding=True, truncation=True, return_tensors='pt', max_length=MAX_LEN)
# with torch.no_grad():
# model_output = model(**{k: v.to(model.device) for k, v in t.items()})
# embeddings = model_output.last_hidden_state[:, 0, :]
# embeddings = torch.nn.functional.normalize(embeddings)
# return embeddings[0].cpu().numpy()
# @st.cache_data
# def load_faiss_index():
# books_embs = np.loadtxt('vectors.txt')
# index = faiss.IndexFlatIP(books_embs.shape[1])
# index.add(books_embs)
# return index
# st.title('Приложение для рекомендации книг')
# category_filter = st.selectbox('Выберите категорию книги (необязательно)', ['Все'] + list(df['category'].unique()))
# text = st.text_input('Введите запрос:')
# top_n = st.number_input('Введите количество рекомендаций:', min_value=1, max_value=50, value=1)
# recommend_button = st.button('Найти')
# if text and recommend_button:
# query_emb = embed_bert_cls(text, model, tokenizer)
# index = load_faiss_index()
# D, I = index.search(query_emb.reshape(1, -1), top_n)
# st.subheader('Топ рекомендуемых книг:')
# for i, j in zip(I[0], D[0]):
# if category_filter == 'Все' or df['category'][i] == category_filter:
# col_1, col_2 = st.columns([1, 3])
# with col_1:
# st.image(df['image_url'][i], use_column_width=True)
# st.write(round(j, 2))
# with col_2:
# st.write(f'Название книги: **{df["title"][i]}**')
# st.write(f'Автор: {df["author"][i]}')
# st.write(f'Ссылка: {df["page_url"][i]}')
# st.write(f'Аннотация: {df["annotation"][i]}')
import streamlit as st
import pandas as pd
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModel
import faiss
MAX_LEN = 300  # passed to the tokenizer as max_length when embedding a query


@st.cache_resource
def _load_encoder():
    """Load the tokenizer and model once and share them across reruns.

    Streamlit re-executes this script from the top on every widget
    interaction; without caching, both Hugging Face objects would be
    re-instantiated each time the user touches the page.

    Returns:
        A ``(tokenizer, model)`` tuple for "cointegrated/rubert-tiny2".
    """
    return (
        AutoTokenizer.from_pretrained("cointegrated/rubert-tiny2"),
        AutoModel.from_pretrained("cointegrated/rubert-tiny2"),
    )


@st.cache_data
def _load_books():
    """Read the book catalogue CSV once instead of on every rerun."""
    return pd.read_csv('data_final.csv')


# Module-level names are kept unchanged: the rest of the script reads them.
tokenizer, model = _load_encoder()
df = _load_books()
@st.cache_resource
def load_faiss_index():
    """Build the inner-product FAISS index over the stored book embeddings.

    The index is cached with ``st.cache_resource`` rather than
    ``st.cache_data``: the latter serialises the return value and hands
    out copies, which is meant for plain data, not for a native index
    object that should be built once and shared.

    Returns:
        A ``faiss.IndexFlatIP`` containing every row of ``vectors.txt``.
    """
    # np.loadtxt defaults to float64, while FAISS works on float32 vectors.
    # ndmin=2 keeps a single-row file as a (1, dim) matrix so shape[1] exists.
    books_embs = np.loadtxt('vectors.txt', dtype=np.float32, ndmin=2)
    index = faiss.IndexFlatIP(books_embs.shape[1])
    index.add(books_embs)
    return index
def embed_bert_cls(text, model, tokenizer):
    """Embed ``text`` and return a normalised vector as a NumPy array.

    The vector is the model's last hidden state at sequence position 0
    (presumably the [CLS] token for this BERT-style tokenizer), normalised
    with ``torch.nn.functional.normalize``.

    Args:
        text: Query text handed to ``tokenizer``.
        model: Callable model exposing ``.device`` whose output has
            ``last_hidden_state``.
        tokenizer: Callable returning a mapping of tensors
            (``return_tensors='pt'``).

    Returns:
        The embedding of the first (only) sequence in the batch, moved to
        CPU and converted with ``.numpy()``.
    """
    encoded = tokenizer(
        text,
        padding=True,
        truncation=True,
        return_tensors='pt',
        max_length=MAX_LEN,
    )
    # Put every input tensor on the same device as the model.
    target_device = model.device
    model_inputs = {
        name: tensor.to(target_device) for name, tensor in encoded.items()
    }
    # Inference only: no gradients are needed.
    with torch.no_grad():
        output = model(**model_inputs)
    first_token_states = output.last_hidden_state[:, 0, :]
    unit_vectors = torch.nn.functional.normalize(first_token_states)
    return unit_vectors[0].cpu().numpy()
@st.cache_data()
def get_recommendations(query_emb, top_n):
    """Return up to ``top_n`` catalogue entries most similar to the query.

    Args:
        query_emb: 1-D query embedding; reshaped to a single-row matrix
            before the search.
        top_n: Number of neighbours to request from the index.

    Returns:
        A list of dicts with the keys ``image_url``, ``title``, ``author``,
        ``page_url``, ``annotation``, ``category`` and ``similarity_score``
        (the index score rounded to two decimals), in the order the index
        returned them. The list can be shorter than ``top_n`` when the
        index holds fewer vectors.
    """
    index = load_faiss_index()
    # FAISS wants a float32 (n_queries, dim) matrix and an integer k.
    query = np.asarray(query_emb, dtype=np.float32).reshape(1, -1)
    scores, ids = index.search(query, int(top_n))

    fields = (
        'image_url',
        'title',
        'author',
        'page_url',
        'annotation',
        'category',
    )
    recommendations = []
    for book_id, score in zip(ids[0], scores[0]):
        # FAISS pads the result with id -1 when fewer than k vectors exist;
        # looking that up in df would fail, so skip the padding.
        if book_id < 0:
            continue
        recommendation = {field: df[field][book_id] for field in fields}
        # Plain float instead of np.float32 so the rounded value prints cleanly.
        recommendation['similarity_score'] = round(float(score), 2)
        recommendations.append(recommendation)
    return recommendations
# --- Page layout: title, optional category filter, query box, result count ---
st.title('Приложение для рекомендации книг')
category_filter = st.selectbox(
    'Выберите категорию книги (необязательно)',
    ['Все'] + list(df['category'].unique()),
)
text = st.text_input('Введите запрос:')
top_n = st.number_input(
    'Введите количество рекомендаций:', min_value=1, max_value=50, value=1
)
recommend_button = st.button('Найти')

# Search only when there is a query and the button was pressed on this rerun.
if text and recommend_button:
    query_emb = embed_bert_cls(text, model, tokenizer)
    recommendations = get_recommendations(query_emb, top_n)

    # Apply the category filter BEFORE the emptiness check, so that a filter
    # which removes every hit shows the "nothing found" message instead of a
    # results header with nothing under it.
    # NOTE(review): filtering happens after the top-N search, so fewer than
    # top_n books may be shown for a specific category.
    if category_filter != 'Все':
        recommendations = [
            rec for rec in recommendations
            if rec['category'] == category_filter
        ]

    if not recommendations:  # nothing matched the query (and filter)
        st.write('По вашему запросу ничего не найдено.')
    else:
        st.subheader('Топ рекомендуемых книг:')
        for recommendation in recommendations:
            # Narrow column: cover and score; wide column: book details.
            col_1, col_2 = st.columns([1, 3])
            with col_1:
                st.image(recommendation['image_url'], use_column_width=True)
                st.write(recommendation['similarity_score'])
            with col_2:
                st.write(f'Название книги: **{recommendation["title"]}**')
                st.write(f'Автор: {recommendation["author"]}')
                st.write(f'Ссылка: {recommendation["page_url"]}')
                st.write(f'Аннотация: {recommendation["annotation"]}')
                st.write(f'Категория: {recommendation["category"]}')