"""Streamlit app that recommends books for a free-text query.

The query is embedded with the ``cointegrated/rubert-tiny2`` encoder (CLS
token, L2-normalised) and matched by inner product against pre-computed book
vectors stored in ``vectors.txt``. Book metadata is read from
``data_final.csv``; row ``i`` of the CSV is assumed to correspond to row ``i``
of ``vectors.txt`` (TODO confirm against the script that produced the files).

NOTE: three earlier commented-out drafts of this script (one without the
category filter, one without caching, and one with a cached
``get_recommendations`` helper) were removed as dead code.
"""

import streamlit as st
import pandas as pd
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModel
from sklearn.metrics.pairwise import pairwise_distances, cosine_similarity
import faiss

MODEL_NAME = "cointegrated/rubert-tiny2"
MAX_LEN = 300
# Label of the select-box entry that disables category filtering.
ALL_CATEGORIES = 'Все'


@st.cache_resource
def load_encoder():
    """Load the tokenizer and encoder once per server process.

    Streamlit re-executes the whole script on every widget interaction, so
    without caching the model would be re-instantiated on each rerun.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    return (
        AutoTokenizer.from_pretrained(MODEL_NAME),
        AutoModel.from_pretrained(MODEL_NAME),
    )


@st.cache_data
def load_books():
    """Read the book metadata table from ``data_final.csv`` (cached)."""
    return pd.read_csv('data_final.csv')


def embed_bert_cls(text, model, tokenizer):
    """Return the L2-normalised CLS embedding of ``text`` as a 1-D array.

    Args:
        text: Query string to embed.
        model: Encoder whose ``last_hidden_state`` is used.
        tokenizer: Tokenizer matching ``model``.

    Returns:
        A NumPy vector holding the embedding of the first (only) sequence.
    """
    encoded = tokenizer(
        text,
        padding=True,
        truncation=True,
        return_tensors='pt',
        max_length=MAX_LEN,
    )
    with torch.no_grad():
        model_output = model(**{k: v.to(model.device) for k, v in encoded.items()})
    # Position 0 of every sequence is the CLS token.
    embeddings = model_output.last_hidden_state[:, 0, :]
    embeddings = torch.nn.functional.normalize(embeddings)
    return embeddings[0].cpu().numpy()


@st.cache_resource
def load_faiss_index():
    """Build the inner-product FAISS index over the stored book vectors.

    ``st.cache_resource`` is used instead of ``st.cache_data`` because the
    latter pickles the return value, and a FAISS index is a native object
    that is meant to be shared, not serialised.

    Returns:
        A ``faiss.IndexFlatIP`` holding every row of ``vectors.txt``.
    """
    # FAISS operates on contiguous float32; loadtxt defaults to float64.
    books_embs = np.loadtxt('vectors.txt', dtype=np.float32)
    # atleast_2d keeps a single-row file usable as a (1, dim) matrix.
    books_embs = np.ascontiguousarray(np.atleast_2d(books_embs))
    index = faiss.IndexFlatIP(books_embs.shape[1])
    index.add(books_embs)
    return index


def _find_books(query_emb, index, books, top_n, category):
    """Return up to ``top_n`` ``(row_position, score)`` pairs, best first.

    When a specific category is selected the whole index is ranked and then
    filtered; filtering only the global top ``top_n`` would silently drop
    most (often all) results for that category.
    """
    if index.ntotal == 0:
        return []

    filtering = category != ALL_CATEGORIES
    # Never ask for more neighbours than exist: FAISS pads with id -1.
    k = index.ntotal if filtering else min(top_n, index.ntotal)
    query = np.ascontiguousarray(query_emb.reshape(1, -1), dtype=np.float32)
    scores, positions = index.search(query, k)

    categories = books['category']
    hits = []
    for pos, score in zip(positions[0], scores[0]):
        # Skip FAISS padding and ids with no matching metadata row.
        if pos < 0 or pos >= len(books):
            continue
        if filtering and categories.iloc[pos] != category:
            continue
        hits.append((int(pos), float(score)))
        if len(hits) == top_n:
            break
    return hits


tokenizer, model = load_encoder()
df = load_books()

st.title('Приложение для рекомендации книг')

category_filter = st.selectbox(
    'Выберите категорию книги (необязательно)',
    [ALL_CATEGORIES] + list(df['category'].unique()),
)
text = st.text_input('Введите запрос:')
top_n = st.number_input(
    'Введите количество рекомендаций:', min_value=1, max_value=50, value=1
)
recommend_button = st.button('Найти')

if text and recommend_button:
    query_emb = embed_bert_cls(text, model, tokenizer)
    index = load_faiss_index()
    hits = _find_books(query_emb, index, df, int(top_n), category_filter)

    if not hits:
        st.write('По вашему запросу ничего не найдено.')
    else:
        st.subheader('Топ рекомендуемых книг:')
        for pos, score in hits:
            # FAISS ids are row positions, so look the book up positionally.
            book = df.iloc[pos]
            col_1, col_2 = st.columns([1, 3])
            with col_1:
                st.image(book['image_url'], use_column_width=True)
                st.write(round(score, 2))
            with col_2:
                st.write(f'Название книги: **{book["title"]}**')
                st.write(f'Автор: {book["author"]}')
                st.write(f'Ссылка: {book["page_url"]}')
                st.write(f'Аннотация: {book["annotation"]}')