# find_my_book / pages / sentence.py
# valeriedaash's picture
# sentence updated
# a4cd013
import streamlit as st
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
import faiss
from sklearn.preprocessing import normalize
# Page title: a centered HTML banner rendered through Streamlit's markdown
# widget (raw HTML has to be explicitly allowed).
_title_html = '<div style="text-align: center; font-size: 24px;">Умный поиск книг с использованием SentenceTransformer (msmarco-distilbert-base-v4)</div>'
st.markdown(_title_html, unsafe_allow_html=True)

# Book catalogue; the search code below reads the 'annotation', 'title',
# 'image_url' and 'page_url' columns from it by row position.
df = pd.read_csv("data_final.csv")
def load_embeddings_from_file(embedding_file):
    """Load embedding vectors from a text file and L2-normalize every row.

    Each non-empty line of the file is parsed as one vector of
    whitespace-separated floats. Blank lines are ignored.

    Args:
        embedding_file: Path to the text file with one embedding per line.

    Returns:
        A C-contiguous ``float32`` array of shape ``(n_vectors, dim)`` whose
        rows have unit L2 norm. All-zero rows are left as zeros.

    Raises:
        ValueError: If the file contains no vectors, a value cannot be parsed
            as a float, or the rows do not all have the same length.
    """
    rows = []
    with open(embedding_file, "r", encoding="utf-8") as f:
        for line in f:
            values = line.split()
            if not values:
                # A blank line (e.g. a trailing newline) would otherwise
                # become an empty row and make the array ragged.
                continue
            rows.append([float(x) for x in values])

    if not rows:
        raise ValueError(
            "No embeddings found in {!r}".format(embedding_file)
        )

    dim = len(rows[0])
    if any(len(row) != dim for row in rows):
        raise ValueError(
            "Embeddings in {!r} do not all have the same dimension".format(
                embedding_file
            )
        )

    # Normalize in float64 for accuracy, then hand out float32.
    embeddings = np.array(rows, dtype=np.float64)
    norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
    norms[norms == 0.0] = 1.0  # keep zero vectors as zeros instead of NaN
    embeddings /= norms

    # FAISS indexes operate on C-contiguous float32 matrices.
    return np.ascontiguousarray(embeddings, dtype=np.float32)
def search_similar_books(query, index, model, df, k):
    """Find the books most similar to ``query`` and render them in Streamlit.

    The query is encoded with ``model``, L2-normalized and searched in the
    FAISS ``index``. Hits are shown most-similar first, each with its title
    (linked to the book page), cover image, annotation and cosine similarity.

    Args:
        query: Free-text search query entered by the user.
        index: FAISS index over the book embeddings. Row ``i`` of the index
            must correspond to row ``i`` of ``df``. The vectors are assumed
            to be L2-normalized, which the similarity computation relies on.
        model: Encoder exposing ``encode(list_of_texts)``.
        df: DataFrame with 'annotation', 'title', 'image_url' and 'page_url'
            columns.
        k: Number of nearest neighbours to request.

    Returns:
        None. Output is written to the Streamlit page.
    """
    if not query or not query.strip():
        # Encoding an empty string would return arbitrary neighbours.
        st.warning("Введите запрос для поиска.")
        return

    query_embedding = np.ascontiguousarray(
        model.encode([query])[0], dtype=np.float32
    ).reshape(1, -1)
    faiss.normalize_L2(query_embedding)

    scores, indices = index.search(query_embedding, k)
    scores, indices = scores[0], indices[0]

    if getattr(index, "metric_type", None) == faiss.METRIC_L2:
        # An L2 index returns squared Euclidean distances; for unit vectors
        # ||a - b||^2 = 2 - 2*cos(a, b), so convert to a real cosine value.
        similarities = 1.0 - scores / 2.0
    else:
        # Otherwise the index is assumed to be inner-product based, where the
        # score of unit vectors already is the cosine similarity.
        similarities = scores

    # FAISS returns hits best-first and pads missing results with index -1
    # (when k exceeds the number of stored vectors); -1 must not reach
    # df.iloc, where it would silently select the last row.
    hits = [
        (int(row_idx), float(similarity))
        for row_idx, similarity in zip(indices, similarities)
        if row_idx >= 0
    ]

    st.write("Результаты поиска для запроса '{}':".format(query))
    if not hits:
        st.write("Ничего не найдено.")
        return

    for rank, (row_idx, similarity) in enumerate(hits, start=1):
        row = df.iloc[row_idx]
        book_title = row['title']
        image_url = row['image_url']
        st.write("{}. Название книги: [{}]({})".format(rank, book_title, row['page_url']))
        # A missing cover is read from CSV as NaN, which st.image cannot show.
        if isinstance(image_url, str) and image_url:
            st.image(image_url, caption='Обложка книги {}'.format(book_title))
        st.write(" Аннотация: {}".format(row['annotation']))
        st.write(" Косинусное сходство: {:.4f}".format(similarity))
# --- Application wiring: model, index and UI controls -----------------------
embedding_file = "annotation_embeddings.txt"
model = SentenceTransformer('msmarco-distilbert-base-v4')

# FAISS expects a C-contiguous float32 matrix, so enforce that here.
embeddings = np.ascontiguousarray(
    load_embeddings_from_file(embedding_file), dtype=np.float32
)

# The rows are L2-normalized by load_embeddings_from_file, so the inner
# product of two rows is their cosine similarity. IndexFlatIP therefore
# yields real cosine scores, whereas IndexFlatL2 yields squared distances.
index = faiss.IndexFlatIP(embeddings.shape[1])
index.add(embeddings)

user_query = st.text_input("Введите ваш запрос:")
num_books = st.slider("Выберите количество книг для рекомендации", 1, 10, 5)
if st.button('Подобрать'):
    search_similar_books(user_query, index, model, df, k=num_books)