Spaces:
Sleeping
Sleeping
import streamlit as st | |
import pandas as pd | |
import numpy as np | |
from sentence_transformers import SentenceTransformer | |
import faiss | |
from sklearn.preprocessing import normalize | |
# Centered page heading, rendered as raw HTML (hence unsafe_allow_html=True).
PAGE_TITLE_HTML = (
    '<div style="text-align: center; font-size: 24px;">'
    'Умный поиск книг с использованием SentenceTransformer (msmarco-distilbert-base-v4)'
    '</div>'
)
# CSV holding the book metadata that search results are looked up in.
BOOKS_CSV_PATH = "data_final.csv"

st.markdown(PAGE_TITLE_HTML, unsafe_allow_html=True)
df = pd.read_csv(BOOKS_CSV_PATH)
def load_embeddings_from_file(embedding_file):
    """Load L2-normalised embedding vectors from a whitespace-separated text file.

    Every non-blank line of ``embedding_file`` is parsed as one vector of
    floats. Blank lines (for example a trailing newline at the end of the
    file) are skipped so they cannot produce an empty, ragged row.

    Args:
        embedding_file: Path to a text file with one embedding per line.

    Returns:
        A C-contiguous ``float32`` array of shape ``(n_vectors, dim)`` whose
        rows have unit L2 norm. All-zero rows are returned unchanged.

    Raises:
        ValueError: If the file holds no vectors, a token is not a valid
            float, or the rows do not all have the same length.
    """
    rows = []
    expected_dim = None
    with open(embedding_file, "r", encoding="utf-8") as f:
        for line_number, line in enumerate(f, start=1):
            tokens = line.split()
            if not tokens:
                continue
            if expected_dim is None:
                expected_dim = len(tokens)
            elif len(tokens) != expected_dim:
                raise ValueError(
                    "{}: line {} has {} values, expected {}".format(
                        embedding_file, line_number, len(tokens), expected_dim
                    )
                )
            rows.append([float(token) for token in tokens])

    if not rows:
        raise ValueError("{}: no embeddings found".format(embedding_file))

    # Normalise in float64 for accuracy, then hand back float32 because that
    # is the dtype faiss indexes operate on.
    embeddings = np.array(rows, dtype=np.float64)
    norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
    norms[norms == 0.0] = 1.0  # leave all-zero rows as zeros, avoid 0/0
    return np.ascontiguousarray(embeddings / norms, dtype=np.float32)
def search_similar_books(query, index, model, df, k):
    """Embed ``query``, look up the ``k`` nearest books and render them.

    The query embedding is L2-normalised before searching. Hits are rendered
    in the order faiss returns them, which is best match first.

    Args:
        query: Free-text search string typed by the user.
        index: faiss index built over the book annotation embeddings.
        model: Object exposing ``encode(list_of_str)`` (a SentenceTransformer).
        df: DataFrame with ``annotation``, ``title``, ``image_url`` and
            ``page_url`` columns; row positions must match the index ids.
        k: Number of neighbours to request from the index.

    Returns:
        None. Results are written to the Streamlit page.
    """
    query_embedding = model.encode([query])[0]
    query_embedding = np.array(query_embedding, dtype=np.float32).reshape(1, -1)
    faiss.normalize_L2(query_embedding)

    scores, indices = index.search(query_embedding, k)

    if getattr(index, "metric_type", None) == faiss.METRIC_L2:
        # An L2 index reports squared Euclidean distance. For unit vectors
        # d^2 = 2 - 2*cos, so convert it to the cosine similarity that the
        # UI label promises.
        similarities = 1.0 - scores[0] / 2.0
    else:
        # Inner product of unit vectors already is the cosine similarity.
        # NOTE(review): assumes the indexed vectors are unit-normalised.
        similarities = scores[0]

    # faiss pads the result with label -1 when fewer than k vectors exist;
    # drop those so df.iloc[-1] does not silently show the last book.
    hits = [
        (int(book_position), float(similarity))
        for book_position, similarity in zip(indices[0], similarities)
        if book_position >= 0
    ]

    st.write("Результаты поиска для запроса '{}':".format(query))
    for rank, (book_position, similarity_score) in enumerate(hits, start=1):
        book = df.iloc[book_position]
        book_title = book['title']
        st.write("{}. Название книги: [{}]({})".format(rank, book_title, book['page_url']))
        st.image(book['image_url'], caption='Обложка книги {}'.format(book_title))
        st.write(" Аннотация: {}".format(book['annotation']))
        st.write(" Косинусное сходство: {:.4f}".format(similarity_score))
# --- Application wiring: build the search index and the input widgets. ---
# NOTE(review): Streamlit re-runs this script on every interaction, so the
# model and the index are rebuilt each time; consider caching them.
embedding_file = "annotation_embeddings.txt"
model = SentenceTransformer('msmarco-distilbert-base-v4')

# faiss works on contiguous float32 matrices, so enforce that here.
embeddings = np.ascontiguousarray(
    load_embeddings_from_file(embedding_file), dtype=np.float32
)

# The loader returns unit-length rows, so an inner-product index ranks and
# scores hits by cosine similarity (a flat L2 index would report squared
# distances instead).
index = faiss.IndexFlatIP(embeddings.shape[1])
index.add(embeddings)

user_query = st.text_input("Введите ваш запрос:")
num_books = st.slider("Выберите количество книг для рекомендации", 1, 10, 5)

if st.button('Подобрать'):
    if user_query.strip():
        # Never ask for more neighbours than the index holds.
        search_similar_books(
            user_query, index, model, df, k=min(num_books, index.ntotal)
        )
    else:
        # An empty query has no meaningful embedding; ask for input instead.
        st.warning("Введите запрос, чтобы подобрать книги.")