import html

import streamlit as st
from sentence_transformers import SentenceTransformer
import datasets


@st.cache_resource
def load_model():
    """Load the sentence-embedding model used to encode search queries.

    Wrapped in ``st.cache_resource`` so the model is created once and reused
    instead of being rebuilt on every call.
    """
    return SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')


@st.cache_resource
def load_index():
    """Load the arXiv abstracts dataset and attach a FAISS index to it.

    Returns:
        The ``train`` split of ``eremeev-d/arxiv-abstracts-small`` with a
        FAISS index built over its ``embedding`` column.
    """
    # NOTE(review): `use_auth_token` is kept as in the original call; newer
    # `datasets` releases may expect `token=` instead -- confirm against the
    # pinned library version before changing it.
    index = datasets.load_dataset(
        "eremeev-d/arxiv-abstracts-small",
        use_auth_token=True,
    )["train"]
    index.add_faiss_index('embedding')
    return index


def get_answers(query):
    """Return the indexed examples nearest to ``query``.

    Args:
        query: Text passed to the model's ``encode`` method.

    Returns:
        The examples returned by ``get_nearest_examples``; callers index it
        by column name first and by result position second
        (e.g. ``answers['title'][0]``).
    """
    model = load_model()
    index = load_index()
    query_embedding = model.encode(query)
    # The similarity scores are not used by the UI, only the examples are.
    _scores, answers = index.get_nearest_examples('embedding', query_embedding)
    return answers


def _result_heading_html(paper_id, title):
    """Build the HTML heading that links a result title to its arXiv page."""
    href = "https://arxiv.org/abs/{}".format(paper_id)
    # The heading is rendered with unsafe_allow_html=True, so both values
    # are escaped to keep dataset text from being interpreted as markup.
    # NOTE(review): the original markup was lost from the source; a linked
    # <h3> is a reconstruction -- adjust the tag if another was intended.
    return '<h3><a href="{}">{}</a></h3>'.format(
        html.escape(href, quote=True),
        html.escape(str(title)),
    )


def display_answer(query):
    """Render the search results for ``query`` on the Streamlit page.

    Each result is shown in its own container as a linked title followed by
    the abstract and a horizontal rule.

    Args:
        query: Text to search the index for.
    """
    st.write("---")
    answers = get_answers(query)
    # `answers` is indexed by column name, so its own length is not the
    # number of results; walk the per-result columns in lockstep instead.
    results = zip(answers['id'], answers['title'], answers['abstract'])
    for paper_id, title, abstract in results:
        with st.container():
            st.write(
                _result_heading_html(paper_id, title),
                unsafe_allow_html=True,
            )
            st.markdown(abstract)
            st.write("---")