eremeev-d committed on
Commit
762ed00
·
1 Parent(s): 6ed0d96

Basic search on small dataset

Browse files
Files changed (2) hide show
  1. core.py +28 -4
  2. requirements.txt +1 -0
core.py CHANGED
@@ -1,11 +1,35 @@
1
  import streamlit as st
2
- import random
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
 
5
  def display_answer(query):
6
  st.write("---")
7
- for doc_id in range(random.randint(3, 5)):
 
8
  with st.container():
9
- st.subheader("Paper #{}".format(doc_id))
10
- st.markdown(r"Let's say it is description with LaTeX $\alpha$")
 
 
11
  st.write("---")
 
1
  import streamlit as st
2
+ from sentence_transformers import SentenceTransformer
3
+ import datasets
4
+
5
+
6
@st.cache_resource
def load_model():
    """Return the sentence-embedding model used to encode search queries.

    Decorated with ``st.cache_resource`` so that the model object is built
    once and then reused instead of being re-created on every call.
    """
    model_name = 'sentence-transformers/all-MiniLM-L6-v2'
    return SentenceTransformer(model_name)
9
+
10
+
11
@st.cache_resource
def load_index():
    """Return the train split of the abstracts dataset with a FAISS index.

    The dataset is fetched with ``use_auth_token=True`` and a FAISS index is
    attached to its ``embedding`` column. ``st.cache_resource`` keeps the
    indexed dataset around so the work is not repeated on every call.
    """
    dataset = datasets.load_dataset(
        "eremeev-d/arxiv-abstracts-small",
        use_auth_token=True,
    )
    train_split = dataset["train"]
    train_split.add_faiss_index('embedding')
    return train_split
16
+
17
+
18
def get_answers(query):
    """Return the indexed examples nearest to ``query``.

    The query is encoded with the cached model and looked up in the FAISS
    index on the ``embedding`` column. Only the retrieved examples are
    returned; the similarity scores are discarded.
    """
    encoder = load_model()
    searchable = load_index()
    query_vector = encoder.encode(query)
    _scores, nearest = searchable.get_nearest_examples('embedding', query_vector)
    return nearest
24
 
25
 
26
def display_answer(query):
    """Render the papers retrieved for ``query`` as a list of linked entries.

    Each result is drawn in its own container: a heading that links to the
    paper's arXiv page, followed by the abstract and a horizontal rule.

    Args:
        query: The search text passed on to ``get_answers``.
    """
    st.write("---")
    answers = get_answers(query)
    # ``answers`` is a dict of columns (each value is a list with one entry
    # per retrieved paper), so ``len(answers)`` would count columns rather
    # than results. Walk the rows by zipping the columns that are displayed.
    rows = zip(answers['id'], answers['title'], answers['abstract'])
    for paper_id, paper_title, abstract in rows:
        with st.container():
            href = "https://arxiv.org/abs/{}".format(paper_id)
            # NOTE(review): the title is interpolated into raw HTML without
            # escaping; confirm that dataset titles are trusted.
            title = "<h3><a href=\"{}\">{}</a></h3>".format(href, paper_title)
            st.write(title, unsafe_allow_html=True)
            st.markdown(abstract)
            st.write("---")
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ faiss-cpu~=1.7.2