eremeev-d committed on
Commit
7521548
·
1 Parent(s): 8b4800e

Full index with embeddings

Browse files
Files changed (3) hide show
  1. Data/embeddings.npy +3 -0
  2. core.py +8 -3
  3. requirements.txt +1 -0
Data/embeddings.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b03978d1bf25675f47526cc5480bbe019a20f8c85bed35e35092a53d906fbeeb
3
+ size 1713805184
core.py CHANGED
@@ -3,6 +3,7 @@ from huggingface_hub import HfApi, HfFolder
3
  import datasets
4
  import logging
5
  import os
 
6
 
7
  from transformers import AutoTokenizer, AutoModel
8
  import torch
@@ -11,7 +12,7 @@ import torch.nn.functional as F
11
 
12
  @st.cache_data
13
  def login():
14
- if not 'logged' in st.session_state:
15
  logging.info("Trying to log in to HF")
16
  st.session_state['logged'] = True
17
  HF_TOKEN = os.environ.get("HF_TOKEN")
@@ -45,8 +46,11 @@ def load_index():
45
  split="train"
46
  )
47
  logging.info("Index succesfully loaded")
 
 
 
48
  logging.info("Building index")
49
- index.add_faiss_index('embedding')
50
  logging.info("Index built successfully")
51
  return index
52
 
@@ -75,6 +79,7 @@ def get_answers(query):
75
  index = load_index()
76
  query_embedding = get_embedding(query, model, tokenizer).reshape(-1)
77
  scores, answers = index.get_nearest_examples('embedding', query_embedding)
 
78
  logging.info("Succesfully got answers for {}".format(query))
79
  return answers
80
 
@@ -82,7 +87,7 @@ def get_answers(query):
82
  def display_answer(query):
83
  st.write("---")
84
  answers = get_answers(query)
85
- for answer_id in range(len(answers)):
86
  with st.container():
87
  href = "https://arxiv.org/abs/{}".format(answers['id'][answer_id])
88
  title = "<h3><a href=\"{}\">{}</a></h3>".format(
 
3
  import datasets
4
  import logging
5
  import os
6
+ import numpy as np
7
 
8
  from transformers import AutoTokenizer, AutoModel
9
  import torch
 
12
 
13
  @st.cache_data
14
  def login():
15
+ if 'logged' not in st.session_state:
16
  logging.info("Trying to log in to HF")
17
  st.session_state['logged'] = True
18
  HF_TOKEN = os.environ.get("HF_TOKEN")
 
46
  split="train"
47
  )
48
  logging.info("Index succesfully loaded")
49
+ logging.info("Loading embeddings")
50
+ embeddings = np.load("Data/embeddings.npy")
51
+ logging.info("Loaded embeddings")
52
  logging.info("Building index")
53
+ index.add_faiss_index_from_external_arrays(embeddings, 'embedding')
54
  logging.info("Index built successfully")
55
  return index
56
 
 
79
  index = load_index()
80
  query_embedding = get_embedding(query, model, tokenizer).reshape(-1)
81
  scores, answers = index.get_nearest_examples('embedding', query_embedding)
82
+ logging.info(scores)
83
  logging.info("Succesfully got answers for {}".format(query))
84
  return answers
85
 
 
87
  def display_answer(query):
88
  st.write("---")
89
  answers = get_answers(query)
90
+ for answer_id in range(len(answers['id'])):
91
  with st.container():
92
  href = "https://arxiv.org/abs/{}".format(answers['id'][answer_id])
93
  title = "<h3><a href=\"{}\">{}</a></h3>".format(
requirements.txt CHANGED
@@ -2,4 +2,5 @@ faiss-cpu~=1.7.2
2
  sentence-transformers~=2.2.2
3
  datasets~=2.10.1
4
  huggingface_hub~=0.10.1
 
5
  torch
 
2
  sentence-transformers~=2.2.2
3
  datasets~=2.10.1
4
  huggingface_hub~=0.10.1
5
+ numpy~=1.23.5
6
  torch