Spaces:

eremeev-d
/

arxiv-search

Runtime error

eremeev-d committed on Apr 16, 2023

Commit

7521548

1 Parent(s): 8b4800e

Full index with embeddings

Files changed (3) hide show

Data/embeddings.npy ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:b03978d1bf25675f47526cc5480bbe019a20f8c85bed35e35092a53d906fbeeb
+size 1713805184

core.py CHANGED Viewed

@@ -3,6 +3,7 @@ from huggingface_hub import HfApi, HfFolder
 import datasets
 import logging
 import os
 from transformers import AutoTokenizer, AutoModel
 import torch
@@ -11,7 +12,7 @@ import torch.nn.functional as F
 @st.cache_data
 def login():
-    if not 'logged' in st.session_state:
         logging.info("Trying to log in to HF")
         st.session_state['logged'] = True
         HF_TOKEN = os.environ.get("HF_TOKEN")
@@ -45,8 +46,11 @@ def load_index():
         split="train"
     )
     logging.info("Index succesfully loaded")
     logging.info("Building index")
-    index.add_faiss_index('embedding')
     logging.info("Index built successfully")
     return index
@@ -75,6 +79,7 @@ def get_answers(query):
     index = load_index()
     query_embedding = get_embedding(query, model, tokenizer).reshape(-1)
     scores, answers = index.get_nearest_examples('embedding', query_embedding)
     logging.info("Succesfully got answers for {}".format(query))
     return answers
@@ -82,7 +87,7 @@ def get_answers(query):
 def display_answer(query):
     st.write("---")
     answers = get_answers(query)
-    for answer_id in range(len(answers)):
         with st.container():
             href = "https://arxiv.org/abs/{}".format(answers['id'][answer_id])
             title = "<h3><a href=\"{}\">{}</a></h3>".format(

 import datasets
 import logging
 import os
+import numpy as np
 from transformers import AutoTokenizer, AutoModel
 import torch
 @st.cache_data
 def login():
+    if 'logged' not in st.session_state:
         logging.info("Trying to log in to HF")
         st.session_state['logged'] = True
         HF_TOKEN = os.environ.get("HF_TOKEN")
         split="train"
     )
     logging.info("Index succesfully loaded")
+    logging.info("Loading embeddings")
+    embeddings = np.load("Data/embeddings.npy")
+    logging.info("Loaded embeddings")
     logging.info("Building index")
+    index.add_faiss_index_from_external_arrays(embeddings, 'embedding')
     logging.info("Index built successfully")
     return index
     index = load_index()
     query_embedding = get_embedding(query, model, tokenizer).reshape(-1)
     scores, answers = index.get_nearest_examples('embedding', query_embedding)
+    logging.info(scores)
     logging.info("Succesfully got answers for {}".format(query))
     return answers
 def display_answer(query):
     st.write("---")
     answers = get_answers(query)
+    for answer_id in range(len(answers['id'])):
         with st.container():
             href = "https://arxiv.org/abs/{}".format(answers['id'][answer_id])
             title = "<h3><a href=\"{}\">{}</a></h3>".format(

requirements.txt CHANGED Viewed

@@ -2,4 +2,5 @@ faiss-cpu~=1.7.2
 sentence-transformers~=2.2.2
 datasets~=2.10.1
 huggingface_hub~=0.10.1
 torch

 sentence-transformers~=2.2.2
 datasets~=2.10.1
 huggingface_hub~=0.10.1
+numpy~=1.23.5
 torch