Spaces:
Runtime error
Runtime error
Full index with embeddings
Browse files
- Data/embeddings.npy +3 -0
- core.py +8 -3
- requirements.txt +1 -0
Data/embeddings.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b03978d1bf25675f47526cc5480bbe019a20f8c85bed35e35092a53d906fbeeb
|
3 |
+
size 1713805184
|
core.py
CHANGED
@@ -3,6 +3,7 @@ from huggingface_hub import HfApi, HfFolder
|
|
3 |
import datasets
|
4 |
import logging
|
5 |
import os
|
|
|
6 |
|
7 |
from transformers import AutoTokenizer, AutoModel
|
8 |
import torch
|
@@ -11,7 +12,7 @@ import torch.nn.functional as F
|
|
11 |
|
12 |
@st.cache_data
|
13 |
def login():
|
14 |
-
if
|
15 |
logging.info("Trying to log in to HF")
|
16 |
st.session_state['logged'] = True
|
17 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
@@ -45,8 +46,11 @@ def load_index():
|
|
45 |
split="train"
|
46 |
)
|
47 |
logging.info("Index succesfully loaded")
|
|
|
|
|
|
|
48 |
logging.info("Building index")
|
49 |
-
index.
|
50 |
logging.info("Index built successfully")
|
51 |
return index
|
52 |
|
@@ -75,6 +79,7 @@ def get_answers(query):
|
|
75 |
index = load_index()
|
76 |
query_embedding = get_embedding(query, model, tokenizer).reshape(-1)
|
77 |
scores, answers = index.get_nearest_examples('embedding', query_embedding)
|
|
|
78 |
logging.info("Succesfully got answers for {}".format(query))
|
79 |
return answers
|
80 |
|
@@ -82,7 +87,7 @@ def get_answers(query):
|
|
82 |
def display_answer(query):
|
83 |
st.write("---")
|
84 |
answers = get_answers(query)
|
85 |
-
for answer_id in range(len(answers)):
|
86 |
with st.container():
|
87 |
href = "https://arxiv.org/abs/{}".format(answers['id'][answer_id])
|
88 |
title = "<h3><a href=\"{}\">{}</a></h3>".format(
|
|
|
3 |
import datasets
|
4 |
import logging
|
5 |
import os
|
6 |
+
import numpy as np
|
7 |
|
8 |
from transformers import AutoTokenizer, AutoModel
|
9 |
import torch
|
|
|
12 |
|
13 |
@st.cache_data
|
14 |
def login():
|
15 |
+
if 'logged' not in st.session_state:
|
16 |
logging.info("Trying to log in to HF")
|
17 |
st.session_state['logged'] = True
|
18 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
|
|
46 |
split="train"
|
47 |
)
|
48 |
logging.info("Index succesfully loaded")
|
49 |
+
logging.info("Loading embeddings")
|
50 |
+
embeddings = np.load("Data/embeddings.npy")
|
51 |
+
logging.info("Loaded embeddings")
|
52 |
logging.info("Building index")
|
53 |
+
index.add_faiss_index_from_external_arrays(embeddings, 'embedding')
|
54 |
logging.info("Index built successfully")
|
55 |
return index
|
56 |
|
|
|
79 |
index = load_index()
|
80 |
query_embedding = get_embedding(query, model, tokenizer).reshape(-1)
|
81 |
scores, answers = index.get_nearest_examples('embedding', query_embedding)
|
82 |
+
logging.info(scores)
|
83 |
logging.info("Succesfully got answers for {}".format(query))
|
84 |
return answers
|
85 |
|
|
|
87 |
def display_answer(query):
|
88 |
st.write("---")
|
89 |
answers = get_answers(query)
|
90 |
+
for answer_id in range(len(answers['id'])):
|
91 |
with st.container():
|
92 |
href = "https://arxiv.org/abs/{}".format(answers['id'][answer_id])
|
93 |
title = "<h3><a href=\"{}\">{}</a></h3>".format(
|
requirements.txt
CHANGED
@@ -2,4 +2,5 @@ faiss-cpu~=1.7.2
|
|
2 |
sentence-transformers~=2.2.2
|
3 |
datasets~=2.10.1
|
4 |
huggingface_hub~=0.10.1
|
|
|
5 |
torch
|
|
|
2 |
sentence-transformers~=2.2.2
|
3 |
datasets~=2.10.1
|
4 |
huggingface_hub~=0.10.1
|
5 |
+
numpy~=1.23.5
|
6 |
torch
|