Commit 40e4418 by atrytone
0 Parent(s)

Duplicate from biodatlab/NBDT-Recommendation-Engine
.gitattributes ADDED
@@ -0,0 +1,36 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
+ index.faiss filter=lfs diff=lfs merge=lfs -text
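
Every pattern above routes matching binaries through Git LFS; the final `index.faiss` line is a custom rule so the FAISS indexes committed below are stored as LFS pointers rather than raw binaries. A minimal sketch (hypothetical helper, not part of this commit) for checking whether a checked-out file is still an unfetched LFS pointer, based on the pointer format visible in the `index.faiss`/`index.pkl` sections below:

```python
# Hypothetical helper: a Git LFS pointer is a tiny text file whose first line
# is "version https://git-lfs.github.com/spec/v1" (see the pointer files later
# in this commit). If this matches, `git lfs pull` is still needed.
from pathlib import Path

def is_lfs_pointer(path: str) -> bool:
    head = Path(path).read_bytes()[:80]
    return head.startswith(b"version https://git-lfs.github.com/spec/")

for p in ["miread_large/index.faiss", "miread_contrastive/index.pkl"]:
    print(p, "->", "LFS pointer" if is_lfs_pointer(p) else "real file")
```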
Build_VecStore.ipynb ADDED
@@ -0,0 +1,282 @@
+ {
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "QS0v2bceN4Or"
+ },
+ "source": [
+ "Builds a database of vector embeddings from a list of abstracts"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "l5RwcIG8OAjX"
+ },
+ "source": [
+ "## Some Setup"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "sfwT5YW2JCnu"
+ },
+ "outputs": [],
+ "source": [
+ "!pip install transformers==4.28.0\n",
+ "!pip install -U sentence-transformers\n",
+ "!pip install datasets\n",
+ "!pip install langchain\n",
+ "!pip install torch\n",
+ "!pip install faiss-cpu"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "psoTvOp4VkBE"
+ },
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import shutil\n",
+ "\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "from tqdm.auto import tqdm\n",
+ "import torch"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "arZiN8QRHS_a"
+ },
+ "outputs": [],
+ "source": [
+ "import locale\n",
+ "locale.getpreferredencoding = lambda: \"UTF-8\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "JwWs0-Uu6ohg"
+ },
+ "outputs": [],
+ "source": [
+ "from transformers import AutoTokenizer, BertForSequenceClassification\n",
+ "\n",
+ "m_tokenizer = AutoTokenizer.from_pretrained(\"biodatlab/MIReAD-Neuro-Large\")\n",
+ "m_model = BertForSequenceClassification.from_pretrained(\"biodatlab/MIReAD-Neuro-Large\")\n",
+ "miread_bundle = (m_tokenizer,m_model)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "BR-adEUUz9su"
+ },
+ "outputs": [],
+ "source": [
+ "def create_lbert_embed(sents,bundle):\n",
+ "    tokenizer = bundle[0]\n",
+ "    model = bundle[1]\n",
+ "    model.cuda()\n",
+ "    tokens = tokenizer(sents,padding=True,truncation=True,return_tensors='pt')\n",
+ "    device = torch.device('cuda')\n",
+ "    tokens = tokens.to(device)\n",
+ "    with torch.no_grad():\n",
+ "        # the classification head's output has no pooler_output; take it from the base encoder\n",
+ "        embeds = model.bert(**tokens).pooler_output\n",
+ "    return embeds.cpu()\n",
+ "\n",
+ "def create_miread_embed(sents,bundle):\n",
+ "    tokenizer = bundle[0]\n",
+ "    model = bundle[1]\n",
+ "    model.cuda()\n",
+ "    tokens = tokenizer(sents,\n",
+ "                       max_length=512,\n",
+ "                       padding=True,\n",
+ "                       truncation=True,\n",
+ "                       return_tensors=\"pt\"\n",
+ "                       )\n",
+ "    device = torch.device('cuda')\n",
+ "    tokens = tokens.to(device)\n",
+ "    with torch.no_grad():\n",
+ "        out = model.bert(**tokens)\n",
+ "        feature = out.last_hidden_state[:, 0, :]\n",
+ "    return feature.cpu()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "-wHpHmD3zNSR"
+ },
+ "outputs": [],
+ "source": [
+ "from langchain.vectorstores import FAISS\n",
+ "from langchain.embeddings import HuggingFaceEmbeddings\n",
+ "\n",
+ "model_name = \"biodatlab/MIReAD-Neuro-Large\"\n",
+ "model_kwargs = {'device': 'cuda'}\n",
+ "encode_kwargs = {'normalize_embeddings': False}\n",
+ "faiss_embedder = HuggingFaceEmbeddings(\n",
+ "    model_name=model_name,\n",
+ "    model_kwargs=model_kwargs,\n",
+ "    encode_kwargs=encode_kwargs\n",
+ ")\n",
+ "\n",
+ "def add_to_db(data,create_embed,bundle,name=''):\n",
+ "    \"\"\"\n",
+ "    data : list of rows with an 'abstract' and an 'identifier' field\n",
+ "    create_embed : function that creates the embedding given an abstract\n",
+ "    bundle : (tokenizer, model) pair passed through to create_embed\n",
+ "    name : prefix for the generated vector IDs\n",
+ "    Returns a FAISS vector store built batch by batch.\n",
+ "    \"\"\"\n",
+ "    batch_size = 128\n",
+ "    vecdb = None\n",
+ "    for i in tqdm(range(0, len(data), batch_size)):\n",
+ "        # find end of batch\n",
+ "        i_end = min(i+batch_size, len(data))\n",
+ "        # create IDs batch\n",
+ "        ids = [name + '-' + str(x) for x in range(i, i_end)]\n",
+ "        # create metadata batch\n",
+ "        metadatas = [{\n",
+ "            'journal':row.get('journal','None'),\n",
+ "            'title':row['title'],\n",
+ "            'abstract': row['abstract'],\n",
+ "            'authors':row.get('authors','None'),\n",
+ "            'link':row.get('link','None'),\n",
+ "            'date':row.get('date','None'),\n",
+ "            'submitter':row.get('submitter','None'),\n",
+ "        } for row in data[i:i_end]]\n",
+ "        # create embeddings\n",
+ "        em = [create_embed(row['abstract'],bundle).tolist()[0] for row in data[i:i_end]]\n",
+ "        texts = [row['abstract'] for row in data[i:i_end]]\n",
+ "        records = list(zip(texts, em))\n",
+ "        if vecdb:\n",
+ "            vecdb_batch = FAISS.from_embeddings(records,faiss_embedder,metadatas=metadatas,ids=ids)\n",
+ "            vecdb.merge_from(vecdb_batch)\n",
+ "        else:\n",
+ "            vecdb = FAISS.from_embeddings(records,faiss_embedder,metadatas=metadatas,ids=ids)\n",
+ "    return vecdb"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "PfsK3DE4MMou"
+ },
+ "outputs": [],
+ "source": [
+ "nbdt_data = pd.read_json('data_final.json')\n",
+ "aliases = pd.read_csv('id_list.csv')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "JrGJh5XgNPvU"
+ },
+ "outputs": [],
+ "source": [
+ "aliases = aliases.drop_duplicates('Full Name')\n",
+ "aliases.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "CShYwGwWMZh5"
+ },
+ "outputs": [],
+ "source": [
+ "nbdt_data.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "SziJtbggMuyn"
+ },
+ "outputs": [],
+ "source": [
+ "def load_nbdt(data,aliases):\n",
+ "    nbdt_records = []\n",
+ "    urls = []\n",
+ "    no_abst_count = 0\n",
+ "    no_journal_count = 0\n",
+ "    for row in aliases.itertuples():\n",
+ "        name = row[1]\n",
+ "        auth_ids = eval(row[2])\n",
+ "        auth_ids = [int(x) for x in auth_ids]\n",
+ "        papers = data.loc[data['authorId'].isin(auth_ids)]['papers']\n",
+ "        all_papers = []\n",
+ "        for paper_set in papers:\n",
+ "            all_papers.extend(paper_set)\n",
+ "        for paper in all_papers:\n",
+ "            url = paper['url']\n",
+ "            title = paper['title']\n",
+ "            abst = paper['abstract']\n",
+ "            year = paper['year']\n",
+ "            journal = paper.get('journal')\n",
+ "            if journal:\n",
+ "                journal = journal.get('name')\n",
+ "            else:\n",
+ "                journal = 'None'\n",
+ "                no_journal_count += 1\n",
+ "            authors = [name]\n",
+ "            if not abst:\n",
+ "                abst = ''\n",
+ "                no_abst_count += 1\n",
+ "            record = {'journal':journal,'title':title,'abstract':abst,'link':url,'date':year,'authors':authors,'submitter':'None'}\n",
+ "            if url not in urls:\n",
+ "                nbdt_records.append(record)\n",
+ "                urls.append(url)\n",
+ "    return nbdt_records, (no_abst_count,no_journal_count)\n",
+ "nbdt_recs, no_counts = load_nbdt(nbdt_data,aliases)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "IovTlDINc2Ds"
+ },
+ "outputs": [],
+ "source": [
+ "nbdt_db = add_to_db(nbdt_recs,create_miread_embed,miread_bundle,'nbdt')\n",
+ "nbdt_db.save_local(\"nbdt_index\")"
+ ]
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "colab": {
+ "gpuType": "T4",
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+ }
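
The notebook ends by writing the vector store to `nbdt_index/`. A minimal sketch of reloading and querying that directory, assuming the same embedding model the index was built with (this mirrors the LangChain calls in `app.py` below; the query string is a placeholder):

```python
# Minimal sketch: reload the index saved by Build_VecStore.ipynb and run a
# similarity query. The embedder must match the model used to build the index.
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

embedder = HuggingFaceEmbeddings(
    model_name="biodatlab/MIReAD-Neuro-Large",
    model_kwargs={"device": "cpu"},
    encode_kwargs={"normalize_embeddings": False},
)
vecdb = FAISS.load_local("nbdt_index", embedder)

# Each hit is a (Document, distance) pair; lower distance means a closer match.
for doc, dist in vecdb.similarity_search_with_score("placeholder abstract text", k=5):
    print(round(dist, 3), doc.metadata["title"])
```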
NBDT_Data_Recs.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
README.md ADDED
@@ -0,0 +1,17 @@
+ ---
+ title: NBDT Reviewer Recommendation System
+ emoji: 📊
+ colorFrom: indigo
+ colorTo: blue
+ sdk: gradio
+ sdk_version: 3.35.2
+ app_file: app.py
+ pinned: false
+ models:
+ - biodatlab/MIReAD-Neuro-Contrastive
+ duplicated_from: biodatlab/NBDT-Recommendation-Engine
+ ---
+
+ This Space is a demo of a reviewer recommendation system for the Neurons, Behavior, Data Analysis and Theory (NBDT) Journal.
+ The index used here covers papers by authors who have published in the NBDT Journal across various years.
+ The embedding model in use here is [biodatlab/MIReAD-Neuro-Contrastive](https://huggingface.co/biodatlab/MIReAD-Neuro-Contrastive).
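
For reference, the model named above can be loaded through the same LangChain wrapper that `app.py` below uses; a minimal sketch of embedding one abstract (the abstract text is a placeholder):

```python
# Minimal sketch: embed a single abstract with the model named in this README.
from langchain.embeddings import HuggingFaceEmbeddings

embedder = HuggingFaceEmbeddings(
    model_name="biodatlab/MIReAD-Neuro-Contrastive",
    model_kwargs={"device": "cpu"},
    encode_kwargs={"normalize_embeddings": False},
)
vector = embedder.embed_query("A placeholder neuroscience abstract ...")
print(len(vector))  # dimensionality of the abstract embedding
```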
app.py ADDED
@@ -0,0 +1,164 @@
+ import gradio as gr
+ from langchain.vectorstores import FAISS
+ from langchain.embeddings import HuggingFaceEmbeddings
+
+
+ def get_matches(query, db_name="miread_contrastive"):
+     """
+     Wrapper to call the similarity search on the required index.
+     """
+     matches = vecdbs[index_names.index(
+         db_name)].similarity_search_with_score(query, k=60)
+     return matches
+
+
+ def inference(query, model="miread_contrastive"):
+     """
+     Process the matches retrieved by get_matches().
+     Returns Gradio update commands for the authors, abstracts and journals tabular outputs.
+     """
+     matches = get_matches(query, model)
+     auth_counts = {}
+     j_bucket = {}
+     n_table = []
+     a_table = []
+     scores = [round(match[1].item(), 3) for match in matches]
+     min_score = min(scores)
+     max_score = max(scores)
+     def normaliser(x): return round(1 - (x-min_score)/max(max_score-min_score, 1e-9), 3)  # min-max normalise distances so the best match scores ~1
+     for i, match in enumerate(matches):
+         doc = match[0]
+         score = round(normaliser(round(match[1].item(), 3)), 3)
+         title = doc.metadata['title']
+         author = doc.metadata['authors'][0].title()
+         date = doc.metadata.get('date', 'None')
+         link = doc.metadata.get('link', 'None')
+         submitter = doc.metadata.get('submitter', 'None')
+         journal = doc.metadata['journal']
+         if (journal is None or journal.strip() == ''):
+             journal = 'None'
+         else:
+             journal = journal.strip()
+
+         # For journals
+         if journal not in j_bucket:
+             j_bucket[journal] = score
+         else:
+             j_bucket[journal] += score
+
+         # For authors
+         record = [i+1,
+                   score,
+                   author,
+                   title,
+                   link,
+                   date]
+         if auth_counts.get(author, 0) < 2:
+             n_table.append(record)
+             if auth_counts.get(author, 0) == 0:
+                 auth_counts[author] = 1
+             else:
+                 auth_counts[author] += 1
+
+         # For abstracts
+         record = [i+1,
+                   title,
+                   author,
+                   submitter,
+                   journal,
+                   date,
+                   link,
+                   score
+                   ]
+         a_table.append(record)
+
+     j_bucket.pop('None', None)  # drop unattributed journals; may be absent
+     j_table = sorted([[journal, round(score, 3)] for journal,
+                       score in j_bucket.items()],
+                      key=lambda x: x[1], reverse=True)
+     j_table = [[i+1, item[0], item[1]] for i, item in enumerate(j_table)]
+     j_output = gr.Dataframe.update(value=j_table, visible=True)
+     n_output = gr.Dataframe.update(value=n_table, visible=True)
+     a_output = gr.Dataframe.update(value=a_table, visible=True)
+
+     return [a_output, j_output, n_output]
+
+
+ index_names = ["miread_large", "miread_contrastive", "scibert_contrastive"]
+ model_names = [
+     "biodatlab/MIReAD-Neuro-Large",
+     "biodatlab/MIReAD-Neuro-Contrastive",
+     "biodatlab/SciBERT-Neuro-Contrastive",
+ ]
+ model_kwargs = {'device': 'cpu'}
+ encode_kwargs = {'normalize_embeddings': False}
+ faiss_embedders = [HuggingFaceEmbeddings(
+     model_name=name,
+     model_kwargs=model_kwargs,
+     encode_kwargs=encode_kwargs) for name in model_names]
+
+ vecdbs = [FAISS.load_local(index_name, faiss_embedder)
+           for index_name, faiss_embedder in zip(index_names, faiss_embedders)]
+
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
+     gr.Markdown("# NBDT Recommendation Engine for Editors")
+     gr.Markdown("NBDT Recommendation Engine for Editors is a tool for neuroscience author/abstract/journal recommendation, built for NBDT journal editors. \
+     It aims to help an editor find reviewers, abstracts, and journals similar to a given submitted abstract.\
+     To find a recommendation, paste a `title[SEP]abstract` or `abstract` in the text box below and click on the appropriate \"Find Matches\" button.\
+     Then, check the Authors/Abstracts/Journals tabs for the suggested lists.\
+     The data in our current demo includes authors associated with the NBDT Journal. We will update the data monthly to keep the publication list up to date.")
+
+     abst = gr.Textbox(label="Abstract", lines=10)
+
+     action_btn1 = gr.Button(value="Find Matches with MIReAD-Neuro-Large")
+     action_btn2 = gr.Button(value="Find Matches with MIReAD-Neuro-Contrastive")
+     action_btn3 = gr.Button(
+         value="Find Matches with SciBERT-Neuro-Contrastive")
+
+     with gr.Tab("Authors"):
+         n_output = gr.Dataframe(
+             headers=['No.', 'Score', 'Name', 'Title', 'Link', 'Date'],
+             datatype=['number', 'number', 'str', 'str', 'str', 'str'],
+             col_count=(6, "fixed"),
+             wrap=True,
+             visible=False
+         )
+     with gr.Tab("Abstracts"):
+         a_output = gr.Dataframe(
+             headers=['No.', 'Title', 'Author', 'Corresponding Author',
+                      'Journal', 'Date', 'Link', 'Score'],
+             datatype=['number', 'str', 'str', 'str',
+                       'str', 'str', 'str', 'number'],
+             col_count=(8, "fixed"),
+             wrap=True,
+             visible=False
+         )
+     with gr.Tab("Journals"):
+         j_output = gr.Dataframe(
+             headers=['No.', 'Name', 'Score'],
+             datatype=['number', 'str', 'number'],
+             col_count=(3, "fixed"),
+             wrap=True,
+             visible=False
+         )
+
+     action_btn1.click(fn=lambda x: inference(x, index_names[0]),
+                       inputs=[
+                           abst,
+                       ],
+                       outputs=[a_output, j_output, n_output],
+                       api_name="recommend_miread_large")
+     action_btn2.click(fn=lambda x: inference(x, index_names[1]),
+                       inputs=[
+                           abst,
+                       ],
+                       outputs=[a_output, j_output, n_output],
+                       api_name="recommend_miread_contrastive")
+     action_btn3.click(fn=lambda x: inference(x, index_names[2]),
+                       inputs=[
+                           abst,
+                       ],
+                       outputs=[a_output, j_output, n_output],
+                       api_name="recommend_scibert_contrastive")
+
+ demo.launch(debug=True)
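
Because each button registers a named endpoint, the Space can also be queried programmatically. A hedged sketch using `gradio_client` (the Space ID and abstract are placeholders, and the endpoint name assumes the per-button `api_name` values registered above):

```python
# Hedged sketch: call one of the Space's named endpoints from Python.
from gradio_client import Client

client = Client("biodatlab/NBDT-Recommendation-Engine")  # placeholder Space ID
abstracts, journals, authors = client.predict(
    "A placeholder neuroscience abstract ...",
    api_name="/recommend_miread_contrastive",  # assumes the api_name above
)
print(authors)  # the reviewer-candidate table
```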
miread_contrastive/index.faiss ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:755fdfb97bca32f161080ce593de8c54313d0b18f7ffed97db39a59c3d32956c
+ size 108625965
miread_contrastive/index.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9f7f45ee26f08ec61dd5f3f09acf0f116b1ed3466235af1621da12aae1b944b4
+ size 35224541
miread_large/index.faiss ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2e618b6304914de46395f6dc334e33e6c4023f5210c76d088fa0128a7fc04b4c
+ size 108625965
miread_large/index.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:013b06aa858e6e44ecf550bc2e7a0c0b0d77404ff995dc2e96051df6e29355fb
+ size 35224532
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ sentence-transformers
+ torch
+ datasets
+ sentencepiece
+ langchain
+ faiss-cpu
+ accelerate
scibert_contrastive/index.faiss ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:eeaf06c2b444705d5f25b6bea8702bff7183443a408561e49057bfd1ad5d86ac
+ size 108625965
scibert_contrastive/index.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:40af555a4d2ff2ecc85995d1231e6161f56fb1dd122853ae2b376bf07c87a68f
+ size 35224541