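# NBDT Recommendation Engine for Editors — a Gradio Space that embeds a
# submitted abstract with one of three HuggingFace models and searches the
# matching FAISS index to recommend similar authors, abstracts, and journals
# for NBDT journal editors.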
import gradio as gr
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
import torch
def get_matches1(query):
    matches = vecdb1.similarity_search_with_score(query, k=60)
    return matches


def get_matches2(query):
    matches = vecdb2.similarity_search_with_score(query, k=60)
    return matches


def get_matches3(query):
    matches = vecdb3.similarity_search_with_score(query, k=60)
    return matches  # return was missing, which broke method 3 downstream
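# Retrieve the top-60 matches for a query, rescale the FAISS distances into
# similarity scores, and build three result tables: authors (at most two
# entries per author), abstracts, and journals (score summed per journal).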
def inference(query, method=1):
    if method == 1:
        matches = get_matches1(query)
    elif method == 2:
        matches = get_matches2(query)
    else:
        matches = get_matches3(query)
    auth_counts = {}
    j_bucket = {}
    n_table = []
    a_table = []
    scores = [round(match[1].item(), 3) for match in matches]
    min_score = min(scores)
    max_score = max(scores)

    def normaliser(x):
        return round(1 - (x - min_score) / max_score, 3)
    for i, match in enumerate(matches):
        doc = match[0]
        score = round(normaliser(round(match[1].item(), 3)), 3)
        title = doc.metadata['title']
        author = doc.metadata['authors'][0].title()
        date = doc.metadata.get('date', 'None')
        link = doc.metadata.get('link', 'None')
        submitter = doc.metadata.get('submitter', 'None')
        journal = doc.metadata.get('journal')
        if journal is None or journal.strip() == '':
            journal = 'None'
        else:
            journal = journal.strip()

        # For journals: accumulate the score per journal
        if journal not in j_bucket:
            j_bucket[journal] = score
        else:
            j_bucket[journal] += score

        # For authors: keep at most two records per author
        record = [i + 1, score, author, title, link, date]
        if auth_counts.get(author, 0) < 2:
            n_table.append(record)
        if auth_counts.get(author, 0) == 0:
            auth_counts[author] = 1
        else:
            auth_counts[author] += 1

        # For abstracts
        record = [i + 1, title, author, submitter,
                  journal, date, link, score]
        a_table.append(record)
    # 'None' journals are not reported; pop() avoids a KeyError when every
    # match had a journal (del would raise in that case).
    j_bucket.pop('None', None)
    j_table = sorted(
        [[journal, round(score, 3)] for journal, score in j_bucket.items()],
        key=lambda x: x[1], reverse=True)
    j_table = [[i + 1, item[0], item[1]] for i, item in enumerate(j_table)]
    j_output = gr.Dataframe.update(value=j_table, visible=True)
    n_output = gr.Dataframe.update(value=n_table, visible=True)
    a_output = gr.Dataframe.update(value=a_table, visible=True)
    return [a_output, j_output, n_output]
def inference1(query):
    return inference(query, 1)


def inference2(query):
    return inference(query, 2)


def inference3(query):
    return inference(query, 3)
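# Three embedding backends, each paired with its own FAISS index:
# MIReAD-Neuro-Large ("normal"), MIReAD-Neuro-Contrastive, and
# SciBERT-Neuro-Contrastive. All run on CPU without normalised embeddings.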
model1_name = "biodatlab/MIReAD-Neuro-Large"
model2_name = "biodatlab/MIReAD-Neuro-Contrastive"
model3_name = "biodatlab/SciBERT-Neuro-Contrastive"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}
faiss_embedder1 = HuggingFaceEmbeddings(
    model_name=model1_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)
faiss_embedder2 = HuggingFaceEmbeddings(
    model_name=model2_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)
faiss_embedder3 = HuggingFaceEmbeddings(
    model_name=model3_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)
vecdb1 = FAISS.load_local("nbdt_index", faiss_embedder1)
vecdb2 = FAISS.load_local("indexes", faiss_embedder2)
vecdb3 = FAISS.load_local("indexes/scibert_contr", faiss_embedder3)
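# The index directories ("nbdt_index", "indexes", "indexes/scibert_contr")
# must ship with the Space; FAISS.load_local reads them from the local
# filesystem rather than building them at startup.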
with gr.Blocks(theme=gr.themes.Soft()) as demo:
gr.Markdown("# NBDT Recommendation Engine for Editors")
gr.Markdown("NBDT Recommendation Engine for Editors is a tool for neuroscience authors/abstracts/journalsrecommendation built for NBDT journal editors. \
It aims to help an editor to find similar reviewers, abstracts, and journals to a given submitted abstract.\
To find a recommendation, paste a `title[SEP]abstract` or `abstract` in the text box below and click \"Find Matches\".\
Then, you can hover to authors/abstracts/journals tab to find a suggested list.\
The data in our current demo includes authors associated with the NBDT Journal. We will update the data monthly for an up-to-date publications.")
abst = gr.Textbox(label="Abstract", lines=10)
action_btn = gr.Button(value="Find Matches with Normal Model")
action2_btn = gr.Button(value="Find Matches with MIReAD Contrastive Model")
action3_btn = gr.Button(value="Find Matches with SciBERT Contrastive Model")
with gr.Tab("Authors"):
n_output = gr.Dataframe(
headers=['No.', 'Score', 'Name', 'Title', 'Link', 'Date'],
datatype=['number', 'number', 'str', 'str', 'str', 'str'],
col_count=(6, "fixed"),
wrap=True,
visible=False
)
with gr.Tab("Abstracts"):
a_output = gr.Dataframe(
headers=['No.', 'Title', 'Author', 'Corresponding Author',
'Journal', 'Date', 'Link', 'Score'],
datatype=['number', 'str', 'str', 'str',
'str', 'str', 'str', 'number'],
col_count=(8, "fixed"),
wrap=True,
visible=False
)
with gr.Tab("Journals"):
j_output = gr.Dataframe(
headers=['No.', 'Name', 'Score'],
datatype=['number', 'str', 'number'],
col_count=(3, "fixed"),
wrap=True,
visible=False
)
    action_btn.click(fn=inference1,
                     inputs=[abst],
                     outputs=[a_output, j_output, n_output],
                     api_name="neurojane")
    action2_btn.click(fn=inference2,
                      inputs=[abst],
                      outputs=[a_output, j_output, n_output],
                      api_name="neurojane")
    action3_btn.click(fn=inference3,
                      inputs=[abst],
                      outputs=[a_output, j_output, n_output],
                      api_name="neurojane")
demo.launch(debug=True)
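# Example client-side call (a sketch, assuming the app is running locally and
# the gradio_client package is installed; the endpoint name mirrors the
# api_name set on the buttons above):
#
#   from gradio_client import Client
#   client = Client("http://127.0.0.1:7860")
#   result = client.predict("title[SEP]abstract text ...", api_name="/neurojane")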