import gradio as gr
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
import torch
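# Gradio app for the NBDT Recommendation Engine for Editors: given a submitted
# abstract (optionally formatted as "title[SEP]abstract"), it retrieves similar
# authors, abstracts, and journals from three FAISS indexes, each built with a
# different sentence-embedding model.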
def get_matches1(query):
    matches = vecdb1.similarity_search_with_score(query, k=60)
    return matches


def get_matches2(query):
    matches = vecdb2.similarity_search_with_score(query, k=60)
    return matches


def get_matches3(query):
    matches = vecdb3.similarity_search_with_score(query, k=60)
    return matches
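# Each match is a (Document, distance) pair from FAISS; a smaller distance means
# a closer match. The metadata fields consumed by `inference` below are:
#   doc, dist = matches[0]
#   doc.metadata -> {'title', 'authors', 'journal', 'date', 'link', 'submitter'}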
def inference(query, model=1):
    if model == 1:
        matches = get_matches1(query)
    elif model == 2:
        matches = get_matches2(query)
    else:
        matches = get_matches3(query)
    auth_counts = {}
    j_bucket = {}
    n_table = []
    a_table = []
    scores = [round(match[1].item(), 3) for match in matches]
    min_score = min(scores)
    max_score = max(scores)

    def normaliser(x):
        # Rescale distances so the closest match gets a score of 1.
        return round(1 - (x - min_score) / max_score, 3)

    for i, match in enumerate(matches):
        doc = match[0]
        score = round(normaliser(round(match[1].item(), 3)), 3)
        title = doc.metadata['title']
        author = doc.metadata['authors'][0].title()
        date = doc.metadata.get('date', 'None')
        link = doc.metadata.get('link', 'None')
        submitter = doc.metadata.get('submitter', 'None')
        journal = doc.metadata.get('journal')
        journal = journal.strip() if journal and journal.strip() else 'None'

        # Accumulate scores per journal.
        if journal not in j_bucket:
            j_bucket[journal] = score
        else:
            j_bucket[journal] += score

        # Author table: keep at most two entries per author.
        record = [i + 1,
                  score,
                  author,
                  title,
                  link,
                  date]
        if auth_counts.get(author, 0) < 2:
            n_table.append(record)
            if auth_counts.get(author, 0) == 0:
                auth_counts[author] = 1
            else:
                auth_counts[author] += 1

        # Abstract table: one row per match.
        record = [i + 1,
                  title,
                  author,
                  submitter,
                  journal,
                  date,
                  link,
                  score]
        a_table.append(record)

    # Drop the placeholder bucket for records without a journal, if present.
    j_bucket.pop('None', None)
    j_table = sorted([[journal, round(score, 3)]
                      for journal, score in j_bucket.items()],
                     key=lambda x: x[1], reverse=True)
    j_table = [[i + 1, item[0], item[1]] for i, item in enumerate(j_table)]
    j_output = gr.Dataframe.update(value=j_table, visible=True)
    n_output = gr.Dataframe.update(value=n_table, visible=True)
    a_output = gr.Dataframe.update(value=a_table, visible=True)
    return [a_output, j_output, n_output]
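# Example call (hypothetical query text; in the app the textbox contents are
# passed through the wrappers below):
#   a_out, j_out, n_out = inference("title[SEP]abstract text ...", model=2)
# Each output is a gr.Dataframe update whose `value` holds the ranked rows for
# the Abstracts, Journals, and Authors tabs respectively.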
def inference1(query):
    return inference(query, 1)


def inference2(query):
    return inference(query, 2)


def inference3(query):
    return inference(query, 3)
model1_name = "biodatlab/MIReAD-Neuro-Large"
model2_name = "biodatlab/MIReAD-Neuro-Contrastive"
model3_name = "biodatlab/SciBERT-Neuro-Contrastive"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}
faiss_embedder1 = HuggingFaceEmbeddings(
    model_name=model1_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)
faiss_embedder2 = HuggingFaceEmbeddings(
    model_name=model2_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)
faiss_embedder3 = HuggingFaceEmbeddings(
    model_name=model3_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)
vecdb1 = FAISS.load_local("miread_large", faiss_embedder1)
vecdb2 = FAISS.load_local("miread_contrastive", faiss_embedder2)
vecdb3 = FAISS.load_local("scibert_contrastive", faiss_embedder3)
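# The three FAISS indexes are expected as local directories next to this script.
# A minimal sketch of how such an index could be built (assuming `docs` is a
# list of langchain Documents carrying the metadata fields used in `inference`):
#   vecdb = FAISS.from_documents(docs, faiss_embedder1)
#   vecdb.save_local("miread_large")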
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# NBDT Recommendation Engine for Editors")
    gr.Markdown("NBDT Recommendation Engine for Editors is a tool for neuroscience author/abstract/journal recommendation, built for NBDT journal editors. \
        It helps an editor find reviewers, abstracts, and journals similar to a submitted abstract. \
        To find recommendations, paste a `title[SEP]abstract` or `abstract` in the text box below and click \"Find Matches\". \
        Then, open the Authors, Abstracts, or Journals tab to see the suggested list. \
        The data in the current demo covers authors associated with the NBDT journal. We will update the data monthly to keep the publications up to date.")
    abst = gr.Textbox(label="Abstract", lines=10)
    action1_btn = gr.Button(value="Find Matches with MIReAD-Neuro-Large")
    action2_btn = gr.Button(value="Find Matches with MIReAD-Neuro-Contrastive")
    action3_btn = gr.Button(value="Find Matches with SciBERT-Neuro-Contrastive")
with gr.Tab("Authors"):
n_output = gr.Dataframe(
headers=['No.', 'Score', 'Name', 'Title', 'Link', 'Date'],
datatype=['number', 'number', 'str', 'str', 'str', 'str'],
col_count=(6, "fixed"),
wrap=True,
visible=False
)
with gr.Tab("Abstracts"):
a_output = gr.Dataframe(
headers=['No.', 'Title', 'Author', 'Corresponding Author',
'Journal', 'Date', 'Link', 'Score'],
datatype=['number', 'str', 'str', 'str',
'str', 'str', 'str', 'number'],
col_count=(8, "fixed"),
wrap=True,
visible=False
)
with gr.Tab("Journals"):
j_output = gr.Dataframe(
headers=['No.', 'Name', 'Score'],
datatype=['number', 'str', 'number'],
col_count=(3, "fixed"),
wrap=True,
visible=False
)
    action1_btn.click(fn=inference1,
                      inputs=[
                          abst,
                      ],
                      outputs=[a_output, j_output, n_output],
                      api_name="neurojane")
    action2_btn.click(fn=inference2,
                      inputs=[
                          abst,
                      ],
                      outputs=[a_output, j_output, n_output],
                      api_name="neurojane")
    action3_btn.click(fn=inference3,
                      inputs=[
                          abst,
                      ],
                      outputs=[a_output, j_output, n_output],
                      api_name="neurojane")
demo.launch(debug=True)
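# A minimal sketch of calling the exposed endpoint from Python with
# gradio_client (the Space URL and query text are placeholders):
#   from gradio_client import Client
#   client = Client("<space-url>")
#   result = client.predict("title[SEP]abstract text ...", api_name="/neurojane")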