import gradio as gr
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
import torch
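
# Gradio app for the NBDT Recommendation Engine for Editors: it loads a FAISS
# index of neuroscience abstracts embedded with the MIReAD-Neuro model and
# recommends similar authors, abstracts, and journals for a submitted abstract.
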
def create_miread_embed(sents, bundle):
    """Embed sentences with MIReAD: return the [CLS] representation from BERT."""
    tokenizer, model = bundle
    model.cpu()
    tokens = tokenizer(sents,
                       max_length=512,
                       padding=True,
                       truncation=True,
                       return_tensors="pt")
    device = torch.device('cpu')
    tokens = tokens.to(device)
    with torch.no_grad():
        out = model.bert(**tokens)
    # Use the [CLS] token embedding as the sentence-level feature
    feature = out.last_hidden_state[:, 0, :]
    return feature.cpu()
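
# Note: create_miread_embed is not called elsewhere in this script; query
# embedding is handled by HuggingFaceEmbeddings below. If used directly,
# `bundle` is assumed to be a (tokenizer, model) pair, e.g. (hypothetical):
#   tokenizer = AutoTokenizer.from_pretrained("biodatlab/MIReAD-Neuro")
#   model = AutoModelForSequenceClassification.from_pretrained("biodatlab/MIReAD-Neuro")
#   vecs = create_miread_embed(["title[SEP]abstract"], (tokenizer, model))
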
def get_matches(query, k):
    # Retrieve the k nearest abstracts (with distance scores) from the FAISS index
    matches = vecdb.similarity_search_with_score(query, k=k)
    return matches
def inference(query, k=30):
    matches = get_matches(query, k)
    j_bucket = {}
    n_table = []
    a_table = []
    # FAISS returns distances (lower = more similar); rescale so the closest match scores ~1
    scores = [round(match[1].item(), 3) for match in matches]
    min_score = min(scores)
    max_score = max(scores)

    def normaliser(x):
        return round(1 - (x - min_score) / max_score, 3)

    for i, match in enumerate(matches):
        doc = match[0]
        score = normaliser(round(match[1].item(), 3))
        title = doc.metadata['title']
        author = doc.metadata['authors'][0].title()
        date = doc.metadata.get('date', 'None')
        link = doc.metadata.get('link', 'None')
        submitter = doc.metadata.get('submitter', 'None')
        journal = doc.metadata.get('journal')
        if journal is None or journal.strip() == '':
            journal = 'None'
        else:
            journal = journal.strip()

        # Journals: accumulate scores per journal
        if journal not in j_bucket:
            j_bucket[journal] = score
        else:
            j_bucket[journal] += score

        # Authors table
        record = [i + 1,
                  score,
                  author,
                  title,
                  link,
                  date]
        n_table.append(record)

        # Abstracts table
        record = [i + 1,
                  title,
                  author,
                  submitter,
                  journal,
                  date,
                  link,
                  score]
        a_table.append(record)

    # Drop the placeholder bucket for entries without a journal (if present)
    j_bucket.pop('None', None)
    j_table = sorted([[journal, round(score, 3)] for journal, score in j_bucket.items()],
                     key=lambda x: x[1], reverse=True)
    j_table = [[i + 1, item[0], item[1]] for i, item in enumerate(j_table)]
    j_output = gr.Dataframe.update(value=j_table, visible=True)
    n_output = gr.Dataframe.update(value=n_table, visible=True)
    a_output = gr.Dataframe.update(value=a_table, visible=True)
    return [a_output, j_output, n_output]
# Load the MIReAD-Neuro encoder (on CPU) and the prebuilt FAISS index of abstracts
model_name = "biodatlab/MIReAD-Neuro"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}
faiss_embedder = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)
vecdb = FAISS.load_local("nbdt_index", faiss_embedder)
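
# The "nbdt_index" folder is assumed to be a FAISS index built offline with the
# same embedder; a minimal sketch (not the authors' actual build script) could be:
#   texts = [p['abstract'] for p in papers]   # `papers` is a hypothetical list of dicts
#   metadatas = [{'title': p['title'], 'authors': p['authors'], 'journal': p['journal'],
#                 'date': p['date'], 'link': p['link'], 'submitter': p['submitter']}
#                for p in papers]
#   index = FAISS.from_texts(texts, faiss_embedder, metadatas=metadatas)
#   index.save_local("nbdt_index")
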
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# NBDT Recommendation Engine for Editors")
    gr.Markdown("NBDT Recommendation Engine for Editors is a tool for neuroscience author/abstract/journal recommendation built for NBDT journal editors. \
        It aims to help an editor find reviewers, abstracts, and journals similar to a given submitted abstract. \
        To find recommendations, paste a `title[SEP]abstract` or `abstract` into the text box below and click \"Find Matches\". \
        Then switch between the Authors, Abstracts, and Journals tabs to see the suggested lists. \
        The data in the current demo covers publications from 2018 to 2022; we will update it monthly to keep the publications up to date.")
    abst = gr.Textbox(label="Abstract", lines=10)
    k = gr.Slider(1, 100, step=1, value=50,
                  label="Number of matches to consider")
    action_btn = gr.Button(value="Find Matches")
    with gr.Tab("Authors"):
        n_output = gr.Dataframe(
            headers=['No.', 'Score', 'Name', 'Title', 'Link', 'Date'],
            datatype=['number', 'number', 'str', 'str', 'str', 'str'],
            col_count=(6, "fixed"),
            wrap=True,
            visible=False
        )
    with gr.Tab("Abstracts"):
        a_output = gr.Dataframe(
            headers=['No.', 'Title', 'Author', 'Corresponding Author',
                     'Journal', 'Date', 'Link', 'Score'],
            datatype=['number', 'str', 'str', 'str',
                      'str', 'str', 'str', 'number'],
            col_count=(8, "fixed"),
            wrap=True,
            visible=False
        )
    with gr.Tab("Journals"):
        j_output = gr.Dataframe(
            headers=['No.', 'Name', 'Score'],
            datatype=['number', 'str', 'number'],
            col_count=(3, "fixed"),
            wrap=True,
            visible=False
        )
    action_btn.click(fn=inference,
                     inputs=[abst, k],
                     outputs=[a_output, j_output, n_output],
                     api_name="neurojane")

demo.launch(debug=True)