File size: 4,763 Bytes
1a5ec75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import gradio as gr
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

import torch


def create_miread_embed(sents, bundle):
    """Embed ``sents`` on the CPU and return one vector per input.

    Args:
        sents: Text input handed unchanged to the tokenizer.
        bundle: Indexable pair whose element 0 is the tokenizer and whose
            element 1 is the model. The model must expose ``cpu()`` and a
            ``bert`` sub-module that is called with the tokenizer output.

    Returns:
        A CPU tensor holding ``last_hidden_state[:, 0, :]``, i.e. the hidden
        state at index 0 of the second axis for every input (the first token
        position — presumably the [CLS] token; confirm against the model).
    """
    tokenizer, model = bundle[0], bundle[1]
    cpu = torch.device('cpu')

    # Model and inputs are both placed on the CPU before the forward pass.
    model.cpu()
    encoded = tokenizer(
        sents,
        max_length=512,
        padding=True,
        truncation=True,
        return_tensors="pt",
    ).to(cpu)

    # Inference only: no gradient tracking is required.
    with torch.no_grad():
        hidden_states = model.bert(**encoded).last_hidden_state
        first_token = hidden_states[:, 0, :]
    return first_token.cpu()


def get_matches(query, db, k):
    """Return the ``k`` best matches for ``query`` from ``db``.

    Delegates to ``db.similarity_search_with_score`` and returns its result
    unchanged; ``inference`` reads each entry as ``(document, score)``.
    """
    return db.similarity_search_with_score(query, k=k)


def _first_author(raw_authors):
    """Return the first author from a document's ``authors`` metadata value.

    The value is presumably the string form of a Python list, e.g.
    ``"['A. Author', 'B. Author']"`` (verify against the index builder).
    It is parsed with ``ast.literal_eval`` rather than ``eval`` so that the
    metadata can never execute code.

    Returns:
        The first element of the parsed list/tuple, ``""`` when the list is
        empty, or the raw value itself when it is not a Python literal.
    """
    import ast

    authors = raw_authors
    if isinstance(raw_authors, str):
        try:
            authors = ast.literal_eval(raw_authors)
        except (ValueError, SyntaxError):
            # Not a literal: treat the whole string as a single author name.
            return raw_authors
    if isinstance(authors, (list, tuple)):
        return authors[0] if authors else ""
    return authors


def inference(query, db, k=30):
    """Search ``db`` for ``query`` and build the three result tables.

    Args:
        query: Text to search for; passed unchanged to ``get_matches``.
        db: Vector store passed to ``get_matches``.
        k: Number of matches to retrieve. Coerced to ``int`` so a float
            coming from a UI control is accepted.

    Returns:
        ``[a_output, j_output, n_output]``: ``gr.Dataframe.update`` payloads
        (all made visible) for the abstracts, journals and authors tables,
        in that order.

    Raises:
        KeyError: If a matched document lacks one of the metadata keys
            ``title``, ``authors``, ``date``, ``link``, ``submitter`` or
            ``journal``.
    """
    matches = get_matches(query, db, int(k))

    j_bucket = {}  # journal name -> sum of the scores of its matches
    n_table = []   # rows for the authors table
    a_table = []   # rows for the abstracts table

    for rank, match in enumerate(matches, start=1):
        doc, score = match[0], match[1]
        metadata = doc.metadata
        title = metadata['title']
        author = _first_author(metadata['authors'])
        date = metadata['date']
        link = metadata['link']
        submitter = metadata['submitter']
        journal = metadata['journal']
        shown_score = round(score, 3)

        # For journals: accumulate the score of every match per journal.
        if journal in j_bucket:
            j_bucket[journal] += score
        else:
            j_bucket[journal] = score

        # For authors
        n_table.append([rank, shown_score, author, title, link, date])

        # For abstracts
        a_table.append([rank, title, author, submitter,
                        journal, date, link, shown_score])

    # Journals are ranked by their rounded total score, highest first.
    # NOTE(review): if the store returns distances (lower = closer), this
    # ordering favours journals with larger summed distance — confirm that
    # this is the intended ranking.
    ranked = sorted(
        ((journal, round(total, 3)) for journal, total in j_bucket.items()),
        key=lambda item: item[1],
        reverse=True,
    )
    j_table = [[position, journal, total]
               for position, (journal, total) in enumerate(ranked, start=1)]

    j_output = gr.Dataframe.update(value=j_table, visible=True)
    n_output = gr.Dataframe.update(value=n_table, visible=True)
    a_output = gr.Dataframe.update(value=a_table, visible=True)
    return [a_output, j_output, n_output]


# Embedding model used to encode queries for the FAISS index.
model_name = "biodatlab/MIReAD-Neuro"
# Use the GPU when one is available, otherwise fall back to the CPU so the
# app still starts on CPU-only hosts instead of failing at model load.
model_kwargs = {'device': 'cuda' if torch.cuda.is_available() else 'cpu'}
encode_kwargs = {'normalize_embeddings': False}
faiss_embedder = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)

# Vector store loaded from the local "faiss_index" directory; queries are
# embedded with ``faiss_embedder``.
vecdb = FAISS.load_local("faiss_index", faiss_embedder)


with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# NBDT Recommendation Engine for Editors")
    gr.Markdown("NBDT Recommendation Engine for Editors is a tool for neuroscience authors/abstracts/journals recommendation built for NBDT journal editors. \
    It aims to help an editor to find similar reviewers, abstracts, and journals to a given submitted abstract.\
    To find a recommendation, paste a `title[SEP]abstract` or `abstract` in the text box below and click \"Find Matches\".\
    Then, you can hover to authors/abstracts/journals tab to find a suggested list.\
    The data in our current demo is selected from 2018 to 2022. We will update the data monthly for an up-to-date publications.")

    abst = gr.Textbox(label="Abstract", lines=10)

    k = gr.Slider(1, 100, step=1, value=50,
                  label="Number of matches to consider")

    action_btn = gr.Button(value="Find Matches")

    # The result tables start hidden; ``inference`` returns updates that
    # fill them and make them visible. Each ``datatype`` list has one entry
    # per header, in the same order.
    with gr.Tab("Authors"):
        n_output = gr.Dataframe(
            headers=['No.', 'Score', 'Name', 'Title', 'Link', 'Date'],
            datatype=['number', 'number', 'str', 'str', 'str', 'str'],
            col_count=(6, "fixed"),
            wrap=True,
            visible=False
        )
    with gr.Tab("Abstracts"):
        a_output = gr.Dataframe(
            headers=['No.', 'Title', 'Author', 'Submitter',
                     'Journal', 'Date', 'Link', 'Score'],
            datatype=['number', 'str', 'str', 'str',
                      'str', 'str', 'str', 'number'],
            col_count=(8, "fixed"),
            wrap=True,
            visible=False
        )
    with gr.Tab("Journals"):
        j_output = gr.Dataframe(
            headers=['No.', 'Name', 'Score'],
            datatype=['number', 'str', 'number'],
            col_count=(3, "fixed"),
            visible=False
        )

    def find_matches(query, k):
        """Run ``inference`` against the module-level ``vecdb`` index.

        Gradio passes the values of ``inputs`` positionally, so the click
        handler must accept exactly (abstract text, slider value). Passing
        ``inference`` directly would bind the slider value to its ``db``
        parameter.
        """
        return inference(query, vecdb, int(k))

    action_btn.click(fn=find_matches,
                     inputs=[
                         abst,
                         k,
                     ],
                     outputs=[a_output, j_output, n_output],
                     api_name="neurojane")

demo.launch(debug=True)