File size: 6,467 Bytes
9f6b5cc
 
 
 
 
c5239cd
 
 
9f6b5cc
c5239cd
 
9f6b5cc
 
b02d896
 
 
9f6b5cc
c5239cd
 
 
b02d896
c5239cd
b02d896
 
9f6b5cc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c5239cd
 
 
 
 
9f6b5cc
b02d896
 
 
c5239cd
 
b02d896
9f6b5cc
 
c5239cd
 
 
 
 
 
 
9f6b5cc
 
 
b02d896
 
 
 
 
9f6b5cc
c5239cd
 
b02d896
9f6b5cc
 
 
 
 
 
 
 
 
 
 
 
c5239cd
b02d896
 
9f6b5cc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c5239cd
 
 
 
 
 
 
9f6b5cc
 
 
 
 
b02d896
 
 
 
 
 
9f6b5cc
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
import gradio as gr
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
import torch

def get_matches1(query):
    """Search ``vecdb1`` for *query* and return its top 60 scored results.

    The return value is whatever ``similarity_search_with_score`` yields;
    ``inference`` reads each entry as ``(document, score)``.
    """
    return vecdb1.similarity_search_with_score(query, k=60)

def get_matches2(query):
    """Search ``vecdb2`` for *query* and return its top 60 scored results.

    The return value is whatever ``similarity_search_with_score`` yields;
    ``inference`` reads each entry as ``(document, score)``.
    """
    return vecdb2.similarity_search_with_score(query, k=60)

def get_matches3(query):
    """Search ``vecdb3`` for *query* and return its top 60 scored results.

    The return value is whatever ``similarity_search_with_score`` yields;
    ``inference`` reads each entry as ``(document, score)``.
    """
    # The result must be returned: without it this function yields None and
    # ``inference(query, 3)`` fails as soon as it iterates over the matches.
    matches = vecdb3.similarity_search_with_score(query, k=60)
    return matches


def inference(query, method=1):
    """Run a similarity search for *query* and build the three result tables.

    Args:
        query: Text handed unchanged to the selected ``get_matches*`` helper.
        method: ``1`` uses ``get_matches1``, ``2`` uses ``get_matches2`` and
            any other value uses ``get_matches3``.

    Returns:
        ``[a_output, j_output, n_output]`` -- three ``gr.Dataframe.update``
        objects (abstracts, journals, authors), each made visible.

    Each match is read as ``(document, score)``. The document's ``metadata``
    must provide ``title`` and a non-empty ``authors`` sequence; ``date``,
    ``link``, ``submitter`` and ``journal`` are optional.
    """
    if method == 1:
        matches = get_matches1(query)
    elif method == 2:
        matches = get_matches2(query)
    else:
        matches = get_matches3(query)
    # An empty (or missing) result set yields empty tables instead of
    # crashing in min()/max() below.
    matches = matches or []

    # float() accepts NumPy scalars as well as plain Python floats.
    raw_scores = [round(float(match[1]), 3) for match in matches]
    min_score = min(raw_scores, default=0.0)
    max_score = max(raw_scores, default=0.0)

    def normaliser(x):
        # Maps the lowest raw score to 1.0; larger raw scores map lower.
        if max_score == 0:
            # Avoid dividing by zero; with an all-zero score list the
            # formula's numerator is zero as well, i.e. the result is 1.0.
            return 1.0
        return round(1 - (x - min_score) / max_score, 3)

    auth_counts = {}
    j_bucket = {}
    n_table = []
    a_table = []
    for rank, (match, raw) in enumerate(zip(matches, raw_scores), start=1):
        doc = match[0]
        score = normaliser(raw)
        title = doc.metadata['title']
        author = doc.metadata['authors'][0].title()
        date = doc.metadata.get('date', 'None')
        link = doc.metadata.get('link', 'None')
        submitter = doc.metadata.get('submitter', 'None')
        # A missing, None or blank journal is filed under the 'None' bucket.
        journal = (doc.metadata.get('journal') or '').strip() or 'None'

        # Journals: sum the normalised scores of all matches per journal.
        j_bucket[journal] = j_bucket.get(journal, 0) + score

        # Authors: keep at most two rows per author.
        seen = auth_counts.get(author, 0)
        if seen < 2:
            n_table.append([rank, score, author, title, link, date])
            auth_counts[author] = seen + 1

        # Abstracts: one row per match.
        a_table.append([rank, title, author, submitter,
                        journal, date, link, score])

    # Drop the placeholder bucket; pop() tolerates the case where every
    # match had a journal (a plain ``del`` raised KeyError there).
    j_bucket.pop('None', None)
    ranked_journals = sorted(
        ([name, round(total, 3)] for name, total in j_bucket.items()),
        key=lambda row: row[1],
        reverse=True,
    )
    j_table = [[pos, name, total]
               for pos, (name, total) in enumerate(ranked_journals, start=1)]

    j_output = gr.Dataframe.update(value=j_table, visible=True)
    n_output = gr.Dataframe.update(value=n_table, visible=True)
    a_output = gr.Dataframe.update(value=a_table, visible=True)

    return [a_output, j_output, n_output]

def inference1(query):
    """Click handler: run ``inference`` on *query* with method 1."""
    return inference(query, method=1)

def inference2(query):
    """Click handler: run ``inference`` on *query* with method 2."""
    return inference(query, method=2)

def inference3(query):
    """Click handler: run ``inference`` on *query* with method 3."""
    return inference(query, method=3)

# Checkpoint names for the three embedding models, one per search button.
model1_name = "biodatlab/MIReAD-Neuro-Large"
model2_name = "biodatlab/MIReAD-Neuro-Contrastive"
model3_name = "biodatlab/SciBERT-Neuro-Contrastive"

# Settings shared by all three embedders.
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}

# Build the embedders in order (1, 2, 3) with identical settings.
faiss_embedder1, faiss_embedder2, faiss_embedder3 = [
    HuggingFaceEmbeddings(
        model_name=checkpoint,
        model_kwargs=model_kwargs,
        encode_kwargs=encode_kwargs,
    )
    for checkpoint in (model1_name, model2_name, model3_name)
]

# Load each saved FAISS index with the embedder it is paired with.
vecdb1 = FAISS.load_local("nbdt_index", faiss_embedder1)
vecdb2 = FAISS.load_local("indexes", faiss_embedder2)
vecdb3 = FAISS.load_local("indexes/scibert_contr", faiss_embedder3)


# Gradio UI: one abstract text box, one button per model, and three result
# tables (authors, abstracts, journals) that stay hidden until a search runs.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# NBDT Recommendation Engine for Editors")
    gr.Markdown("NBDT Recommendation Engine for Editors is a tool for neuroscience authors/abstracts/journals recommendation built for NBDT journal editors. \
    It aims to help an editor to find similar reviewers, abstracts, and journals to a given submitted abstract.\
    To find a recommendation, paste a `title[SEP]abstract` or `abstract` in the text box below and click \"Find Matches\".\
    Then, you can hover to authors/abstracts/journals tab to find a suggested list.\
    The data in our current demo includes authors associated with the NBDT Journal. We will update the data monthly for an up-to-date publications.")

    abst = gr.Textbox(label="Abstract", lines=10)

    action_btn = gr.Button(value="Find Matches with Normal Model")
    action2_btn = gr.Button(value="Find Matches with MIReAD Contrastive Model")
    action3_btn = gr.Button(value="Find Matches with SciBERT Contrastive Model")

    # The tables start hidden; ``inference`` returns updates that fill them
    # and set ``visible=True``.
    with gr.Tab("Authors"):
        n_output = gr.Dataframe(
            headers=['No.', 'Score', 'Name', 'Title', 'Link', 'Date'],
            datatype=['number', 'number', 'str', 'str', 'str', 'str'],
            col_count=(6, "fixed"),
            wrap=True,
            visible=False
        )
    with gr.Tab("Abstracts"):
        a_output = gr.Dataframe(
            headers=['No.', 'Title', 'Author', 'Corresponding Author',
                     'Journal', 'Date', 'Link', 'Score'],
            datatype=['number', 'str', 'str', 'str',
                      'str', 'str', 'str', 'number'],
            col_count=(8, "fixed"),
            wrap=True,
            visible=False
        )
    with gr.Tab("Journals"):
        j_output = gr.Dataframe(
            headers=['No.', 'Name', 'Score'],
            datatype=['number', 'str', 'number'],
            col_count=(3, "fixed"),
            wrap=True,
            visible=False
        )

    # Every button feeds the same text box into its own model and writes to
    # the same three tables, in the order ``inference`` returns them.
    # Each handler gets its own explicit endpoint name instead of three
    # identical "neurojane" registrations.
    # NOTE(review): Gradio 3.x renames a repeated api_name by appending
    # "_1", "_2", ... (with a warning); the names below are meant to match
    # that so existing API clients keep working -- confirm against the
    # installed Gradio version.
    action_btn.click(fn=inference1,
                     inputs=[abst],
                     outputs=[a_output, j_output, n_output],
                     api_name="neurojane")
    action2_btn.click(fn=inference2,
                      inputs=[abst],
                      outputs=[a_output, j_output, n_output],
                      api_name="neurojane_1")
    action3_btn.click(fn=inference3,
                      inputs=[abst],
                      outputs=[a_output, j_output, n_output],
                      api_name="neurojane_2")

demo.launch(debug=True)