import gradio as gr
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
import torch
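# NBDT Recommendation Engine demo: given a submitted abstract, retrieve the
# most similar abstracts, authors, and journals from three prebuilt FAISS
# indices, each built with a different sentence-embedding model.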


def get_matches1(query):
    matches = vecdb1.similarity_search_with_score(query, k=60)
    return matches

def get_matches2(query):
    matches = vecdb2.similarity_search_with_score(query, k=60)
    return matches

def get_matches3(query):
    matches = vecdb3.similarity_search_with_score(query, k=60)
    return matches


def inference(query, model=1):
    if model==1:
        matches = get_matches1(query)
    elif model==2:
        matches = get_matches2(query)
    else:
        matches = get_matches3(query)
    auth_counts = {}
    j_bucket = {}
    n_table = []
    a_table = []
    scores = [round(match[1].item(), 3) for match in matches]
    min_score = min(scores)
    max_score = max(scores)
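    # Normalise raw FAISS distances into descending relevance scores:
    # the closest match (smallest distance) maps to 1.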
    def normaliser(x): return round(1 - (x-min_score)/max_score, 3)
    for i, match in enumerate(matches):
        doc = match[0]
        score = round(normaliser(round(match[1].item(), 3)), 3)
        title = doc.metadata['title']
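        # Only the first listed author is credited in the tables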
        author = doc.metadata['authors'][0].title()
        date = doc.metadata.get('date', 'None')
        link = doc.metadata.get('link', 'None')
        submitter = doc.metadata.get('submitter', 'None')
        # Fall back to 'None' when the journal field is missing or blank
        journal = (doc.metadata.get('journal') or '').strip() or 'None'
        # For journals
        if journal not in j_bucket:
            j_bucket[journal] = score
        else:
            j_bucket[journal] += score

        # For authors
        record = [i+1,
                  score,
                  author,
                  title,
                  link,
                  date]
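        # Cap each author at two rows in the authors table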
        if auth_counts.get(author, 0) < 2:
            n_table.append(record)
            auth_counts[author] = auth_counts.get(author, 0) + 1

        # For abstracts
        record = [i+1,
                  title,
                  author,
                  submitter,
                  journal,
                  date,
                  link,
                  score
                  ]
        a_table.append(record)

    # Drop the placeholder bucket for entries with no journal, if present
    j_bucket.pop('None', None)
    j_table = sorted([[journal, round(score, 3)] for journal,
                     score in j_bucket.items()],
                     key=lambda x: x[1], reverse=True)
    j_table = [[i+1, item[0], item[1]] for i, item in enumerate(j_table)]
    j_output = gr.Dataframe.update(value=j_table, visible=True)
    n_output = gr.Dataframe.update(value=n_table, visible=True)
    a_output = gr.Dataframe.update(value=a_table, visible=True)

    return [a_output, j_output, n_output]

def inference1(query):
    return inference(query, 1)

def inference2(query):
    return inference(query, 2)

def inference3(query):
    return inference(query, 3)
    

model1_name = "biodatlab/MIReAD-Neuro-Large"
model2_name = "biodatlab/MIReAD-Neuro-Contrastive"
model3_name = "biodatlab/SciBERT-Neuro-Contrastive"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}
faiss_embedder1 = HuggingFaceEmbeddings(
    model_name=model1_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)
faiss_embedder2 = HuggingFaceEmbeddings(
    model_name=model2_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)
faiss_embedder3 = HuggingFaceEmbeddings(
    model_name=model3_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)

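# Load the prebuilt FAISS indices (one per embedding model) from local folders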
vecdb1 = FAISS.load_local("miread_large", faiss_embedder1)
vecdb2 = FAISS.load_local("miread_contrastive", faiss_embedder2)
vecdb3 = FAISS.load_local("scibert_contrastive", faiss_embedder3)


with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# NBDT Recommendation Engine for Editors")
    gr.Markdown("NBDT Recommendation Engine for Editors is a tool for neuroscience authors/abstracts/journalsrecommendation built for NBDT journal editors. \
    It aims to help an editor to find similar reviewers, abstracts, and journals to a given submitted abstract.\
    To find a recommendation, paste a `title[SEP]abstract` or `abstract` in the text box below and click \"Find Matches\".\
    Then, you can hover to authors/abstracts/journals tab to find a suggested list.\
    The data in our current demo includes authors associated with the NBDT Journal. We will update the data monthly for an up-to-date publications.")

    abst = gr.Textbox(label="Abstract", lines=10)

    action1_btn = gr.Button(value="Find Matches with MIReAD-Neuro-Large")
    action2_btn = gr.Button(value="Find Matches with MIReAD-Neuro-Contrastive")
    action3_btn = gr.Button(value="Find Matches with SciBERT-Neuro-Contrastive")

    with gr.Tab("Authors"):
        n_output = gr.Dataframe(
            headers=['No.', 'Score', 'Name', 'Title', 'Link', 'Date'],
            datatype=['number', 'number', 'str', 'str', 'str', 'str'],
            col_count=(6, "fixed"),
            wrap=True,
            visible=False
        )
    with gr.Tab("Abstracts"):
        a_output = gr.Dataframe(
            headers=['No.', 'Title', 'Author', 'Corresponding Author',
                     'Journal', 'Date', 'Link', 'Score'],
            datatype=['number', 'str', 'str', 'str',
                      'str', 'str', 'str', 'number'],
            col_count=(8, "fixed"),
            wrap=True,
            visible=False
        )
    with gr.Tab("Journals"):
        j_output = gr.Dataframe(
            headers=['No.', 'Name', 'Score'],
            datatype=['number', 'str', 'number'],
            col_count=(3, "fixed"),
            wrap=True,
            visible=False
        )

    # Wire each button to its corresponding inference function
    action1_btn.click(fn=inference1,
                      inputs=[
                          abst,
                      ],
                      outputs=[a_output, j_output, n_output],
                      api_name="neurojane")
    action2_btn.click(fn=inference2,
                      inputs=[
                          abst,
                      ],
                      outputs=[a_output, j_output, n_output],
                      api_name="neurojane")
    action3_btn.click(fn=inference3,
                      inputs=[
                          abst,
                      ],
                      outputs=[a_output, j_output, n_output],
                      api_name="neurojane")

demo.launch(debug=True)
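
# A minimal usage sketch (an assumption, not part of the original app): querying
# the running demo programmatically with the gradio_client package, assuming the
# app is served locally on the default port and using the first registered
# endpoint name:
#
#   from gradio_client import Client
#   client = Client("http://127.0.0.1:7860/")
#   abstracts, journals, authors = client.predict(
#       "title[SEP]abstract text of a submitted paper",
#       api_name="/neurojane",
#   )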