atrytone committed · Commit 1a5ec75 · 1 Parent(s): 35d198b

Upload 2 files

Files changed (2)
  1. app.py +142 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,142 @@
import gradio as gr
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

import torch


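# Encode sentences with the MIReAD tokenizer/model bundle on CPU and return the
# [CLS] embedding of the last hidden state as the feature for each input.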
def create_miread_embed(sents, bundle):
    tokenizer = bundle[0]
    model = bundle[1]
    model.cpu()
    tokens = tokenizer(sents,
                       max_length=512,
                       padding=True,
                       truncation=True,
                       return_tensors="pt")
    device = torch.device('cpu')
    tokens = tokens.to(device)
    with torch.no_grad():
        out = model.bert(**tokens)
        feature = out.last_hidden_state[:, 0, :]
    return feature.cpu()


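# Retrieve the k most similar abstracts from the FAISS index together with their scores.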
def get_matches(query, db, k):
    matches = db.similarity_search_with_score(query, k=k)
    return matches


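# Turn the top-k matches into three result tables: per-match author records,
# per-match abstract records, and journals ranked by their summed similarity scores.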
def inference(query, k=30):
    # The click handler passes only the query text and k, so use the module-level FAISS index.
    matches = get_matches(query, vecdb, k)
    j_bucket = {}
    n_table = []
    a_table = []
    for i, match in enumerate(matches):
        doc = match[0]
        score = match[1]
        title = doc.metadata['title']
        author = eval(doc.metadata['authors'])[0]
        date = doc.metadata['date']
        link = doc.metadata['link']
        submitter = doc.metadata['submitter']
        journal = doc.metadata['journal']

        # For journals
        if journal not in j_bucket:
            j_bucket[journal] = score
        else:
            j_bucket[journal] += score

        # For authors
        record = [i+1,
                  round(score, 3),
                  author,
                  title,
                  link,
                  date]
        n_table.append(record)

        # For abstracts
        record = [i+1,
                  title,
                  author,
                  submitter,
                  journal,
                  date,
                  link,
                  round(score, 3)]
        a_table.append(record)
    j_table = sorted([[journal, round(score, 3)]
                      for journal, score in j_bucket.items()],
                     key=lambda x: x[1], reverse=True)
    j_table = [[i+1, item[0], round(item[1], 3)]
               for i, item in enumerate(j_table)]
    j_output = gr.Dataframe.update(value=j_table, visible=True)
    n_output = gr.Dataframe.update(value=n_table, visible=True)
    a_output = gr.Dataframe.update(value=a_table, visible=True)
    return [a_output, j_output, n_output]


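# Embedding model and FAISS index used at query time. Note that model_kwargs requests
# the 'cuda' device, so this configuration expects GPU hardware to be available.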
model_name = "biodatlab/MIReAD-Neuro"
model_kwargs = {'device': 'cuda'}
encode_kwargs = {'normalize_embeddings': False}
faiss_embedder = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)

vecdb = FAISS.load_local("faiss_index", faiss_embedder)


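# Gradio interface: abstract input, slider for the number of matches, and tabbed
# result tables that stay hidden until the first query returns.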
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# NBDT Recommendation Engine for Editors")
    gr.Markdown("NBDT Recommendation Engine for Editors is a tool for neuroscience author/abstract/journal recommendation built for NBDT journal editors. \
                It aims to help an editor find reviewers, abstracts, and journals similar to a given submitted abstract. \
                To find a recommendation, paste a `title[SEP]abstract` or `abstract` in the text box below and click \"Find Matches\". \
                Then, open the Authors/Abstracts/Journals tabs to see the suggested lists. \
                The data in our current demo covers publications from 2018 to 2022. We will update the data monthly to keep the publications up to date.")

    abst = gr.Textbox(label="Abstract", lines=10)

    k = gr.Slider(1, 100, step=1, value=50,
                  label="Number of matches to consider")

    action_btn = gr.Button(value="Find Matches")

    with gr.Tab("Authors"):
        n_output = gr.Dataframe(
            headers=['No.', 'Score', 'Name', 'Title', 'Link', 'Date'],
            datatype=['number', 'number', 'str', 'str', 'str', 'str'],
            col_count=(6, "fixed"),
            wrap=True,
            visible=False
        )
    with gr.Tab("Abstracts"):
        a_output = gr.Dataframe(
            headers=['No.', 'Title', 'Author', 'Submitter',
                     'Journal', 'Date', 'Link', 'Score'],
            datatype=['number', 'str', 'str', 'str',
                      'str', 'str', 'str', 'number'],
            col_count=(8, "fixed"),
            wrap=True,
            visible=False
        )
    with gr.Tab("Journals"):
        j_output = gr.Dataframe(
            headers=['No.', 'Name', 'Score'],
            datatype=['number', 'str', 'number'],
            col_count=(3, "fixed"),
            visible=False
        )
    action_btn.click(fn=inference,
                     inputs=[
                         abst,
                         k,
                         # modes,
                     ],
                     outputs=[a_output, j_output, n_output],
                     api_name="neurojane")

demo.launch(debug=True)
requirements.txt ADDED
@@ -0,0 +1,3 @@
langchain
torch
faiss-cpu