lingbionlp committed on
Commit
d0e4d54
·
1 Parent(s): 16a0bfc

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -154
app.py DELETED
@@ -1,154 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- """
3
- Created on Tue Nov 22 09:54:41 2022
4
-
5
- @author: luol2
6
- """
7
-
8
-
9
-
10
- import streamlit as st
11
- import argparse
12
- from src.nn_model import bioTag_CNN,bioTag_BERT,bioTag_Bioformer
13
- from src.dic_ner import dic_ont
14
- from src.tagging_text import bioTag
15
- import os
16
- import time
17
- import json
18
- import sys
19
-
20
-
21
# Page-level Streamlit configuration followed by the visible page title.
# NOTE(review): the menu URLs and the 'About' text look like documentation
# placeholders rather than PhenoTagger-specific values -- confirm.
_MENU_ITEMS = {
    'Get Help': 'https://www.extremelycoolapp.com/help',
    'Report a bug': "https://www.extremelycoolapp.com/bug",
    'About': "# This is a header. This is an *extremely* cool app!",
}

# The wide layout (layout="wide") is intentionally left disabled.
st.set_page_config(
    page_title="PhenoTagger",
    page_icon=":shark:",
    initial_sidebar_state="expanded",
    menu_items=_MENU_ITEMS,
)

st.title('PhenoTagger Demo')
33
-
34
-
35
-
36
-
37
-
38
-
39
-
40
- # with st.spinner('Model is being loaded..'):
41
-
42
- # print('load model done!')
43
-
44
-
45
-
46
-
47
with st.form(key="my_form"):

    # NOTE(review): st.cache is deprecated in recent Streamlit releases in
    # favour of st.cache_resource -- confirm the pinned Streamlit version
    # before migrating.
    @st.cache(allow_output_mutation=True)
    def load_model():
        """Load the ontology dictionary and the Bioformer tagging model.

        The result is cached by Streamlit so the files are read once rather
        than on every script rerun.

        Returns:
            A tuple ``(nn_model, biotag_dic, test_tag, session)`` where
            ``session`` is whatever ``nn_model.load_model`` returns and
            ``test_tag`` is a constant debug marker.
        """
        ontfiles = {
            'dic_file': './dict_new/noabb_lemma.dic',
            'word_hpo_file': './dict_new/word_id_map.json',
            'hpo_word_file': './dict_new/id_word_map.json',
        }

        # Only the Bioformer model is served by this demo.
        vocabfiles = {
            'labelfile': './dict_new/lable.vocab',
            'config_path': './vocab/bioformer-cased-v1.0/bert_config.json',
            'checkpoint_path': './vocab/bioformer-cased-v1.0/bioformer-cased-v1.0-model.ckpt-2000000',
            'vocab_path': './vocab/bioformer-cased-v1.0/vocab.txt',
        }
        modelfile = './vocab/bioformer_p5n5_b64_1e-5_95_hponew3.h5'

        biotag_dic = dic_ont(ontfiles)

        nn_model = bioTag_Bioformer(vocabfiles)
        session = nn_model.load_model(modelfile)
        test_tag = '1232'
        return nn_model, biotag_dic, test_tag, session

    # Hyper-parameter controls live in their own sidebar form.
    st.sidebar.header("Hyperparameter Settings")
    sbform = st.sidebar.form("Hyper-paramiters")
    para_overlap = sbform.selectbox('Return overlapping concepts', ['True', 'False'])
    para_abbr = sbform.selectbox('Identify abbreviations', ['True', 'False'])
    para_threshold = sbform.slider('Threshold:', min_value=0.5, max_value=0.95, value=0.95, step=0.05)
    sbform.form_submit_button("Setting")

    st.write('parameters:', para_overlap, para_abbr, para_threshold)
    nn_model, biotag_dic, test_tag, session = load_model()

    input_text = st.text_area(
        "Paste your text below (max 500 words)",
        height=510,
    )

    MAX_WORDS = 500
    import re

    # End offset of every word, so the text can be cut on a word boundary.
    word_ends = [match.end() for match in re.finditer(r"\w+", input_text)]
    res = len(word_ends)
    if res > MAX_WORDS:
        st.warning(
            "⚠️ Your text contains "
            + str(res)
            + " words."
            + " Only the first 500 words will be reviewed. Stay tuned as increased allowance is coming! 😊"
        )

        # Keep the first MAX_WORDS *words*; slicing with [:MAX_WORDS] would
        # keep only the first MAX_WORDS characters.
        input_text = input_text[:word_ends[MAX_WORDS - 1]]

    submit_button = st.form_submit_button(label="✨ Get me the data!")
117
-
118
# The select boxes return the strings 'True' / 'False'; turn them into bools.
para_overlap = para_overlap == 'True'
para_abbr = para_abbr == 'True'

para_set = {
    # The UI option asks whether to *return overlapping* concepts, while the
    # tagger flag means the opposite (True: only the longest concept,
    # False: return overlapping concepts), so the choice is negated here.
    'onlyLongest': not para_overlap,
    'abbrRecog': para_abbr,  # False: don't identify abbr, True: identify abbr
    'ML_Threshold': para_threshold,  # the threshold of the deep learning model
}
132
-
133
-
134
-
135
# Nothing to tag until the main form has been submitted.
if not submit_button:
    st.stop()


st.markdown("**Results:**\n")
print('........:', test_tag)
print('........!!!!!!:', input_text)
print('...input:', input_text)
tag_result = bioTag(
    session,
    input_text,
    biotag_dic,
    nn_model,
    onlyLongest=para_set['onlyLongest'],
    abbrRecog=para_set['abbrRecog'],
    Threshold=para_set['ML_Threshold'],
)
for ele in tag_result:
    # Each element is indexed as (start, end, concept id, score); the score
    # at ele[3] is not displayed.
    start = ele[0]
    last = ele[1]
    mention = input_text[int(start):int(last)]
    concept_id = ele[2]
    # An f-string avoids a TypeError when the offsets are not str, and the
    # local names no longer shadow the builtins `id` and `type`.
    output = f"{start}\t{last}\t{mention}\t{concept_id}\n"
    st.info(output)
154
-