Spaces:
Build error
Build error
File size: 4,911 Bytes
ae5152f 442e885 09ca6ba acdbc7f ae5152f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 |
# -*- coding: utf-8 -*-
"""
Created on Tue Nov 22 09:54:41 2022
@author: luol2
"""
import streamlit as st
import argparse
from src.nn_model import bioTag_CNN,bioTag_BERT,bioTag_Bioformer
from src.dic_ner import dic_ont
from src.tagging_text import bioTag
import os
import time
import json
import sys
import nltk
# Fetch the NLTK resources on start-up, in the same order as before.
# NOTE(review): presumably these are required by the tagging code under
# `src/` -- confirm before trimming the list.
for _nltk_resource in ('punkt', 'averaged_perceptron_tagger', 'wordnet'):
    nltk.download(_nltk_resource)

# Entries shown in the app's top-right menu.
# NOTE(review): these URLs and the "About" text look like placeholder
# values rather than project links -- confirm and replace with real ones.
_MENU_ITEMS = {
    'Get Help': 'https://www.extremelycoolapp.com/help',
    'Report a bug': "https://www.extremelycoolapp.com/bug",
    'About': "# This is a header. This is an *extremely* cool app!",
}

# Page-level configuration, issued before any other Streamlit command in
# this script.
st.set_page_config(
    page_title="PhenoTagger",
    page_icon=":shark:",
    # layout="wide",
    initial_sidebar_state="expanded",
    menu_items=_MENU_ITEMS,
)

st.title('PhenoTagger Demo')
# with st.spinner('Model is being loaded..'):
# print('load model done!')
import re

# Maximum number of words of user input that is passed to the tagger.
MAX_WORDS = 500


# NOTE(review): `st.cache` is deprecated in newer Streamlit releases in favour
# of `st.cache_resource`; confirm the pinned Streamlit version before switching.
@st.cache(allow_output_mutation=True)
def load_model():
    """Load the dictionary resources and the Bioformer tagging model.

    The result is cached by Streamlit so the files are not reloaded on every
    script rerun.

    Returns:
        A 4-tuple ``(nn_model, biotag_dic, test_tag, session)`` where
        ``nn_model`` is the ``bioTag_Bioformer`` instance, ``biotag_dic`` is
        the ``dic_ont`` instance built from the dictionary files,
        ``test_tag`` is a fixed debug marker string, and ``session`` is
        whatever ``nn_model.load_model`` returns.
    """
    ontfiles = {
        'dic_file': './dict_new/noabb_lemma.dic',
        'word_hpo_file': './dict_new/word_id_map.json',
        'hpo_word_file': './dict_new/id_word_map.json',
    }
    # Only the Bioformer configuration is wired up in this demo; the CNN
    # variant is not loaded here.
    vocabfiles = {
        'labelfile': './dict_new/lable.vocab',
        'config_path': './vocab/bioformer-cased-v1.0/bert_config.json',
        'checkpoint_path': './vocab/bioformer-cased-v1.0/bioformer-cased-v1.0-model.ckpt-2000000',
        'vocab_path': './vocab/bioformer-cased-v1.0/vocab.txt',
    }
    modelfile = './vocab/bioformer_p5n5_b64_1e-5_95_hponew3.h5'

    biotag_dic = dic_ont(ontfiles)
    nn_model = bioTag_Bioformer(vocabfiles)
    session = nn_model.load_model(modelfile)
    test_tag = '1232'
    return nn_model, biotag_dic, test_tag, session


with st.form(key="my_form"):
    # Hyper-parameter controls live in their own sidebar form.
    st.sidebar.header("Hyperparameter Settings")
    sbform = st.sidebar.form("Hyper-paramiters")
    para_overlap = sbform.selectbox('Return overlapping concepts', ['True', 'False'])
    para_abbr = sbform.selectbox('Identify abbreviations', ['True', 'False'])
    para_threshold = sbform.slider(
        'Threshold:', min_value=0.5, max_value=0.95, value=0.95, step=0.05
    )
    sbform.form_submit_button("Setting")

    st.write('parameters:', para_overlap, para_abbr, para_threshold)
    nn_model, biotag_dic, test_tag, session = load_model()

    input_text = st.text_area(
        "Paste your text below (max 500 words)",
        height=510,
    )

    # Enforce the limit in *words*: cut the text right after the last
    # permitted word instead of slicing a fixed number of characters.
    word_matches = list(re.finditer(r"\w+", input_text))
    word_count = len(word_matches)
    if word_count > MAX_WORDS:
        st.warning(
            f"⚠️ Your text contains {word_count} words."
            f" Only the first {MAX_WORDS} words will be reviewed."
            " Stay tuned as increased allowance is coming! 😊"
        )
        input_text = input_text[:word_matches[MAX_WORDS - 1].end()]

    submit_button = st.form_submit_button(label="✨ Get me the data!")

    # The select boxes yield the strings 'True' / 'False'; convert to bool.
    para_overlap = para_overlap == 'True'
    para_abbr = para_abbr == 'True'

    para_set = {
        # 'model_type': para_model,  # cnn or bioformer
        # onlyLongest is the opposite of "return overlapping concepts":
        # False returns overlapping concepts, True keeps only the longest.
        'onlyLongest': not para_overlap,
        'abbrRecog': para_abbr,  # False: don't identify abbr, True: identify abbr
        'ML_Threshold': para_threshold,  # threshold of the deep learning model
    }
# Nothing to tag or render until the main form has been submitted.
if not submit_button:
    st.stop()

st.markdown("**Results:**\n")

# Debug traces written to standard output.
# print('dic...........:',biotag_dic.keys())
print('........:',test_tag)
print('........!!!!!!:',input_text)
print('...input:',input_text)

tag_result = bioTag(
    session,
    input_text,
    biotag_dic,
    nn_model,
    onlyLongest=para_set['onlyLongest'],
    abbrRecog=para_set['abbrRecog'],
    Threshold=para_set['ML_Threshold'],
)

# Each entry is indexed as (start, end, concept id, score); start and end are
# used as character offsets into the submitted text.
for ele in tag_result:
    start, last, concept_id, score = ele[0], ele[1], ele[2], ele[3]
    mention = input_text[int(start):int(last)]
    # f-string formatting renders the same line for string fields and also
    # tolerates numeric offsets or scores.
    st.info(f"{start}\t{last}\t{mention}\t{concept_id}\t{score}\n")
|