aemin committed on
Commit
f82b177
·
1 Parent(s): 450afd2

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +146 -0
app.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
# Page-level Streamlit settings for this NER demo.
# The previous page title ('Extractive Summarization') did not match the app,
# which performs Named Entity Recognition (see the title/description below).
st.set_page_config(
    layout="centered",  # Can be "centered" or "wide". In the future also "dashboard", etc.
    initial_sidebar_state="auto",  # Can be "auto", "expanded", "collapsed"
    page_title='Named Entity Recognition',  # String or None. Strings get appended with "• Streamlit".
    page_icon='./favicon.png',  # String, anything supported by st.image, or None.
)
8
+ import pandas as pd
9
+ import numpy as np
10
+ import os
11
+ import sys
12
+ sys.path.append(os.path.abspath('./'))
13
+ import streamlit_apps_config as config
14
+ from streamlit_ner_output import show_html2, jsl_display_annotations, get_color
15
+
16
+ import sparknlp
17
+ from sparknlp.base import *
18
+ from sparknlp.annotator import *
19
+ from pyspark.sql import functions as F
20
+ from sparknlp_display import NerVisualizer
21
+ from pyspark.ml import Pipeline
22
+ from pyspark.sql.types import StringType
23
# Start the Spark NLP session that the pipeline function below relies on.
spark = sparknlp.start()

# Inject the shared CSS used by the NER visualisation.
st.markdown(config.STYLE_CONFIG, unsafe_allow_html=True)

root_path = config.project_path

########## To Remove the Main Menu Hamburger ########

# CSS snippet that hides Streamlit's "#MainMenu" element.
hide_menu_style = (
    "\n"
    "<style>\n"
    "#MainMenu {visibility: hidden;}\n"
    "</style>\n"
)
st.markdown(hide_menu_style, unsafe_allow_html=True)
38
+
39
+ ########## Side Bar ########
40
+
41
+ ## loading logo(newer version with href)
42
+ import base64
43
@st.cache(allow_output_mutation=True)
def get_base64_of_bin_file(bin_file):
    """Return the contents of ``bin_file`` as a base64-encoded text string.

    The file is opened in binary mode, read in full, base64-encoded and the
    resulting bytes are decoded to ``str``.
    """
    with open(bin_file, 'rb') as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode()
48
+
49
@st.cache(allow_output_mutation=True)
def get_img_with_href(local_img_path, target_url):
    """Build an HTML snippet showing a local image as a clickable link.

    The image at ``local_img_path`` is embedded inline as a base64 ``data:``
    URI and wrapped in an ``<a>`` tag pointing at ``target_url``.

    Args:
        local_img_path: Path of the image file to embed.
        target_url: URL the image should link to. It is inserted into the
            markup as-is (no escaping), so pass trusted values only.

    Returns:
        The HTML markup as a string.
    """
    # Function-scope import keeps this block self-contained.
    import mimetypes

    # Derive the MIME type from the file name rather than pasting the raw
    # extension after "image/": ".jpg" must become "image/jpeg" and ".svg"
    # "image/svg+xml", otherwise the data URI carries an invalid media type.
    mime_type, _ = mimetypes.guess_type(local_img_path)
    if not mime_type or not mime_type.startswith("image/"):
        # Unknown extension: fall back to the previous "image/<extension>" form.
        img_format = os.path.splitext(local_img_path)[-1].replace('.', '')
        mime_type = f"image/{img_format}"

    bin_str = get_base64_of_bin_file(local_img_path)
    html_code = (
        f'\n<a href="{target_url}">\n'
        f'<img height="90%" width="90%" src="data:{mime_type};base64,{bin_str}" />\n'
        '</a>'
    )
    return html_code
58
+
59
# Sidebar logo: the John Snow Labs image, linking to the company site.
logo_html = get_img_with_href('./jsl-logo.png', 'https://www.johnsnowlabs.com/')
st.sidebar.markdown(logo_html, unsafe_allow_html=True)


# Sidebar: model selector (only one pretrained model is offered).
model_name = ["nerdl_fewnerd_100d"]
st.sidebar.title("Pretrained model to test")
selected_model = st.sidebar.selectbox("", model_name)

######## Main Page #########
app_title = "Detect up to 8 entity types in general domain texts"
app_description = "Named Entity Recognition model aimed to detect up to 8 entity types from general domain texts. This model was trained on the Few-NERD/inter public dataset using Spark NLP, and is available in Spark NLP Models hub (https://nlp.johnsnowlabs.com/models)"
st.title(app_title)
st.markdown(f"<h2>{app_description}</h2>", unsafe_allow_html=True)

if selected_model == "nerdl_fewnerd_100d":
    # Entity types listed for this model, rendered as bold inline-code
    # labels separated by bold commas.
    entity_types = (
        "PERSON",
        "ORGANIZATION",
        "LOCATION",
        "ART",
        "BUILDING",
        "PRODUCT",
        "EVENT",
        "OTHER",
    )
    entity_markdown = " **,** ".join(f"**`{entity}`**" for entity in entity_types)
    st.markdown(entity_markdown, unsafe_allow_html=True)

st.subheader("")
77
+
78
+
79
+ #### Running model and creating pipeline
80
# The original line read ``st.cache(allow_output_mutation=True)`` without the
# leading ``@``: the decorator was created and thrown away, so nothing was
# cached and the models were reloaded and the pipeline refitted on every
# script rerun. Applying it matches the two cached helpers defined above.
@st.cache(allow_output_mutation=True)
def get_pipeline(text):
    """Run the NER pipeline on ``text`` and return the result as a pandas DataFrame.

    Builds a Spark ML pipeline (document assembler, sentence detector,
    tokenizer, ``glove_100d`` word embeddings, ``nerdl_fewnerd_100d`` NER
    model, NER converter), fits it on an empty one-row DataFrame, transforms
    a one-row DataFrame holding ``text`` and converts the output to pandas.

    Args:
        text: The input string to annotate.

    Returns:
        A pandas DataFrame produced by ``toPandas()`` on the transformed
        Spark DataFrame; the converter stage writes its output to the
        ``ner_chunk`` column.
    """
    document_assembler = (
        DocumentAssembler()
        .setInputCol("text")
        .setOutputCol("document")
    )

    sentence_detector = (
        SentenceDetector()
        .setInputCols(["document"])
        .setOutputCol("sentence")
    )

    tokenizer = (
        Tokenizer()
        .setInputCols(["sentence"])
        .setOutputCol("token")
    )

    embeddings = (
        WordEmbeddingsModel.pretrained("glove_100d")
        .setInputCols(["sentence", "token"])
        .setOutputCol("embeddings")
    )

    # NOTE(review): this stage reads "document" while the embeddings and the
    # converter read "sentence" — confirm whether "sentence" was intended.
    # Left unchanged to preserve the current output.
    ner = (
        NerDLModel.pretrained("nerdl_fewnerd_100d")
        .setInputCols(["document", "token", "embeddings"])
        .setOutputCol("ner")
    )

    ner_converter = (
        NerConverter()
        .setInputCols(["sentence", "token", "ner"])
        .setOutputCol("ner_chunk")
    )

    pipeline = Pipeline(
        stages=[
            document_assembler,
            sentence_detector,
            tokenizer,
            embeddings,
            ner,
            ner_converter,
        ]
    )

    # Every stage is pretrained or rule-based, so fitting on an empty frame
    # is only needed to obtain a PipelineModel that can transform.
    empty_df = spark.createDataFrame([[""]]).toDF("text")
    pipeline_model = pipeline.fit(empty_df)

    text_df = spark.createDataFrame(pd.DataFrame({"text": [text]}))
    result = pipeline_model.transform(text_df).toPandas()

    return result
126
+
127
+
128
+
129
# Read the user's text and annotate it with the NER pipeline.
text = st.text_input("Type here your text and press enter to run:")

result = get_pipeline(text)

# Displaying Ner Visualization
# One row per detected chunk, taken from the first (only) result row.
df = pd.DataFrame({"ner_chunk": result["ner_chunk"].iloc[0]})

# Collect the distinct entity labels found in the chunks. Element 4 of each
# chunk is indexed by 'entity' (presumably the annotation metadata — verify
# against the Spark NLP annotation schema). Sorting keeps the option order
# stable; iterating a raw set of strings can change order between processes.
labels_set = sorted({chunk[4]['entity'] for chunk in df['ner_chunk'].values})

# Sidebar filter: every label found is selected by default.
labels = st.sidebar.multiselect(
    "NER Labels", options=labels_set, default=list(labels_set)
)

show_html2(text, df, labels, "Text annotated with identified Named Entities")
146
+