abdullahmubeen10 committed on
Commit
32f711b
·
verified ·
1 Parent(s): ef1c428

Update Demo.py

Browse files
Files changed (1) hide show
  1. Demo.py +127 -117
Demo.py CHANGED
@@ -1,117 +1,127 @@
1
- import streamlit as st
2
- import sparknlp
3
- import os
4
- import pandas as pd
5
-
6
- from sparknlp.base import *
7
- from sparknlp.annotator import *
8
- from pyspark.ml import Pipeline
9
- from sparknlp.pretrained import PretrainedPipeline
10
-
11
- # Page configuration
12
- st.set_page_config(
13
- layout="wide",
14
- page_title="Spark NLP Demos App",
15
- initial_sidebar_state="auto"
16
- )
17
-
18
- # CSS for styling
19
- st.markdown("""
20
- <style>
21
- .main-title {
22
- font-size: 36px;
23
- color: #4A90E2;
24
- font-weight: bold;
25
- text-align: center;
26
- }
27
- .section p, .section ul {
28
- color: #666666;
29
- }
30
- </style>
31
- """, unsafe_allow_html=True)
32
-
33
- @st.cache_resource
34
- def init_spark():
35
- return sparknlp.start()
36
-
37
- @st.cache_resource
38
- def create_pipeline():
39
- document_assembler = DocumentAssembler() \
40
- .setInputCol("text") \
41
- .setOutputCol("document")
42
-
43
- tokenizer = Tokenizer() \
44
- .setInputCols(["document"]) \
45
- .setOutputCol("token")
46
-
47
- postagger = PerceptronModel.pretrained("pos_anc", "en") \
48
- .setInputCols(["document", "token"]) \
49
- .setOutputCol("pos")
50
-
51
- pipeline = Pipeline(stages=[document_assembler, tokenizer, postagger])
52
- return pipeline
53
-
54
- def fit_data(pipeline, data):
55
- empty_df = spark.createDataFrame([['']]).toDF('text')
56
- pipeline_model = pipeline.fit(empty_df)
57
- model = LightPipeline(pipeline_model)
58
- results = model.fullAnnotate(data)
59
- return results
60
-
61
- # Set up the page layout
62
- st.markdown('<div class="main-title">State-of-the-Art Part-of-Speech Tagging with Spark NLP</div>', unsafe_allow_html=True)
63
-
64
- # Sidebar content
65
- model_name = st.sidebar.selectbox(
66
- "Choose the pretrained model",
67
- ['pos_anc'],
68
- help="For more info about the models visit: https://sparknlp.org/models"
69
- )
70
-
71
- # Reference notebook link in sidebar
72
- link = """
73
- <a href="https://github.com/JohnSnowLabs/spark-nlp/blob/master/examples/python/annotation/text/english/coreference-resolution/Coreference_Resolution_SpanBertCorefModel.ipynb#L117">
74
- <img src="https://colab.research.google.com/assets/colab-badge.svg" style="zoom: 1.3" alt="Open In Colab"/>
75
- </a>
76
- """
77
- st.sidebar.markdown('Reference notebook:')
78
- st.sidebar.markdown(link, unsafe_allow_html=True)
79
-
80
- # Load examples
81
- examples = [
82
- "Alice went to the market. She bought some fresh vegetables there. The tomatoes she purchased were particularly ripe.",
83
- "Dr. Smith is a renowned surgeon. He has performed over a thousand successful operations. His colleagues respect him a lot.",
84
- "The company announced a new product launch. It is expected to revolutionize the industry. The CEO was very excited about it.",
85
- "Jennifer enjoys hiking. She goes to the mountains every weekend. Her favorite spot is the Blue Ridge Mountains.",
86
- "The team won the championship. They celebrated their victory with a huge party. Their coach praised their hard work and dedication.",
87
- "Michael is studying computer science. He finds artificial intelligence fascinating. His dream is to work at a leading tech company.",
88
- "Tom is a skilled guitarist. He plays in a local band. His performances are always energetic and captivating."
89
- ]
90
-
91
- # st.subheader("Automatically detect phrases expressing dates and normalize them with respect to a reference date.")
92
- selected_text = st.selectbox("Select an example", examples)
93
- custom_input = st.text_input("Try it with your own Sentence!")
94
-
95
- text_to_analyze = custom_input if custom_input else selected_text
96
-
97
- st.subheader('Full example text')
98
- st.write(text_to_analyze)
99
-
100
- # Initialize Spark and create pipeline
101
- spark = init_spark()
102
- pipeline = create_pipeline()
103
- output = fit_data(pipeline, text_to_analyze)
104
-
105
- # Display matched sentence
106
- st.subheader("Processed output:")
107
-
108
- results = {
109
- 'Token': [t.result for t in output[0]['token']],
110
- 'Begin': [p.begin for p in output[0]['pos']],
111
- 'End': [p.end for p in output[0]['pos']],
112
- 'POS': [p.result for p in output[0]['pos']]
113
- }
114
-
115
- df = pd.DataFrame(results)
116
- df.index += 1
117
- st.dataframe(df)
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import sparknlp
3
+ import os
4
+ import pandas as pd
5
+
6
+ from sparknlp.base import *
7
+ from sparknlp.annotator import *
8
+ from pyspark.ml import Pipeline
9
+ from sparknlp.pretrained import PretrainedPipeline
10
+
11
+ # Page configuration
12
+ st.set_page_config(
13
+ layout="wide",
14
+ page_title="Spark NLP Demos App",
15
+ initial_sidebar_state="auto"
16
+ )
17
+
18
+ # CSS for styling
19
+ st.markdown("""
20
+ <style>
21
+ .main-title {
22
+ font-size: 36px;
23
+ color: #4A90E2;
24
+ font-weight: bold;
25
+ text-align: center;
26
+ }
27
+ .section p, .section ul {
28
+ color: #666666;
29
+ }
30
+ </style>
31
+ """, unsafe_allow_html=True)
32
+
33
@st.cache_resource
def init_spark():
    """Start Spark NLP and return whatever ``sparknlp.start()`` returns.

    Decorated with ``st.cache_resource`` so that Streamlit can reuse the
    returned object instead of starting it again on each script rerun.
    """
    session = sparknlp.start()
    return session
36
+
37
@st.cache_resource
def create_pipeline(model_name="pos_anc", lang="en"):
    """Build the (unfitted) part-of-speech tagging pipeline.

    Stages, in order:
      1. ``DocumentAssembler``: "text" -> "document"
      2. ``Tokenizer``: "document" -> "token"
      3. ``PerceptronModel``: ["document", "token"] -> "pos"

    Args:
        model_name: Name of the pretrained ``PerceptronModel`` to load.
            Defaults to "pos_anc", the value that used to be hard-coded,
            so existing ``create_pipeline()`` calls behave as before.
        lang: Language code handed to ``PerceptronModel.pretrained``.
            Defaults to "en".

    Returns:
        A ``pyspark.ml.Pipeline`` holding the three stages above.
    """
    document_assembler = (
        DocumentAssembler()
        .setInputCol("text")
        .setOutputCol("document")
    )

    tokenizer = (
        Tokenizer()
        .setInputCols(["document"])
        .setOutputCol("token")
    )

    # The tagger is loaded pretrained; only its column wiring is set here.
    postagger = (
        PerceptronModel.pretrained(model_name, lang)
        .setInputCols(["document", "token"])
        .setOutputCol("pos")
    )

    return Pipeline(stages=[document_assembler, tokenizer, postagger])
53
+
54
def fit_data(pipeline, data):
    """Run ``pipeline`` over ``data`` and return the full annotations.

    Args:
        pipeline: Unfitted pipeline, e.g. the one built by
            ``create_pipeline()``.
        data: Text handed to ``LightPipeline.fullAnnotate``.

    Returns:
        The value returned by ``fullAnnotate``. The caller in this script
        reads it as ``output[0]['token']`` and ``output[0]['pos']``.
    """
    # Fetch the session through the cached accessor instead of relying on a
    # module-level ``spark`` name that is only assigned further down the
    # script; this keeps the function usable regardless of call order.
    session = init_spark()

    # Fit on a one-row frame with an empty "text" column to obtain a
    # PipelineModel (presumably only needed to materialise the stages,
    # since the tagger is loaded pretrained; confirm).
    empty_df = session.createDataFrame([['']]).toDF('text')
    pipeline_model = pipeline.fit(empty_df)

    # LightPipeline wraps the fitted model so it can annotate plain text.
    light_model = LightPipeline(pipeline_model)
    return light_model.fullAnnotate(data)
60
+
61
+ # Set up the page layout
62
+ st.markdown('<div class="main-title">State-of-the-Art Part-of-Speech Tagging with Spark NLP</div>', unsafe_allow_html=True)
63
+
64
+ # Sidebar content
65
+ model_name = st.sidebar.selectbox(
66
+ "Choose the pretrained model",
67
+ ['pos_anc'],
68
+ help="For more info about the models visit: https://sparknlp.org/models"
69
+ )
70
+
71
+ # Reference notebook link in sidebar
72
+ link = """
73
+ <a href="https://github.com/JohnSnowLabs/spark-nlp/blob/master/examples/python/annotation/text/english/coreference-resolution/Coreference_Resolution_SpanBertCorefModel.ipynb#L117">
74
+ <img src="https://colab.research.google.com/assets/colab-badge.svg" style="zoom: 1.3" alt="Open In Colab"/>
75
+ </a>
76
+ """
77
+ st.sidebar.markdown('Reference notebook:')
78
+ st.sidebar.markdown(link, unsafe_allow_html=True)
79
+
80
+ # Load examples
81
+ examples = [
82
+ "Alice went to the market. She bought some fresh vegetables there. The tomatoes she purchased were particularly ripe.",
83
+ "Dr. Smith is a renowned surgeon. He has performed over a thousand successful operations. His colleagues respect him a lot.",
84
+ "The company announced a new product launch. It is expected to revolutionize the industry. The CEO was very excited about it.",
85
+ "Jennifer enjoys hiking. She goes to the mountains every weekend. Her favorite spot is the Blue Ridge Mountains.",
86
+ "The team won the championship. They celebrated their victory with a huge party. Their coach praised their hard work and dedication.",
87
+ "Michael is studying computer science. He finds artificial intelligence fascinating. His dream is to work at a leading tech company.",
88
+ "Tom is a skilled guitarist. He plays in a local band. His performances are always energetic and captivating."
89
+ ]
90
+
91
+ # st.subheader("Automatically detect phrases expressing dates and normalize them with respect to a reference date.")
92
+ selected_text = st.selectbox("Select an example", examples)
93
+ custom_input = st.text_input("Try it with your own Sentence!")
94
+
95
+ text_to_analyze = custom_input if custom_input else selected_text
96
+
97
+ st.subheader('Full example text')
98
+ st.write(text_to_analyze)
99
+
100
+ # Initialize Spark and create pipeline
101
+ spark = init_spark()
102
+ pipeline = create_pipeline()
103
+ output = fit_data(pipeline, text_to_analyze)
104
+
105
+ # Display matched sentence
106
+ st.subheader("Processed output:")
107
+
108
+ results = {
109
+ 'Token': [t.result for t in output[0]['token']],
110
+ 'Begin': [p.begin for p in output[0]['pos']],
111
+ 'End': [p.end for p in output[0]['pos']],
112
+ 'POS': [p.result for p in output[0]['pos']]
113
+ }
114
+
115
+ df = pd.DataFrame(results)
116
+ df.index += 1
117
+ st.dataframe(df)
118
+
119
from annotated_text import annotated_text

# Pair each token with its POS tag (lower-cased) so it can be shown as a
# label next to the token.
annotated_tokens = [
    (token, pos.lower())
    for token, pos in zip(results['Token'], results['POS'])
]

# Render all (token, tag) pairs in a single annotated_text call.
annotated_text(*annotated_tokens)