import streamlit as st
import sparknlp
import os
import pandas as pd
from sparknlp.base import *
from sparknlp.annotator import *
from pyspark.ml import Pipeline
from sparknlp.pretrained import PretrainedPipeline
# Page configuration: browser-tab title, wide layout, automatic sidebar state.
st.set_page_config(
    page_title="Spark NLP Demos App",
    layout="wide",
    initial_sidebar_state="auto",
)

# Custom styling hook, rendered as raw HTML.
# NOTE(review): the payload is only a newline in this revision, so no CSS is
# actually injected — confirm whether the stylesheet content went missing.
st.markdown(
    """
""",
    unsafe_allow_html=True,
)
@st.cache_resource
def init_spark():
    """Start Spark NLP and return the object produced by ``sparknlp.start()``.

    The function is decorated with ``st.cache_resource`` so that Streamlit
    can hand back the same started session instead of calling
    ``sparknlp.start()`` again.
    """
    session = sparknlp.start()
    return session
@st.cache_resource
def create_pipeline(model_name="pos_anc", lang="en"):
    """Build the unfitted Spark ML pipeline used for part-of-speech tagging.

    Stages, in order:
      1. ``DocumentAssembler``: column "text" -> "document"
      2. ``Tokenizer``: "document" -> "token"
      3. ``PerceptronModel`` (pretrained): "document" + "token" -> "pos"

    Args:
        model_name: Name of the pretrained POS model handed to
            ``PerceptronModel.pretrained``. Defaults to ``"pos_anc"``.
        lang: Language code handed to ``PerceptronModel.pretrained``.
            Defaults to ``"en"``.

    Returns:
        A ``pyspark.ml.Pipeline`` holding the three stages. It is not
        fitted here; the caller is expected to call ``fit`` on it.
    """
    document_assembler = (
        DocumentAssembler()
        .setInputCol("text")
        .setOutputCol("document")
    )

    tokenizer = (
        Tokenizer()
        .setInputCols(["document"])
        .setOutputCol("token")
    )

    # The tagger consumes both the document and its tokens.
    pos_tagger = (
        PerceptronModel.pretrained(model_name, lang)
        .setInputCols(["document", "token"])
        .setOutputCol("pos")
    )

    return Pipeline(stages=[document_assembler, tokenizer, pos_tagger])
def fit_data(pipeline, data):
    """Fit ``pipeline`` on a placeholder frame and fully annotate ``data``.

    The pipeline is fitted on a one-row DataFrame whose single "text" column
    holds an empty string, the fitted model is wrapped in a
    ``LightPipeline``, and ``data`` is passed to its ``fullAnnotate``.

    Relies on the module-level name ``spark`` being bound before this
    function is called.

    Args:
        pipeline: Object exposing ``fit``; the script passes the result of
            ``create_pipeline()``.
        data: Input forwarded unchanged to ``fullAnnotate``; the script
            passes a single string.

    Returns:
        Whatever ``fullAnnotate`` returns. The script indexes it as
        ``[0]['token']`` and ``[0]['pos']``.
    """
    # One empty "text" row is enough to give ``fit`` a frame to work on.
    placeholder_frame = spark.createDataFrame([[""]]).toDF("text")
    light_model = LightPipeline(pipeline.fit(placeholder_frame))
    return light_model.fullAnnotate(data)
# Page title.
# Triple-quoted so that the multi-line literal is valid Python; a
# single-quoted string cannot span physical lines.
st.markdown("""
State-of-the-Art Part-of-Speech Tagging with Spark NLP
""", unsafe_allow_html=True)

# Sidebar: pretrained-model picker.
# NOTE(review): `model_name` is not forwarded to create_pipeline() below, so
# the selection currently has no effect — TODO wire it through.
model_name = st.sidebar.selectbox(
    "Choose the pretrained model",
    ['pos_anc'],
    help="For more info about the models visit: https://sparknlp.org/models"
)

# Sidebar: reference notebook link.
# NOTE(review): the link markup is only a newline in this revision, so
# nothing is rendered under the heading — TODO restore the notebook link.
link = """
"""
st.sidebar.markdown('Reference notebook:')
st.sidebar.markdown(link, unsafe_allow_html=True)

# Built-in example passages offered in the drop-down.
examples = [
    "Alice went to the market. She bought some fresh vegetables there. The tomatoes she purchased were particularly ripe.",
    "Dr. Smith is a renowned surgeon. He has performed over a thousand successful operations. His colleagues respect him a lot.",
    "The company announced a new product launch. It is expected to revolutionize the industry. The CEO was very excited about it.",
    "Jennifer enjoys hiking. She goes to the mountains every weekend. Her favorite spot is the Blue Ridge Mountains.",
    "The team won the championship. They celebrated their victory with a huge party. Their coach praised their hard work and dedication.",
    "Michael is studying computer science. He finds artificial intelligence fascinating. His dream is to work at a leading tech company.",
    "Tom is a skilled guitarist. He plays in a local band. His performances are always energetic and captivating.",
]

selected_text = st.selectbox("Select an example", examples)
# Strip so that whitespace-only input falls back to the selected example
# instead of being analyzed as an empty sentence.
custom_input = st.text_input("Try it with your own Sentence!").strip()
# Free-text input wins over the drop-down whenever it is non-empty.
text_to_analyze = custom_input or selected_text

st.markdown('**Full example text**')
st.write(text_to_analyze)

# Initialize Spark, build the pipeline, and annotate the chosen text.
spark = init_spark()
pipeline = create_pipeline()
output = fit_data(pipeline, text_to_analyze)

# Display the per-token part-of-speech table.
st.markdown("**Processed output:**")

# The first element of `output` holds the annotations for the analyzed text.
annotations = output[0]
pos_annotations = annotations['pos']
results = {
    'Token': [t.result for t in annotations['token']],
    'Begin': [p.begin for p in pos_annotations],
    'End': [p.end for p in pos_annotations],
    'POS': [p.result for p in pos_annotations],
}

# Alternative rendering (not enabled): the `annotated_text` package could show
# each token inline with its lower-cased POS tag instead of a table.

df = pd.DataFrame(results)
df.index += 1  # number the rows from 1 rather than 0 for display
st.dataframe(df)