import streamlit as st
import sparknlp
import os
import pandas as pd

from sparknlp.base import *
from sparknlp.annotator import *
from pyspark.ml import Pipeline
from sparknlp.pretrained import PretrainedPipeline

# Page configuration
st.set_page_config(
    layout="wide",
    page_title="Spark NLP Demos App",
    initial_sidebar_state="auto"
)

# CSS for styling
st.markdown(""" """, unsafe_allow_html=True)

@st.cache_resource
def init_spark():
    # Start (or attach to) a Spark session with Spark NLP loaded
    return sparknlp.start()

@st.cache_resource
def create_pipeline():
    document_assembler = DocumentAssembler() \
        .setInputCol("text") \
        .setOutputCol("document")

    tokenizer = Tokenizer() \
        .setInputCols(["document"]) \
        .setOutputCol("token")

    postagger = PerceptronModel.pretrained("pos_anc", "en") \
        .setInputCols(["document", "token"]) \
        .setOutputCol("pos")

    pipeline = Pipeline(stages=[document_assembler, tokenizer, postagger])
    return pipeline

def fit_data(pipeline, data):
    # Fit on an empty DataFrame to build the pipeline model, then wrap it in a
    # LightPipeline so single strings can be annotated without a full Spark job.
    empty_df = spark.createDataFrame([['']]).toDF('text')
    pipeline_model = pipeline.fit(empty_df)
    model = LightPipeline(pipeline_model)
    results = model.fullAnnotate(data)
    return results

# Set up the page layout
st.markdown('State-of-the-Art Part-of-Speech Tagging with Spark NLP', unsafe_allow_html=True)
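# The fit_data() helper above returns LightPipeline.fullAnnotate() output:
# one dict per input string, keyed by output column ("document", "token",
# "pos"), where each value is a list of Annotation objects exposing .result,
# .begin, .end and .metadata. Rough sketch of the shape (illustrative values
# only, not real output):
#
#   [{'token': [Annotation(token, 0, 4, 'Alice', ...), ...],
#     'pos':   [Annotation(pos,   0, 4, 'NNP',   ...), ...]}]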
# Sidebar content
model_name = st.sidebar.selectbox(
    "Choose the pretrained model",
    ['pos_anc'],
    help="For more info about the models visit: https://sparknlp.org/models"
)

# Reference notebook link in sidebar
link = """Open In Colab"""
st.sidebar.markdown('Reference notebook:')
st.sidebar.markdown(link, unsafe_allow_html=True)

# Load examples
examples = [
    "Alice went to the market. She bought some fresh vegetables there. The tomatoes she purchased were particularly ripe.",
    "Dr. Smith is a renowned surgeon. He has performed over a thousand successful operations. His colleagues respect him a lot.",
    "The company announced a new product launch. It is expected to revolutionize the industry. The CEO was very excited about it.",
    "Jennifer enjoys hiking. She goes to the mountains every weekend. Her favorite spot is the Blue Ridge Mountains.",
    "The team won the championship. They celebrated their victory with a huge party. Their coach praised their hard work and dedication.",
    "Michael is studying computer science. He finds artificial intelligence fascinating. His dream is to work at a leading tech company.",
    "Tom is a skilled guitarist. He plays in a local band. His performances are always energetic and captivating."
]

# st.subheader("Automatically tag each token in the text with its part of speech.")
selected_text = st.selectbox("Select an example", examples)
custom_input = st.text_input("Try it with your own sentence!")
text_to_analyze = custom_input if custom_input else selected_text

st.markdown('**Full example text**')
st.write(text_to_analyze)

# Initialize Spark and create pipeline
spark = init_spark()
pipeline = create_pipeline()
output = fit_data(pipeline, text_to_analyze)

# Display the processed output
st.markdown("**Processed output:**")

results = {
    'Token': [t.result for t in output[0]['token']],
    'Begin': [p.begin for p in output[0]['pos']],
    'End': [p.end for p in output[0]['pos']],
    'POS': [p.result for p in output[0]['pos']]
}

# from annotated_text import annotated_text

# # Create annotated text
# annotated_tokens = []
# for token, pos in zip(results['Token'], results['POS']):
#     annotated_tokens.append((token, pos.lower()))

# # Annotate the entire text with annotated tokens
# annotated_text(*annotated_tokens)

df = pd.DataFrame(results)
df.index += 1
st.dataframe(df)
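# ---------------------------------------------------------------------------
# Running the app (a sketch, assuming this file is saved as streamlit_app.py
# and that streamlit, pyspark, and spark-nlp are installed in the active
# environment):
#
#   streamlit run streamlit_app.py
#
# The first run downloads the pretrained "pos_anc" model and caches it
# locally, so it can take a few minutes depending on the connection.
# ---------------------------------------------------------------------------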