Spaces:

ProfessorLeVesseur
/

Kaleidoscope

Sleeping

File size: 15,533 Bytes

6c9d6da
 
 
 
 
 
 
 
 
 
9cc96e3
 
6c9d6da
 
 
 
 
 
 
 
 
8f06cb3
6c9d6da

#------------------------------------------------------------------------
# Import Modules
#------------------------------------------------------------------------

import streamlit as st
import spacy
import string
from annotated_text import annotated_text
from PIL import Image

@st.cache_resource(show_spinner=False)
def _load_nlp_model(model_name="en_core_web_sm"):
    """Return the spaCy pipeline ``model_name``, installing it only if needed.

    Streamlit re-executes this script on every user interaction, so the
    pipeline is cached as a shared resource instead of being downloaded and
    re-loaded on each run.  The spinner is disabled so that no Streamlit
    element is emitted before ``st.set_page_config`` is called further down.

    Args:
        model_name: Name of the spaCy model package to load.

    Returns:
        The loaded spaCy ``Language`` pipeline.
    """
    try:
        return spacy.load(model_name)
    except OSError:
        # spaCy raises OSError when the model package is not installed:
        # fetch it once, then load it.
        spacy.cli.download(model_name)
        return spacy.load(model_name)


# Load the English NLP model
nlp = _load_nlp_model()

#------------------------------------------------------------------------
# Configurations
#------------------------------------------------------------------------

# Page configuration: browser-tab title and icon, centred layout, automatic
# sidebar state, and a custom "About" entry in the app menu.
icon = Image.open("MTSS.ai_Icon.png")
page_settings = dict(
    page_title="Kaleidoscope | Text Annotation",
    page_icon=icon,
    layout="centered",
    initial_sidebar_state="auto",
    menu_items={
        'About': "### *This application was created by*  \n### LeVesseur Ph.D | MTSS.ai",
    },
)
st.set_page_config(**page_settings)

#------------------------------------------------------------------------
# Header
#------------------------------------------------------------------------

# st.image('MTSS.ai_Logo.png', width=300)

# Brand title, then the tool name with its subtitle; the `:grey[...]`
# markdown directive renders the enclosed text in grey.
st.title("MTSS:grey[.ai]")
st.header("Kaleidoscope:grey[ | Parts of Speech Annotation]")

#------------------------------------------------------------------------
# Sidebar
#------------------------------------------------------------------------    

# Sidebar credit line rendered as a toggle; switching it on reveals the
# contact and profile links underneath.
contact = st.sidebar.toggle(
    'Handmade by  \n**LeVesseur** :grey[ PhD]  \n| :grey[MTSS.ai]'
)
if contact:
    st.sidebar.write(
        'Inquiries: [[email protected]](mailto:[email protected])  \n'
        'Profile: [levesseur.com](http://levesseur.com)  \n'
        'Check out: [InkQA | Dynamic PDFs](http://www.inkqa.com)'
    )

# Recommended highlight colours, shown as swatches in the sidebar.
# Keys are the display labels (colour name plus its hex digits); values are
# the matching "#RRGGBB" codes.
colors = {
    "Green (DAF1E7)": "#DAF1E7",
    "Blue (BDE5FF)": "#BDE5FF",
    "Navy (D1DBE9)": "#D1DBE9",
    "Teal (D6EAED)": "#D6EAED",
    "Iceberg (E4EEF6)": "#E4EEF6",  # label spelling corrected from "Iceburg"
    "Vermillion (F6DCDD)": "#F6DCDD",
}

with st.sidebar:
    st.divider()

    # One colour-picker swatch per recommended highlight colour.
    st.header("Recommended Colors")
    for swatch_label, swatch_hex in colors.items():
        st.color_picker(swatch_label, swatch_hex)

    # Worked example of an annotated passage.  Tuples are highlighted as
    # (text, label, colour); plain strings are rendered without a highlight.
    st.subheader("Example")
    example_words = [
        ("I", "Pronoun", "#F6DCDD"),
        "really",
        ("appreciate", "Verb", "#DAF1E7"),
        ("all", "Pronoun", "#F6DCDD"),
        ("that", "Pronoun", "#F6DCDD"),
        "the",
        ("social", "Adj", "#BDE5FF"),
        "committee",
        "has",
        ("done", "Verb", "#DAF1E7"),
        "to",
        ("keep", "Verb", "#DAF1E7"),
        ("us", "Pronoun", "#F6DCDD"),
        ("feeling", "Verb", "#DAF1E7"),
        ("connected", "Adj", "#BDE5FF"),
        ".",
        "I",
        "also",
        "really",
        ("value", "Verb", "#DAF1E7"),
        ("our", "Pronoun", "#F6DCDD"),
        "in",
        "-person",
        ("meetings", "Noun", "#D1DBE9"),
        "and",
        "the",
        "social",
        ("opportunities", "Noun", "#D1DBE9"),
        ("built", "Verb", "#DAF1E7"),
        "into",
        "these",
        "meetings",
        ".",
    ]
    # Put a single space between consecutive pieces (none after the last).
    example_elements = []
    for piece in example_words:
        example_elements.extend((piece, " "))
    example_elements.pop()
    annotated_text(*example_elements)

    st.divider()

    # Step-by-step usage instructions, rendered as markdown.
    st.subheader("Directions for Using the Text Annotation Tool")
    directions = """
    1. **Enter Your Text**:
    - Type the text you want to annotate in the text area provided.

    2. **Select Parts of Speech**:
    - Choose which parts of speech you want to include in the annotation by checking the corresponding boxes (e.g., Verbs, Adjectives, Nouns, Pronouns).

    3. **Submit Your Text**:
    - Click the "Submit Text" button to process your input. The app will automatically label and color the words based on the selected parts of speech.

    4. **Review the Annotations**:
    - The annotated text will be displayed, showing the parts of speech labels and colors applied to the words.

    5. **Adjust Annotations (Optional)**:
    - You can manually adjust the labels and colors for each word if needed. 

    6. **Generate Annotated Text**:
    - After reviewing and adjusting the annotations, click the "Generate Annotated Text" button.
    - The final annotated text will be displayed.

    7. **Take a Screenshot**:
    - To use the annotated text, take a screenshot of the displayed text.

    8. **Adjust Text Width** (Optional):
    - If you want to adjust the width of the sentences for a better screenshot, minimize or resize your browser window accordingly before taking the screenshot.
    """
    st.markdown(directions)
        
#------------------------------------------------------------------------
# Functions: Parts of Speech
#------------------------------------------------------------------------

# # Function to split text into words
# def split_text(text):
#     # Add a space before punctuation marks
#     for char in string.punctuation:
#         text = text.replace(char, f" {char}")
#     return text.split()

# # Function to automatically label and color words based on parts of speech
# def auto_label_and_color_words(doc, words):
#     labels = [""] * len(words)
#     colors = ["#FFFFFF"] * len(words)
#     word_positions = {i: word for i, word in enumerate(words)}
    
#     for token in doc:
#         # Match token with the words from the original text
#         for index, word in word_positions.items():
#             if token.text == word:
#                 if token.pos_ == "VERB":
#                     labels[index] = "Verb"
#                     colors[index] = "#DAF1E7"
#                 elif token.pos_ == "ADJ":
#                     labels[index] = "Adj"
#                     colors[index] = "#BDE5FF"
#                 elif token.pos_ == "NOUN":
#                     labels[index] = "Noun"
#                     colors[index] = "#D1DBE9"
#                 elif token.pos_ == "PRON":
#                     labels[index] = "Pronoun"
#                     colors[index] = "#F6DCDD"
#                 break  # Exit loop once the word is found and processed
#     return labels, colors

# # Main Streamlit application
# st.title("Text Annotation Tool")

# # Initialize session state to store text and annotations
# if 'user_text' not in st.session_state:
#     st.session_state.user_text = ""
# if 'words' not in st.session_state:
#     st.session_state.words = []
# if 'labels' not in st.session_state:
#     st.session_state.labels = []
# if 'colors' not in st.session_state:
#     st.session_state.colors = []
# if 'extracted_pos' not in st.session_state:
#     st.session_state.extracted_pos = {}

# # User input for the text
# user_text = st.text_area("Enter the text you want to annotate:", value=st.session_state.user_text, height=100)

# # Button to process the text
# if st.button("Submit Text"):
#     st.session_state.user_text = user_text
#     st.session_state.words = split_text(user_text)
    
#     # Process the text with spaCy
#     doc = nlp(user_text)
    
#     # Automatically label and color words based on parts of speech
#     st.session_state.labels, st.session_state.colors = auto_label_and_color_words(doc, st.session_state.words)

#     # Extract parts of speech
#     st.session_state.extracted_pos = {
#         "verbs": [token.text for token in doc if token.pos_ == "VERB"],
#         "adjectives": [token.text for token in doc if token.pos_ == "ADJ"],
#         "nouns": [token.text for token in doc if token.pos_ == "NOUN"],
#         "pronouns": [token.text for token in doc if token.pos_ == "PRON"]
#     }

# # Display extracted parts of speech
# if st.session_state.extracted_pos:
#     st.subheader("Extracted Parts of Speech")
#     st.write("**Verbs:**", st.session_state.extracted_pos.get("verbs", []))
#     st.write("**Adjectives:**", st.session_state.extracted_pos.get("adjectives", []))
#     st.write("**Nouns:**", st.session_state.extracted_pos.get("nouns", []))
#     st.write("**Pronouns:**", st.session_state.extracted_pos.get("pronouns", []))

# # Collect annotation inputs for each word
# if st.session_state.words:
#     for i, word in enumerate(st.session_state.words):
#         st.write(f"Annotate the word: {word}")
#         st.session_state.labels[i] = st.selectbox(
#             f"Label for '{word}'", ["", "Verb", "Adj", "Noun", "Pronoun"], 
#             key=f"label_{i}", index=["", "Verb", "Adj", "Noun", "Pronoun"].index(st.session_state.labels[i])
#         )
#         st.session_state.colors[i] = st.color_picker(
#             f"Color for '{word}'", 
#             value=st.session_state.colors[i], 
#             key=f"color_{i}"
#         )

#     # Generate button to process the annotations
#     if st.button("Generate Annotated Text"):
#         annotated_elements = []
#         for i, word in enumerate(st.session_state.words):
#             if st.session_state.labels[i] and st.session_state.colors[i] != "#FFFFFF":
#                 annotated_elements.append((word, st.session_state.labels[i], st.session_state.colors[i]))
#             else:
#                 annotated_elements.append(word)
#             annotated_elements.append(" ")  # Add space between words

#         # Remove the last extra space added
#         if annotated_elements and annotated_elements[-1] == " ":
#             annotated_elements.pop()

#         # Display the annotated text using the `annotated_text` function
#         st.subheader("Annotated Text:")
#         annotated_text(*annotated_elements)

#         # Print the code for the annotated text
#         st.subheader("Generated Code:")
#         code_str = 'annotated_text(\n'
#         for elem in annotated_elements:
#             if isinstance(elem, tuple):
#                 code_str += f'    ("{elem[0]}", "{elem[1]}", "{elem[2]}"),\n'
#             else:
#                 code_str += f'    "{elem}",\n'
#         code_str += ')'
#         st.code(code_str, language='python')


#------------------------------------------------------------------------
# Functions: Parts of Speech + Buttons
#------------------------------------------------------------------------

def split_text(text):
    """Split ``text`` into whitespace-separated pieces, breaking before punctuation.

    A space is inserted in front of every character in ``string.punctuation``
    and the result is split on whitespace.  Punctuation is therefore detached
    from the text that precedes it but stays attached to whatever follows it
    (``"in-person"`` becomes ``["in", "-person"]``).

    Args:
        text: The string to split.

    Returns:
        A list of the resulting pieces; empty for empty or all-whitespace input.
    """
    # Map each punctuation code point to " <char>" so one translate() pass
    # does the work of replacing the characters one at a time.
    space_before = {ord(mark): " " + mark for mark in string.punctuation}
    return text.translate(space_before).split()

def auto_label_and_color_words(doc, words, include_verbs, include_adjectives, include_nouns, include_pronouns):
    """Assign a part-of-speech label and highlight colour to each word.

    Each token of ``doc`` is paired with the earliest entry of ``words`` that
    has the same text and has not been paired yet.  Repeated words are thus
    labelled from their own token, instead of every repeat being matched to
    (and overwriting) the first occurrence.

    Args:
        doc: Iterable of tokens exposing ``text`` and ``pos_`` attributes
            (the caller passes the result of the spaCy pipeline).
        words: Sequence of word strings to annotate.
        include_verbs: Label tokens whose ``pos_`` is ``"VERB"``.
        include_adjectives: Label tokens whose ``pos_`` is ``"ADJ"``.
        include_nouns: Label tokens whose ``pos_`` is ``"NOUN"``.
        include_pronouns: Label tokens whose ``pos_`` is ``"PRON"``.

    Returns:
        A ``(labels, colors)`` pair of lists, each the same length as
        ``words``.  Words that receive no label keep ``""`` and ``"#FFFFFF"``.
    """
    # Part-of-speech tag -> (label, colour), restricted to the enabled categories.
    styles = {}
    if include_verbs:
        styles["VERB"] = ("Verb", "#DAF1E7")
    if include_adjectives:
        styles["ADJ"] = ("Adj", "#BDE5FF")
    if include_nouns:
        styles["NOUN"] = ("Noun", "#D1DBE9")
    if include_pronouns:
        styles["PRON"] = ("Pronoun", "#F6DCDD")

    labels = [""] * len(words)
    colors = ["#FFFFFF"] * len(words)

    # Word text -> indices at which it occurs.  Indices are stored
    # last-to-first so that pop() hands out the earliest unused one in O(1).
    unmatched = {}
    for index in range(len(words) - 1, -1, -1):
        unmatched.setdefault(words[index], []).append(index)

    for token in doc:
        candidates = unmatched.get(token.text)
        if not candidates:
            continue  # no (remaining) word with this text
        # Consume the position even when this tag is not highlighted, so a
        # later token with the same text lines up with the next occurrence.
        index = candidates.pop()
        style = styles.get(token.pos_)
        if style is not None:
            labels[index], colors[index] = style
    return labels, colors

# Seed the session state with an empty value for every key the app reads,
# leaving any value that is already present untouched.
for state_key, make_default in (
    ('user_text', str),
    ('words', list),
    ('labels', list),
    ('colors', list),
    ('extracted_pos', dict),
):
    if state_key not in st.session_state:
        st.session_state[state_key] = make_default()

# Text box for the passage to annotate, pre-filled with the last submitted text.
user_text = st.text_area(
    "Enter the text you want to annotate:",
    value=st.session_state.user_text,
    height=100,
)

# One checkbox per part of speech that can be highlighted; all start ticked.
include_verbs, include_adjectives, include_nouns, include_pronouns = (
    st.checkbox(option_label, value=True)
    for option_label in (
        "Include Verbs",
        "Include Adjectives",
        "Include Nouns",
        "Include Pronouns",
    )
)

# On "Submit Text": store the text, split it into words, run the spaCy
# pipeline, and save the automatic labels/colours plus the per-category
# token lists in the session state.
if st.button("Submit Text"):
    session = st.session_state
    session.user_text = user_text
    session.words = split_text(user_text)

    doc = nlp(user_text)

    auto_labels, auto_colors = auto_label_and_color_words(
        doc,
        session.words,
        include_verbs,
        include_adjectives,
        include_nouns,
        include_pronouns,
    )
    session.labels = auto_labels
    session.colors = auto_colors

    # Token texts grouped by part-of-speech tag, independent of the checkboxes.
    pos_groups = (
        ("verbs", "VERB"),
        ("adjectives", "ADJ"),
        ("nouns", "NOUN"),
        ("pronouns", "PRON"),
    )
    session.extracted_pos = {
        group: [token.text for token in doc if token.pos_ == tag]
        for group, tag in pos_groups
    }

# Once a text has been submitted, list the tokens found for each category.
extracted = st.session_state.extracted_pos
if extracted:
    st.subheader("Extracted Parts of Speech")
    for heading, group in (
        ("**Verbs:**", "verbs"),
        ("**Adjectives:**", "adjectives"),
        ("**Nouns:**", "nouns"),
        ("**Pronouns:**", "pronouns"),
    ):
        st.write(heading, extracted.get(group, []))

# Per-word editor: for every word of the submitted text show a label selector
# and a colour picker, pre-set to the stored values, and write the current
# choices back into the session state.
if st.session_state.words:
    label_options = ["", "Verb", "Adj", "Noun", "Pronoun"]
    for i, word in enumerate(st.session_state.words):
        st.write(f"Annotate the word: {word}")
        st.session_state.labels[i] = st.selectbox(
            f"Label for '{word}'",
            label_options,
            key=f"label_{i}",
            index=label_options.index(st.session_state.labels[i]),
        )
        st.session_state.colors[i] = st.color_picker(
            f"Color for '{word}'",
            value=st.session_state.colors[i],
            key=f"color_{i}",
        )

    # Generate button to process the annotations
    if st.button("Generate Annotated Text", type="primary"):
        # Local import: only needed here, to quote the generated snippet.
        import json

        # A word is highlighted only when it has a label and a non-white
        # colour; every other word is passed through as plain text.
        annotated_elements = []
        for i, word in enumerate(st.session_state.words):
            label = st.session_state.labels[i]
            color = st.session_state.colors[i]
            if label and color != "#FFFFFF":
                annotated_elements.append((word, label, color))
            else:
                annotated_elements.append(word)
            annotated_elements.append(" ")  # Add space between words

        # Remove the last extra space added
        if annotated_elements and annotated_elements[-1] == " ":
            annotated_elements.pop()

        # Display the annotated text using the `annotated_text` function
        st.subheader("Annotated Text:")
        annotated_text(*annotated_elements)

        # Print the code for the annotated text.  Each string is emitted via
        # json.dumps so that embedded double quotes and backslashes are
        # escaped and the snippet stays valid Python; words without such
        # characters are quoted exactly as "word".
        def _quoted(value):
            return json.dumps(value, ensure_ascii=False)

        st.subheader("Generated Code:")
        code_lines = ['annotated_text(']
        for elem in annotated_elements:
            if isinstance(elem, tuple):
                code_lines.append(f'    ({", ".join(_quoted(part) for part in elem)}),')
            else:
                code_lines.append(f'    {_quoted(elem)},')
        code_lines.append(')')
        code_str = "\n".join(code_lines)
        st.code(code_str, language='python')