Spaces:

ProfessorLeVesseur
/

Kaleidoscope

Sleeping

App Files Files Community

ProfessorLeVesseur committed on May 22, 2024

Commit

6c9d6da

verified ·

1 Parent(s): 7ab40c0

Upload 01_Parts of Speech Annotation.py

Browse files

Files changed (1) hide show

pages/01_Parts of Speech Annotation.py +420 -0

pages/01_Parts of Speech Annotation.py ADDED Viewed

	@@ -0,0 +1,420 @@

+#------------------------------------------------------------------------
+# Import Modules
+#------------------------------------------------------------------------
+import streamlit as st
+import spacy
+import string
+from annotated_text import annotated_text
+from PIL import Image
+#------------------------------------------------------------------------
+# Configurations
+#------------------------------------------------------------------------
+# Streamlit page setup
+# The icon is opened by a path relative to the working directory instead of
+# an absolute path on one developer's machine, so the page also starts on
+# other hosts. If the file is missing or unreadable the page simply gets no
+# custom icon (page_icon=None) rather than crashing.
+try:
+    icon = Image.open("MTSS.ai_Icon.png")
+except OSError:
+    icon = None
+st.set_page_config(
+    page_title="Kaleidoscope | Text Annotation",
+    page_icon=icon,
+    layout="centered",
+    initial_sidebar_state="auto",
+    menu_items={
+        'About': "### *This application was created by*  \n### LeVesseur Ph.D | MTSS.ai"
+    }
+)
+# Load the English NLP model
+# Streamlit re-executes this script on every interaction, so the loader is
+# wrapped in st.cache_resource to reuse one pipeline object across reruns.
+# It is invoked after set_page_config so that call stays the first Streamlit
+# command issued by the page.
+@st.cache_resource
+def _load_nlp_model():
+    """Return the spaCy "en_core_web_sm" pipeline used for tagging."""
+    return spacy.load("en_core_web_sm")
+nlp = _load_nlp_model()
+#------------------------------------------------------------------------
+# Header
+#------------------------------------------------------------------------
+# st.image('MTSS.ai_Logo.png', width=300)
+# Page title and section header; ":grey[...]" is Streamlit markdown that
+# renders the bracketed text in grey.
+st.title('MTSS:grey[.ai]')
+st.header('Kaleidoscope:grey[ | Parts of Speech Annotation]')
+#------------------------------------------------------------------------
+# Sidebar
+#------------------------------------------------------------------------
+# Toggle in the sidebar; the contact links below are only written while it
+# is switched on.
+contact = st.sidebar.toggle('Handmade by  \n**LeVesseur** :grey[ PhD]  \n| :grey[MTSS.ai]')
+if contact:
+    # NOTE(review): the "[email protected]" text in this string looks like
+    # an e-mail obfuscation placeholder rather than a real address - confirm
+    # against the original file.
+    st.sidebar.write('Inquiries: [[email protected]](mailto:[email protected])  \nProfile: [levesseur.com](http://levesseur.com)  \nCheck out: [InkQA | Dynamic PDFs](http://www.inkqa.com)')
+# Color options
+# Maps a display name to its hex code. In the code visible here this dict is
+# only iterated to build the "Recommended Colors" pickers in the sidebar; the
+# functions further down use their own local `colors` lists.
+colors = {
+    "Green (DAF1E7)": "#DAF1E7",
+    "Blue (BDE5FF)": "#BDE5FF",
+    "Navy (D1DBE9)": "#D1DBE9",
+    "Teal (D6EAED)": "#D6EAED",
+    "Iceburg (E4EEF6)": "#E4EEF6",
+    "Vermillion (F6DCDD)": "#F6DCDD",
+}
+# Static sidebar content: recommended color pickers, a pre-annotated example
+# sentence, and written usage directions.
+with st.sidebar:
+    st.divider()
+    # Sidebar display (Option 1: Color blocks with hex)
+    st.sidebar.header("Recommended Colors")
+    # One color picker per recommended color, preset to its hex code. The
+    # return values are discarded, so these pickers act as swatches only.
+    for color_name, hex_code in colors.items():
+        st.sidebar.color_picker(color_name, hex_code)
+    st.subheader("Example")
+    # Hard-coded sample: tuples are (text, label, background color); plain
+    # strings are rendered without a label.
+    annotated_text(
+    ("I", "Pronoun", "#F6DCDD"),
+    " ",
+    "really",
+    " ",
+    ("appreciate", "Verb", "#DAF1E7"),
+    " ",
+    ("all", "Pronoun", "#F6DCDD"),
+    " ",
+    ("that", "Pronoun", "#F6DCDD"),
+    " ",
+    "the",
+    " ",
+    ("social", "Adj", "#BDE5FF"),
+    " ",
+    "committee",
+    " ",
+    "has",
+    " ",
+    ("done", "Verb", "#DAF1E7"),
+    " ",
+    "to",
+    " ",
+    ("keep", "Verb", "#DAF1E7"),
+    " ",
+    ("us", "Pronoun", "#F6DCDD"),
+    " ",
+    ("feeling", "Verb", "#DAF1E7"),
+    " ",
+    ("connected", "Adj", "#BDE5FF"),
+    " ",
+    ".",
+    " ",
+    "I",
+    " ",
+    "also",
+    " ",
+    "really",
+    " ",
+    ("value", "Verb", "#DAF1E7"),
+    " ",
+    ("our", "Pronoun", "#F6DCDD"),
+    " ",
+    "in",
+    " ",
+    "-person",
+    " ",
+    ("meetings", "Noun", "#D1DBE9"),
+    " ",
+    "and",
+    " ",
+    "the",
+    " ",
+    "social",
+    " ",
+    ("opportunities", "Noun", "#D1DBE9"),
+    " ",
+    ("built", "Verb", "#DAF1E7"),
+    " ",
+    "into",
+    " ",
+    "these",
+    " ",
+    "meetings",
+    " ",
+    ".",
+)
+    st.divider()
+    st.subheader("Directions for Using the Text Annotation Tool")
+    # Markdown shown verbatim in the sidebar; it describes the workflow built
+    # by the widgets in the main page area.
+    directions = """
+    1. **Enter Your Text**:
+    - Type the text you want to annotate in the text area provided.
+    2. **Select Parts of Speech**:
+    - Choose which parts of speech you want to include in the annotation by checking the corresponding boxes (e.g., Verbs, Adjectives, Nouns, Pronouns).
+    3. **Submit Your Text**:
+    - Click the "Submit Text" button to process your input. The app will automatically label and color the words based on the selected parts of speech.
+    4. **Review the Annotations**:
+    - The annotated text will be displayed, showing the parts of speech labels and colors applied to the words.
+    5. **Adjust Annotations (Optional)**:
+    - You can manually adjust the labels and colors for each word if needed.
+    6. **Generate Annotated Text**:
+    - After reviewing and adjusting the annotations, click the "Generate Annotated Text" button.
+    - The final annotated text will be displayed.
+    7. **Take a Screenshot**:
+    - To use the annotated text, take a screenshot of the displayed text.
+    8. **Adjust Text Width** (Optional):
+    - If you want to adjust the width of the sentences for a better screenshot, minimize or resize your browser window accordingly before taking the screenshot.
+    """
+    st.markdown(directions)
+#------------------------------------------------------------------------
+# Functions: Parts of Speech
+#------------------------------------------------------------------------
+# # Function to split text into words
+# def split_text(text):
+#     # Add a space before punctuation marks
+#     for char in string.punctuation:
+#         text = text.replace(char, f" {char}")
+#     return text.split()
+# # Function to automatically label and color words based on parts of speech
+# def auto_label_and_color_words(doc, words):
+#     labels = [""] * len(words)
+#     colors = ["#FFFFFF"] * len(words)
+#     word_positions = {i: word for i, word in enumerate(words)}
+#     for token in doc:
+#         # Match token with the words from the original text
+#         for index, word in word_positions.items():
+#             if token.text == word:
+#                 if token.pos_ == "VERB":
+#                     labels[index] = "Verb"
+#                     colors[index] = "#DAF1E7"
+#                 elif token.pos_ == "ADJ":
+#                     labels[index] = "Adj"
+#                     colors[index] = "#BDE5FF"
+#                 elif token.pos_ == "NOUN":
+#                     labels[index] = "Noun"
+#                     colors[index] = "#D1DBE9"
+#                 elif token.pos_ == "PRON":
+#                     labels[index] = "Pronoun"
+#                     colors[index] = "#F6DCDD"
+#                 break  # Exit loop once the word is found and processed
+#     return labels, colors
+# # Main Streamlit application
+# st.title("Text Annotation Tool")
+# # Initialize session state to store text and annotations
+# if 'user_text' not in st.session_state:
+#     st.session_state.user_text = ""
+# if 'words' not in st.session_state:
+#     st.session_state.words = []
+# if 'labels' not in st.session_state:
+#     st.session_state.labels = []
+# if 'colors' not in st.session_state:
+#     st.session_state.colors = []
+# if 'extracted_pos' not in st.session_state:
+#     st.session_state.extracted_pos = {}
+# # User input for the text
+# user_text = st.text_area("Enter the text you want to annotate:", value=st.session_state.user_text, height=100)
+# # Button to process the text
+# if st.button("Submit Text"):
+#     st.session_state.user_text = user_text
+#     st.session_state.words = split_text(user_text)
+#     # Process the text with spaCy
+#     doc = nlp(user_text)
+#     # Automatically label and color words based on parts of speech
+#     st.session_state.labels, st.session_state.colors = auto_label_and_color_words(doc, st.session_state.words)
+#     # Extract parts of speech
+#     st.session_state.extracted_pos = {
+#         "verbs": [token.text for token in doc if token.pos_ == "VERB"],
+#         "adjectives": [token.text for token in doc if token.pos_ == "ADJ"],
+#         "nouns": [token.text for token in doc if token.pos_ == "NOUN"],
+#         "pronouns": [token.text for token in doc if token.pos_ == "PRON"]
+#     }
+# # Display extracted parts of speech
+# if st.session_state.extracted_pos:
+#     st.subheader("Extracted Parts of Speech")
+#     st.write("**Verbs:**", st.session_state.extracted_pos.get("verbs", []))
+#     st.write("**Adjectives:**", st.session_state.extracted_pos.get("adjectives", []))
+#     st.write("**Nouns:**", st.session_state.extracted_pos.get("nouns", []))
+#     st.write("**Pronouns:**", st.session_state.extracted_pos.get("pronouns", []))
+# # Collect annotation inputs for each word
+# if st.session_state.words:
+#     for i, word in enumerate(st.session_state.words):
+#         st.write(f"Annotate the word: {word}")
+#         st.session_state.labels[i] = st.selectbox(
+#             f"Label for '{word}'", ["", "Verb", "Adj", "Noun", "Pronoun"],
+#             key=f"label_{i}", index=["", "Verb", "Adj", "Noun", "Pronoun"].index(st.session_state.labels[i])
+#         )
+#         st.session_state.colors[i] = st.color_picker(
+#             f"Color for '{word}'",
+#             value=st.session_state.colors[i],
+#             key=f"color_{i}"
+#         )
+#     # Generate button to process the annotations
+#     if st.button("Generate Annotated Text"):
+#         annotated_elements = []
+#         for i, word in enumerate(st.session_state.words):
+#             if st.session_state.labels[i] and st.session_state.colors[i] != "#FFFFFF":
+#                 annotated_elements.append((word, st.session_state.labels[i], st.session_state.colors[i]))
+#             else:
+#                 annotated_elements.append(word)
+#             annotated_elements.append(" ")  # Add space between words
+#         # Remove the last extra space added
+#         if annotated_elements and annotated_elements[-1] == " ":
+#             annotated_elements.pop()
+#         # Display the annotated text using the `annotated_text` function
+#         st.subheader("Annotated Text:")
+#         annotated_text(*annotated_elements)
+#         # Print the code for the annotated text
+#         st.subheader("Generated Code:")
+#         code_str = 'annotated_text(\n'
+#         for elem in annotated_elements:
+#             if isinstance(elem, tuple):
+#                 code_str += f'    ("{elem[0]}", "{elem[1]}", "{elem[2]}"),\n'
+#             else:
+#                 code_str += f'    "{elem}",\n'
+#         code_str += ')'
+#         st.code(code_str, language='python')
+#------------------------------------------------------------------------
+# Functions: Parts of Speech + Buttons
+#------------------------------------------------------------------------
+# Function to split text into words
+def split_text(text):
+    """Split *text* into whitespace-separated pieces, detaching punctuation.
+
+    A space is inserted in front of every character listed in
+    ``string.punctuation`` before splitting on whitespace, so a punctuation
+    mark starts a new piece but stays attached to whatever follows it
+    (``"in-person"`` becomes ``["in", "-person"]``).
+
+    Args:
+        text: The raw string to split.
+
+    Returns:
+        A list of string pieces; empty when *text* has no non-space content.
+    """
+    # One translation pass prefixes each punctuation mark with a space.
+    spaced_marks = str.maketrans({mark: f" {mark}" for mark in string.punctuation})
+    return text.translate(spaced_marks).split()
+# Function to automatically label and color words based on parts of speech
+def auto_label_and_color_words(doc, words, include_verbs, include_adjectives, include_nouns, include_pronouns):
+    """Assign a part-of-speech label and highlight color to each word.
+
+    Tokens are paired with words by text, in order of appearance: the k-th
+    token with a given text is paired with the k-th word with that text.
+    Repeated words therefore each receive the tag of their own occurrence
+    instead of every repeat being mapped onto the first one.
+
+    Args:
+        doc: Iterable of tokens exposing ``text`` and ``pos_`` attributes
+            (the caller passes the result of ``nlp(user_text)``).
+        words: List of word strings to annotate.
+        include_verbs: When true, tokens tagged "VERB" are labeled "Verb".
+        include_adjectives: When true, tokens tagged "ADJ" are labeled "Adj".
+        include_nouns: When true, tokens tagged "NOUN" are labeled "Noun".
+        include_pronouns: When true, tokens tagged "PRON" are labeled
+            "Pronoun".
+
+    Returns:
+        A ``(labels, colors)`` pair of lists parallel to *words*. Words
+        without an included tag keep the label "" and the color "#FFFFFF".
+    """
+    # Tag -> (label, color) for the categories the caller switched on.
+    styles = {}
+    if include_verbs:
+        styles["VERB"] = ("Verb", "#DAF1E7")
+    if include_adjectives:
+        styles["ADJ"] = ("Adj", "#BDE5FF")
+    if include_nouns:
+        styles["NOUN"] = ("Noun", "#D1DBE9")
+    if include_pronouns:
+        styles["PRON"] = ("Pronoun", "#F6DCDD")
+    labels = [""] * len(words)
+    colors = ["#FFFFFF"] * len(words)
+    # For every distinct word text, the positions not yet paired with a
+    # token. Built back-to-front so list.pop() hands out the earliest
+    # remaining position first.
+    pending = {}
+    for index in range(len(words) - 1, -1, -1):
+        pending.setdefault(words[index], []).append(index)
+    for token in doc:
+        positions = pending.get(token.text)
+        if not positions:
+            # Token text has no (remaining) counterpart among the words.
+            continue
+        # Consume the position even when the tag is not highlighted, so later
+        # repeats of the same text stay aligned with their own occurrence.
+        index = positions.pop()
+        style = styles.get(token.pos_)
+        if style is not None:
+            labels[index], colors[index] = style
+    return labels, colors
+# Seed st.session_state with empty defaults for the text and its annotations.
+# Keys that already exist are left alone, so values survive script reruns.
+for state_key, make_default in (
+    ("user_text", str),
+    ("words", list),
+    ("labels", list),
+    ("colors", list),
+    ("extracted_pos", dict),
+):
+    if state_key not in st.session_state:
+        # str() -> "", list() -> [], dict() -> {}: a fresh empty value per key.
+        st.session_state[state_key] = make_default()
+# User input for the text
+# The text area is pre-filled with the last submitted text kept in session state.
+user_text = st.text_area("Enter the text you want to annotate:", value=st.session_state.user_text, height=100)
+# Checkboxes for parts of speech to include
+# All four default to checked; they are passed to
+# auto_label_and_color_words when the text is submitted.
+include_verbs = st.checkbox("Include Verbs", value=True)
+include_adjectives = st.checkbox("Include Adjectives", value=True)
+include_nouns = st.checkbox("Include Nouns", value=True)
+include_pronouns = st.checkbox("Include Pronouns", value=True)
+# Button to process the text
+# On click: store the text, split it into words, tag it with spaCy, and save
+# the automatic labels/colors plus the per-category word lists in session
+# state so the sections below can read them on later reruns.
+if st.button("Submit Text"):
+    st.session_state.user_text = user_text
+    st.session_state.words = split_text(user_text)
+    # Process the text with spaCy
+    doc = nlp(user_text)
+    # Automatically label and color words based on parts of speech
+    st.session_state.labels, st.session_state.colors = auto_label_and_color_words(
+        doc, st.session_state.words, include_verbs, include_adjectives, include_nouns, include_pronouns)
+    # Extract parts of speech
+    # These lists are built for all four categories regardless of the
+    # checkbox values above.
+    st.session_state.extracted_pos = {
+        "verbs": [token.text for token in doc if token.pos_ == "VERB"],
+        "adjectives": [token.text for token in doc if token.pos_ == "ADJ"],
+        "nouns": [token.text for token in doc if token.pos_ == "NOUN"],
+        "pronouns": [token.text for token in doc if token.pos_ == "PRON"]
+    }
+# Display extracted parts of speech
+# Nothing is shown while the stored mapping is still empty (before the first
+# submission).
+extracted = st.session_state.extracted_pos
+if extracted:
+    st.subheader("Extracted Parts of Speech")
+    # One line per category: bold heading followed by the stored word list.
+    for heading, pos_key in (
+        ("**Verbs:**", "verbs"),
+        ("**Adjectives:**", "adjectives"),
+        ("**Nouns:**", "nouns"),
+        ("**Pronouns:**", "pronouns"),
+    ):
+        st.write(heading, extracted.get(pos_key, []))
+# Collect annotation inputs for each word
+# Every word gets a label selectbox and a color picker, pre-set from the
+# labels/colors stored in session state; the widget results are written back
+# so manual adjustments are what the "Generate" step reads.
+if st.session_state.words:
+    label_options = ["", "Verb", "Adj", "Noun", "Pronoun"]
+    for i, word in enumerate(st.session_state.words):
+        st.write(f"Annotate the word: {word}")
+        st.session_state.labels[i] = st.selectbox(
+            f"Label for '{word}'", label_options,
+            key=f"label_{i}", index=label_options.index(st.session_state.labels[i])
+        )
+        st.session_state.colors[i] = st.color_picker(
+            f"Color for '{word}'",
+            value=st.session_state.colors[i],
+            key=f"color_{i}"
+        )
+    # Generate button to process the annotations
+    if st.button("Generate Annotated Text", type="primary"):
+        annotated_elements = []
+        for word, label, color in zip(
+            st.session_state.words, st.session_state.labels, st.session_state.colors
+        ):
+            # A word is highlighted only when it has a label and a non-white
+            # color. The comparison ignores letter case so a white chosen in
+            # the picker (which may come back as "#ffffff") also counts as
+            # "no highlight".
+            if label and color.upper() != "#FFFFFF":
+                annotated_elements.append((word, label, color))
+            else:
+                annotated_elements.append(word)
+            annotated_elements.append(" ")  # Add space between words
+        # Remove the last extra space added
+        if annotated_elements and annotated_elements[-1] == " ":
+            annotated_elements.pop()
+        # Display the annotated text using the `annotated_text` function
+        st.subheader("Annotated Text:")
+        annotated_text(*annotated_elements)
+        # Print the code for the annotated text
+        st.subheader("Generated Code:")
+
+        def _quote(value):
+            """Return *value* as a double-quoted Python string literal."""
+            # Words can be a bare '"' or '\' (split_text isolates
+            # punctuation), so escape both to keep the generated code valid.
+            escaped = value.replace("\\", "\\\\").replace('"', '\\"')
+            return f'"{escaped}"'
+
+        code_lines = ['annotated_text(']
+        for elem in annotated_elements:
+            if isinstance(elem, tuple):
+                code_lines.append(f'    ({", ".join(_quote(part) for part in elem)}),')
+            else:
+                code_lines.append(f'    {_quote(elem)},')
+        code_lines.append(')')
+        st.code("\n".join(code_lines), language='python')