"""ViewerVoice | YouTube comment analyser — Streamlit entry-point script.

Clones the private analysis repo, loads NLP models (sentiment, sentence
embeddings, spaCy), then renders a Streamlit UI that queries comments for a
YouTube video, runs sentiment/topic pipelines, and displays filterable
charts (table, word cloud, topic treemap, sentiment donut).

NOTE(review): this file was recovered from a whitespace-mangled paste; the
HTML/CSS payloads of several markdown strings were stripped in transit and
could not be reconstructed — restore them from version control. Comments
marked NOTE(review) flag spots where the original structure was ambiguous.
"""
import os
from git import Repo
import streamlit as st
import time
from PIL import Image
import base64
from transformers import pipeline
import spacy
import numpy as np
from sentence_transformers import SentenceTransformer
from matplotlib import colormaps
from matplotlib.colors import ListedColormap

# Secrets supplied via environment (Streamlit deployment secrets).
# NOTE(review): SENTIMENT / EMBEDDING are read but not used in this file —
# presumably consumed by the cloned repo's modules; confirm before removing.
GITHUB_PAT = os.environ['GITHUB']
SENTIMENT = os.environ['SENTIMENT']
EMBEDDING = os.environ['EMBEDDING']

# Clone the private analyser repo once per container; subsequent reruns see
# the directory on disk and skip the clone.
# SECURITY: the PAT is embedded in the clone URL — it may leak into GitPython
# error messages/logs. Consider a credential helper instead.
if not os.path.exists('repo_directory'):
    try:
        Repo.clone_from(
            f'https://marcus-t-s:{GITHUB_PAT}@github.com/marcus-t-s/yt-comment-analyser.git',
            'repo_directory'
        )
    except Exception:
        st.error("Error: Oops there's an issue on our end, please wait a moment and try again.")
        st.stop()

# These imports require the clone above, hence the non-top placement.
from repo_directory.utils.chart_utils import *
from repo_directory.youtube_comment_class import *

# Streamlit configuration
st.set_page_config(
    page_title="ViewerVoice | YouTube Comment Analyser",
    layout="wide",
    page_icon=Image.open('images/page_icon.png')
)


# Define and load cached resources
@st.cache_resource
def load_models():
    """Load and cache the sentiment pipeline, embedding model and spaCy NLP.

    Cached once per process by st.cache_resource so reruns are cheap.

    Returns:
        tuple: (sentiment_pipeline, embedding_model, spacy_nlp)
    """
    sentiment_pipeline = pipeline("sentiment-analysis",
                                  model=r"cardiffnlp/twitter-roberta-base-sentiment")
    embedding_model = SentenceTransformer('flax-sentence-embeddings/all_datasets_v4_MiniLM-L6')
    spacy_nlp = spacy.load("en_core_web_sm")
    # Domain-specific stopwords that add noise to topic modelling.
    add_custom_stopwords(spacy_nlp, {"bring", "know", "come"})
    return sentiment_pipeline, embedding_model, spacy_nlp


@st.cache_resource
def load_colors_image():
    """Load and cache the wordcloud mask, colormap and base64-encoded logo.

    Returns:
        tuple: (mask ndarray, ListedColormap, base64 logo string)
    """
    mask = np.array(Image.open('images/youtube_icon.jpg'))
    # Restrict the Reds colormap to its mid range for readable wordclouds.
    Reds = colormaps['Reds']
    colors = ListedColormap(Reds(np.linspace(0.4, 0.8, 256)))
    with open("images/viewervoice_logo_crop.png", "rb") as img_file:
        logo_image = base64.b64encode(img_file.read()).decode("utf-8")
    return mask, colors, logo_image


sentiment_pipeline, embedding_model, spacy_nlp = load_models()
mask, colors, logo_image = load_colors_image()

# Hide line at the top and "made with streamlit" text.
# NOTE(review): the CSS payload of this string was stripped — restore it.
hide_decoration_bar_style = """ """
st.markdown(hide_decoration_bar_style, unsafe_allow_html=True)

# ---- Session-state initialisation (first run only per browser session) ----
if 'YouTubeParser' not in st.session_state:
    st.session_state['YouTubeParser'] = YoutubeCommentParser()
if 'comment_fig' not in st.session_state:
    st.session_state["comment_fig"] = None
    st.session_state["wordcloud_fig"] = None
    st.session_state["topic_fig"] = None
    st.session_state["sentiment_fig"] = None
if 'rerun_button' not in st.session_state:
    st.session_state['rerun_button'] = "INIT"
if 'topic_filter' not in st.session_state:
    st.session_state['topic_filter'] = False
if 'sentiment_filter' not in st.session_state:
    st.session_state['sentiment_filter'] = False
if 'filter_state' not in st.session_state:
    st.session_state['filter_state'] = "INIT"
if 'video_link' not in st.session_state:
    st.session_state["video_link"] = None
if 'num_comments' not in st.session_state:
    st.session_state['num_comments'] = None
# Robustness fix: these keys were previously only created inside
# query_comments_button / the sidebar, risking KeyError on unusual reruns.
if 'figures_built' not in st.session_state:
    st.session_state['figures_built'] = False
if 'semantic_filter' not in st.session_state:
    st.session_state['semantic_filter'] = False

# Set reference to YouTubeParser object for more concise code
yt_parser = st.session_state['YouTubeParser']

main_page = st.container()


def query_comments_button():
    """Callback for the 'Query comments' button: reset state for a new query."""
    # Delete larger objects from session state to later replace
    del st.session_state["comment_fig"]
    del st.session_state["wordcloud_fig"]
    del st.session_state["topic_fig"]
    del st.session_state["sentiment_fig"]
    del st.session_state["YouTubeParser"]
    # Reset session state variables back to placeholder values
    st.session_state.rerun_button = "QUERYING"
    st.session_state['filter_state'] = "INIT"
    st.session_state["topic_filter"] = False
    st.session_state["sentiment_filter"] = False
    st.session_state["semantic_filter"] = False
    st.session_state["figures_built"] = False
    st.session_state["comment_fig"] = None
    st.session_state["wordcloud_fig"] = None
    st.session_state["topic_fig"] = None
    st.session_state["sentiment_fig"] = None
    st.session_state["YouTubeParser"] = YoutubeCommentParser()


def filter_visuals_button():
    """Callback for the 'Filter visualisations' button: flag a filter rerun."""
    st.session_state["filter_state"] = "FILTERING"


# ---- Sidebar: query controls ----
with st.sidebar:
    st.session_state["video_link"] = st.text_input('YouTube Video URL', value="")
    st.session_state["max_comments"] = st.slider(label="Maximum number of comments to query",
                                                 min_value=100, max_value=2000, step=100)
    st.session_state["max_topics"] = st.slider(label="Maximum number of topics",
                                               min_value=5, max_value=20, step=1)
    st.button('Query comments :left_speech_bubble:', on_click=query_comments_button)

# ---- Main page: header, intro, notes ----
with main_page:
    # Reduce space at the top
    # NOTE(review): CSS payload stripped — restore from version control.
    reduce_header_height_style = """ """
    st.markdown(reduce_header_height_style, unsafe_allow_html=True)

    # Title and intro section
    # NOTE(review): HTML payload stripped — restore from version control.
    markdown_content = f""" """
    st.markdown(markdown_content, unsafe_allow_html=True)

    # LinkedIn links
    # NOTE(review): surrounding HTML markup was stripped; visible text kept.
    lnk = ''
    st.markdown(lnk + """

Made by  Afiba Annor  Marcus Singh

""", unsafe_allow_html=True)
    st.markdown("""
""", unsafe_allow_html=True)

    # Notes section
    st.markdown("""

📝 Notes

""", unsafe_allow_html=True)
    # NOTE(review): HTML payload stripped — restore from version control.
    html_content = """ """
    # Display the HTML content using st.markdown()
    st.markdown(html_content, unsafe_allow_html=True)

    # ---- Query comments section ----
    if (st.session_state.rerun_button == "QUERYING") and (st.session_state["video_link"] is not None):
        with st.spinner('Querying comments and running models'):
            yt_parser = st.session_state["YouTubeParser"]
            try:
                yt_parser.get_comments(st.session_state['video_link'],
                                       st.session_state['max_comments'])
                yt_parser.get_video_title(st.session_state['video_link'])
            except Exception:
                st.error("Error: Unable to query comments, incorrect YouTube URL.")
                st.stop()

            # Run formatting and models
            try:
                yt_parser.format_comments()
                yt_parser.clean_comments()
                yt_parser.run_sentiment_pipeline(sentiment_pipeline)
                yt_parser.run_topic_modelling_pipeline(embedding_model,
                                                       nlp=spacy_nlp,
                                                       max_topics=st.session_state['max_topics'])
            except ValueError:
                st.error("Error: Oops there are not enough comments to analyse, please try a different video.")
                st.stop()
            except Exception:
                st.error("Error: Oops there's an issue on our end, please wait a moment and try again.")
                st.stop()

            # Set "QUERY COMPLETE" to bypass running this section on script re-run
            st.session_state.rerun_button = "QUERY COMPLETE"

# ---- Once comments are queried, build charts ready to visualise ----
if st.session_state.rerun_button == "QUERY COMPLETE":
    # Check for built figures: rebuild when missing or when a filter was applied.
    if (not st.session_state["figures_built"]) or (st.session_state.filter_state == "FILTERING"):
        # If filtering button pressed
        if st.session_state.filter_state == "FILTERING":
            df_filtered = yt_parser.df_comments.copy()
            # Multiselect values are truthy lists when set, False/empty otherwise.
            # pandas .query with "col == [list]" behaves like isin().
            if st.session_state["topic_filter"]:
                df_filtered = df_filtered.query(f"Topic == {st.session_state.topic_filter}")
            if st.session_state["sentiment_filter"]:
                df_filtered = df_filtered.query(f"Sentiment == {st.session_state.sentiment_filter}")
            if st.session_state["semantic_filter"]:
                df_filtered = semantic_search(df=df_filtered,
                                              query=st.session_state["semantic_filter"],
                                              embedding_model=embedding_model,
                                              text_col='Comment_Clean')
            if len(df_filtered) == 0:
                st.session_state['num_comments'] = 0
            else:
                st.session_state['num_comments'] = len(df_filtered)
                # Build filtered table figure
                st.session_state["table_fig"] = comments_table(
                    df_filtered,
                    ['publishedAt', 'Comment_Formatted', 'Likes', 'Sentiment', 'Topic'],
                    {'publishedAt': 'Date', 'Comment_Formatted': 'Comment'})
                # Build filtered wordcloud figure
                st.session_state["wordcloud_fig"] = comment_wordcloud(df_filtered, mask, colors)
                # Build filtered topic figure
                st.session_state["topic_fig"] = topic_treemap(df_filtered, "Topic")
                # Build filtered sentiment figure
                st.session_state["sentiment_fig"] = sentiment_chart(df_filtered, "Sentiment")
                st.session_state["figures_built"] = True
            # NOTE(review): original indentation was ambiguous; placing the
            # state transition at branch level so an empty filter result does
            # not retrigger filtering on every rerun — confirm intent.
            st.session_state.filter_state = "FILTERED"
        # No filtering selected
        else:
            st.session_state['num_comments'] = len(yt_parser.df_comments)
            # Can only build graphs if we have comments
            if st.session_state['num_comments'] > 0:
                try:
                    # Build unfiltered table figure
                    st.session_state["table_fig"] = comments_table(
                        yt_parser.df_comments,
                        ['publishedAt', 'Comment_Formatted', 'Likes', 'Sentiment', 'Topic'],
                        {'publishedAt': 'Date', 'Comment_Formatted': 'Comment'})
                    # Build unfiltered wordcloud figure
                    st.session_state["wordcloud_fig"] = comment_wordcloud(yt_parser.df_comments,
                                                                          mask, colors)
                    # Build unfiltered topic figure
                    st.session_state["topic_fig"] = topic_treemap(yt_parser.df_comments, "Topic")
                    # Build unfiltered sentiment figure
                    st.session_state["sentiment_fig"] = sentiment_chart(yt_parser.df_comments,
                                                                        "Sentiment")
                    st.session_state["figures_built"] = True
                except Exception:
                    st.error("Error: Oops there's an issue on our end, please wait a moment and try again.")
                    st.stop()

# ---- Main page: render the built figures ----
with main_page:
    if st.session_state.rerun_button == "QUERY COMPLETE":
        st.subheader(f"{yt_parser.title}")
        st.markdown("""

""", unsafe_allow_html=True)
        if st.session_state['num_comments'] > 0:
            table_col, word_cloud_col = st.columns([0.55, 0.45])
            with table_col:
                # NOTE(review): section-header HTML stripped; visible text kept.
                st.markdown(f"""

Comments

""", unsafe_allow_html=True)
                st.plotly_chart(st.session_state["table_fig"], use_container_width=True)
            with word_cloud_col:
                st.markdown(f"""

Word Cloud

""", unsafe_allow_html=True)
                st.pyplot(st.session_state["wordcloud_fig"], use_container_width=True)
            treemap_col, sentiment_donut_col = st.columns([0.55, 0.45])
            with treemap_col:
                st.markdown(f"""

Topic Proportions

""", unsafe_allow_html=True)
                st.plotly_chart(st.session_state["topic_fig"], use_container_width=True)
            with sentiment_donut_col:
                st.markdown(f"""

Sentiment Distribution

""", unsafe_allow_html=True)
                st.plotly_chart(st.session_state["sentiment_fig"], use_container_width=True)
            # st.table(yt_parser.df_comments.head())
        else:
            st.write("Unfortunately we couldn't find any comments for this set of filters, please try "
                     "editing the filters and try again")

# ---- Sidebar: comment count and filters ----
with st.sidebar:
    # Define the HTML and CSS for the button-style container
    if st.session_state['num_comments'] is not None:
        num_comments = st.session_state['num_comments']
    else:
        num_comments = 0
    # NOTE(review): container HTML stripped; visible interpolation kept.
    htmlstr = f"""

 {num_comments}

"""
    # Display the button-style container with number of comments
    st.subheader("Number of comments")
    st.markdown(htmlstr, unsafe_allow_html=True)

    # Filters section
    st.subheader("Filters")
    if yt_parser.df_comments is not None:
        st.session_state["topic_filter"] = st.multiselect(
            "Topic", options=sorted(list(yt_parser.df_comments['Topic'].unique())))
        st.session_state["sentiment_filter"] = st.multiselect(
            "Sentiment", options=list(yt_parser.df_comments['Sentiment'].unique()))
        st.session_state["semantic_filter"] = st.text_input("Keyword search", max_chars=30)
        st.button('Filter visualisations :sleuth_or_spy:', on_click=filter_visuals_button)
    else:
        # Disabled placeholders until a video has been queried.
        st.multiselect("Topic", options=["Please query comments from a video"], disabled=True)
        st.multiselect("Sentiment", options=["Please query comments from a video"], disabled=True)
        st.text_input("Keyword search", disabled=True)
        st.button('Please query comments before filtering', disabled=True)