aannor committed on
Commit
b64e04f
1 Parent(s): ef2ba90

updated methodology

Files changed (1)
  1. app.py +16 -37
app.py CHANGED
@@ -31,15 +31,15 @@ from repo_directory.youtube_comment_class import *
 st.set_page_config(
     page_title="ViewerVoice | YouTube Comment Analyser",
     layout="wide",
-    page_icon=Image.open('page_icon.png')
+    page_icon=Image.open('images/page_icon.png')
 )


 # Define and load cached resources
 @st.cache_resource
 def load_models():
-    sentiment_pipeline = pipeline("sentiment-analysis", model=f'{SENTIMENT}')
-    embedding_model = SentenceTransformer(f'{EMBEDDING}')
+    sentiment_pipeline = pipeline("sentiment-analysis", model=r"cardiffnlp/twitter-roberta-base-sentiment")
+    embedding_model = SentenceTransformer('flax-sentence-embeddings/all_datasets_v4_MiniLM-L6')
     spacy_nlp = spacy.load("en_core_web_sm")
     add_custom_stopwords(spacy_nlp, {"bring", "know", "come"})
     return sentiment_pipeline, embedding_model, spacy_nlp
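Note: this hunk pins the two models to specific Hugging Face checkpoints instead of the earlier SENTIMENT/EMBEDDING placeholders. Below is a minimal standalone sketch (not part of this commit) of what the pinned loaders return, assuming the transformers and sentence-transformers packages are installed; the sample comments are illustrative.

# Standalone sketch (not from app.py): behaviour of the two pinned checkpoints.
from transformers import pipeline
from sentence_transformers import SentenceTransformer

sentiment_pipeline = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment")
embedding_model = SentenceTransformer("flax-sentence-embeddings/all_datasets_v4_MiniLM-L6")

comments = ["Loved this video!", "The audio was terrible."]  # illustrative inputs

# This RoBERTa checkpoint reports LABEL_0 / LABEL_1 / LABEL_2 (negative / neutral / positive).
print(sentiment_pipeline(comments))              # e.g. [{'label': 'LABEL_2', 'score': 0.98}, ...]

# The MiniLM encoder maps each comment to a 384-dimensional vector used for topic modelling.
print(embedding_model.encode(comments).shape)    # (2, 384)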
@@ -47,10 +47,10 @@ def load_models():

 @st.cache_resource
 def load_colors_image():
-    mask = np.array(Image.open('youtube_icon.jpg'))
+    mask = np.array(Image.open('images/youtube_icon.jpg'))
     Reds = colormaps['Reds']
     colors = ListedColormap(Reds(np.linspace(0.4, 0.8, 256)))
-    with open("viewervoice_logo_crop.png", "rb") as img_file:
+    with open("images/viewervoice_logo_crop.png", "rb") as img_file:
         logo_image = base64.b64encode(img_file.read()).decode("utf-8")
     return mask, colors, logo_image
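Note: this hunk only relocates the image assets into an images/ directory. For context, a hypothetical usage sketch of the cached resources follows; the code that actually consumes them sits outside this diff, so the calls below are assumptions, not the app's own code. The mask and truncated Reds colormap are the kinds of objects the wordcloud package accepts, and the base64 string can be inlined as HTML.

# Hypothetical usage (the real consumers are elsewhere in app.py, outside this diff).
from wordcloud import WordCloud
import streamlit as st

mask, colors, logo_image = load_colors_image()

# Shape a word cloud like the YouTube icon and colour it with the truncated Reds colormap.
wordcloud = WordCloud(background_color="white", mask=mask, colormap=colors)
wordcloud.generate("sample comment text for illustration")

# Inline the base64-encoded logo as HTML.
st.markdown(f"<img src='data:image/png;base64,{logo_image}'/>", unsafe_allow_html=True)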
 
@@ -68,7 +68,6 @@ hide_decoration_bar_style = """
 """
 st.markdown(hide_decoration_bar_style, unsafe_allow_html=True)

-main_page = st.container()

 if 'YouTubeParser' not in st.session_state:
     st.session_state['YouTubeParser'] = YoutubeCommentParser()
@@ -93,6 +92,7 @@ if 'num_comments' not in st.session_state:
 # Set reference to YouTubeParser object for more concise code
 yt_parser = st.session_state['YouTubeParser']

+main_page = st.container()

 def query_comments_button():
     # Delete larger objects from session state to later replace
@@ -121,7 +121,6 @@ def filter_visuals_button():


 with st.sidebar:
-    st.session_state["api_key"] = st.text_input('YouTube API key', value="", type='password')
     st.session_state["video_link"] = st.text_input('YouTube Video URL', value="")
     st.session_state["max_comments"] = st.slider(label="Maximum number of comments to query",
                                                  min_value=100,
@@ -176,12 +175,7 @@ with main_page:
     <ul>
     <li style='font-size: 0.95rem;'>This dashboard is still under development; further updates will be implemented
     in due course.</li>
-    <li style='font-size: 0.95rem;'>Kindly refer to the instructions provided in this
-    <a href='https://medium.com/@afibannor/beyond-the-views-how-viewervoice-enriches-content-performance-analytics-3c46854db697?source=friends_link&sk=042267d3c0ed460c3ad0743ec4e16456'>link</a>.
-    This will guide you in acquiring your API key to retrieve comments.</li>
-    <li style='font-size: 0.95rem;'>Please be aware that each API key facilitates up to 10,000 API calls within
-    a 24-hour period.</li>
-    <li style='font-size: 0.95rem;'>Currently, the dashboard caters to comments in English and does not
+    <li style='font-size: 0.95rem;'>Currently, the dashboard exclusively caters to comments in English and does not
     include comment replies.</li>
     <li style='font-size: 0.95rem;'>Comments undergo cleaning and pre-processing to optimise modelling. As a result,
     the returned comment count may fall short of the maximum queried amount.</li>
@@ -202,35 +196,20 @@ if (st.session_state.rerun_button == "QUERYING") and (st.session_state["video_li
     with st.spinner('Querying comments and running models'):
         yt_parser = st.session_state["YouTubeParser"]
         try:
-            yt_parser.build_youtube_api(st.session_state['api_key'])
-        except:
-            st.error("Error: Unable to query comments, please check your API key")
-            st.stop()
-        try:
-            yt_parser.query_comments(st.session_state['video_link'], st.session_state['max_comments'])
-        except googleapiclient.errors.HttpError:
-            st.error("Error: Unable to query comments, incorrect YouTube URL or API key.")
-            st.stop()
+            yt_parser.scrape_comments(st.session_state['video_link'])
+            yt_parser.scrape_video_title()
         except:
             st.error("Error: Unable to query comments, incorrect YouTube URL or maximum \
                      API call limit reached.")
             st.stop()

         # Run formatting and models
-        try:
-            yt_parser.format_comments()
-            yt_parser.clean_comments()
-            yt_parser.run_sentiment_pipeline(sentiment_pipeline)
-            yt_parser.run_topic_modelling_pipeline(embedding_model,
-                                                   nlp=spacy_nlp,
-                                                   max_topics=st.session_state['max_topics'])
-        except ValueError:
-            st.error("Error: Oops there are not enough comments to analyse, please try a different video.")
-            st.stop()
-        except:
-            st.error("Error: Oops there's an issue on our end, please wait a moment and try again.")
-            st.stop()
-
+        yt_parser.format_comments()
+        yt_parser.clean_comments()
+        yt_parser.run_sentiment_pipeline(sentiment_pipeline)
+        yt_parser.run_topic_modelling_pipeline(embedding_model,
+                                               nlp=spacy_nlp,
+                                               max_topics=st.session_state['max_topics'])
         # Set "QUERY COMPLETE" to bypass running this section on script re-run
         st.session_state.rerun_button = "QUERY COMPLETE"
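Note: this hunk carries the "updated methodology": comments and the video title are now scraped directly from the URL via scrape_comments / scrape_video_title instead of being queried through the YouTube Data API with a key, and the formatting and modelling calls run without the previous try/except wrappers. Below is a minimal sketch of the updated call sequence outside Streamlit, using only the methods visible in this diff; it assumes app.py's imports (YoutubeCommentParser from repo_directory.youtube_comment_class) and the load_models() helper above, and the URL and max_topics value are placeholders.

# Sketch of the updated flow outside Streamlit (method internals live in the repo, not this diff).
sentiment_pipeline, embedding_model, spacy_nlp = load_models()

yt_parser = YoutubeCommentParser()
yt_parser.scrape_comments("https://www.youtube.com/watch?v=dQw4w9WgXcQ")  # placeholder URL, no API key needed
yt_parser.scrape_video_title()

yt_parser.format_comments()
yt_parser.clean_comments()
yt_parser.run_sentiment_pipeline(sentiment_pipeline)
yt_parser.run_topic_modelling_pipeline(embedding_model, nlp=spacy_nlp, max_topics=10)  # placeholder max_topics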
 
@@ -389,4 +368,4 @@ with st.sidebar:
     st.text_input("Keyword search",
                   disabled=True)
     st.button('Please query comments before filtering',
-              disabled=True)
+              disabled=True)
 