ASokirka committed on
Commit
69b1327
·
verified ·
1 Parent(s): f4e0a69

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +154 -0
  2. requirements.txt +0 -0
app.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import streamlit as st
4
+ import googleapiclient.discovery
5
+ import pandas as pd
6
+ from transformers import pipeline
7
+ import matplotlib.pyplot as plt
8
+ import seaborn as sns
9
+
10
+ st.title('Анализатор комментариев :red[YouTube] :sunglasses:')
11
+
12
+
13
+ # Инициализируем модель Hugging Face для анализа тональности текста
14
+ # Кэшируем ресурс для одной загрузки модели на все сессии
15
@st.cache_resource
def load_model():
    """
    Build the Hugging Face sentiment-analysis pipeline for Russian text.

    The pipeline wraps the 'blanchefort/rubert-base-cased-sentiment' model.
    Streamlit re-executes the whole script on every widget interaction, so
    the function is wrapped in ``st.cache_resource``: the pipeline is
    constructed once and the same object is reused afterwards instead of
    being rebuilt on each rerun.

    Returns: the pipeline object; calling it with a list of strings yields
    one prediction per string.
    """
    return pipeline(
        "sentiment-analysis",
        "blanchefort/rubert-base-cased-sentiment")
25
+
26
+
27
def extract_video_id(url: str) -> str:
    """
    Extract the video ID from a YouTube video URL.

    Recognised forms:
      * ``...watch?v=<id>`` (the ``v`` parameter may appear anywhere in the query)
      * ``youtu.be/<id>``
      * ``.../shorts/<id>``, ``.../embed/<id>``, ``.../live/<id>``

    Args: url (str): The YouTube video URL.
    Returns: str: The extracted video ID,
    or an empty string if no ID could be found.
    """
    # The ID is captured greedily as [\w-]+ so IDs that end with '-' are kept
    # intact; '\bv=' prevents matching the tail of other parameter names
    # such as 'rev='.
    pattern = r"(?:\bv=|youtu\.be/|/(?:shorts|embed|live)/)([\w-]+)"
    match = re.search(pattern, url or "")
    return match.group(1) if match else ""
40
+
41
+
42
def download_comments(video_id: str, max_comments: int = 100) -> pd.DataFrame:
    """
    Download top-level comments of a YouTube video and return them as a DataFrame.

    The YouTube Data API key is read from the ``API_KEY_YOUTUBE`` environment
    variable. Result pages are followed until ``max_comments`` rows have been
    collected or the API reports no further page.

    Args:
        video_id (str): The video ID of the YouTube video.
        max_comments (int): Upper bound on the number of comments to fetch.
            The default of 100 corresponds to a single API page.
    Returns: DataFrame with the columns 'author', 'published_at',
    'updated_at', 'like_count' and 'text' (one row per top-level comment).
    """
    api_key = os.getenv('API_KEY_YOUTUBE')
    youtube = googleapiclient.discovery.build("youtube",
                                              "v3",
                                              developerKey=api_key)
    threads = youtube.commentThreads()
    # The API accepts a page size between 1 and 100.
    page_size = max(1, min(100, max_comments))
    request = threads.list(part="snippet",
                           videoId=video_id,
                           maxResults=page_size)
    rows = []
    while request is not None and len(rows) < max_comments:
        response = request.execute()
        for item in response.get('items', []):
            if len(rows) >= max_comments:
                break
            comment = item['snippet']['topLevelComment']['snippet']
            rows.append([comment['authorDisplayName'],
                         comment['publishedAt'],
                         comment['updatedAt'],
                         comment['likeCount'],
                         comment['textDisplay']])
        # list_next returns None when there is no further page.
        request = threads.list_next(request, response)
    return pd.DataFrame(rows,
                        columns=['author',
                                 'published_at',
                                 'updated_at',
                                 'like_count',
                                 'text'])
71
+
72
+
73
def analyze_emotions_in_comments(df: pd.DataFrame) -> tuple:
    """
    Run sentiment analysis on the comments of a DataFrame.

    Only the 'text', 'author' and 'published_at' columns are kept. At most
    the first 513 comments are passed to the model; the model output is
    placed to the left of the kept columns.

    Args: df (pandas.DataFrame): DataFrame containing comments to analyze;
        must provide the 'text', 'author' and 'published_at' columns.
    Returns: tuple: (DataFrame with the model output columns followed by the
    kept comment columns, number of comments that were processed).
    """
    selected_columns = ['text', 'author', 'published_at']
    # Reset the index so the positional model output lines up with the
    # comment rows when concatenated column-wise.
    df = df[selected_columns].reset_index(drop=True)
    texts = df['text'][:513].to_list()
    if not texts:
        # Nothing to classify: skip loading the model and still expose the
        # prediction columns so that callers reading 'label' keep working.
        # NOTE(review): 'score' is the second key the pipeline is expected
        # to emit per prediction -- confirm against the transformers docs.
        empty_predictions = pd.DataFrame(columns=['label', 'score'])
        return (pd.concat([empty_predictions, df], axis=1), 0)
    model = load_model()
    # truncation=True asks the tokenizer to cut over-long comments down to
    # the model's maximum input length instead of failing on them.
    res_list = model(texts, truncation=True)
    full_df = pd.concat([pd.DataFrame(res_list), df], axis=1)
    return (full_df, len(res_list))
88
+
89
+
90
def plot_heatmap_from_dataframe(df: pd.DataFrame):
    """
    Draw a heatmap of the number of comments per hour of day and per date.

    The input DataFrame is not modified: the parsed timestamps and the
    derived 'Date' / 'Hour' columns live on a temporary copy.

    Args: df (DataFrame): Comments with a 'published_at' column that
        ``pd.to_datetime`` can parse and a 'text' column (used for counting).
    Returns: the ``matplotlib.pyplot`` module, with the newly created figure
    as its current figure (kept for backward compatibility with callers
    that pass the result to ``st.pyplot``).
    """
    published = pd.to_datetime(df['published_at'])
    # assign() returns a new frame, so the caller's DataFrame stays untouched.
    frame = df.assign(published_at=published,
                      Date=published.dt.date,
                      Hour=published.dt.hour)
    pivot_table = frame.pivot_table(index='Hour',
                                    columns='Date',
                                    values='text',
                                    aggfunc='count')
    plt.figure(figsize=(10, 6))
    # Skip drawing when there is nothing to plot; the titled, empty figure
    # is still returned.
    if not pivot_table.empty:
        sns.heatmap(pivot_table,
                    cmap='YlGnBu')
    plt.title('Количество комментариев по часам и датам')
    plt.xlabel('Дата')
    plt.ylabel('Час')
    return plt
110
+
111
+
112
def visualize_data(df: pd.DataFrame):
    """
    Build a pie chart showing the share of each sentiment label.

    Args: df (DataFrame): DataFrame with a 'label' column.
    Returns: fig: The matplotlib figure that holds the pie chart.
    """
    sentiment_counts = df['label'].value_counts()
    fig, ax = plt.subplots()
    plt.title("Эмоциональная окраска комментариев на YouTube")
    ax.pie(sentiment_counts,
           labels=sentiment_counts.index,
           autopct='%1.1f%%')
    return fig
124
+
125
+
126
def change_url():
    """Reset the 'start' flag in the Streamlit session state to False."""
    st.session_state["start"] = False
128
+
129
+
130
# The 'start' flag records that the user asked for the comments to be loaded;
# it is kept in the session state so it survives Streamlit reruns.
if "start" not in st.session_state:
    st.session_state.start = False

# Extract the video ID from the URL; it is needed for the API request.
url = st.text_input(label="Enter URL from YouTube", on_change=change_url)
video_id = extract_video_id(url)
# The button is only offered once a video ID could be extracted.
if video_id and st.button('Загрузить комментарии'):
    st.session_state.start = True

if st.session_state.start and video_id:
    comments_df = download_comments(video_id)
    if comments_df.empty:
        # Without comments there is nothing to analyse or plot, so end this
        # run here instead of failing in the charts below.
        st.warning('Комментарии не найдены.')
        st.stop()

    # Show the table with the results on the page.
    with st.spinner('Analyzing comments...'):
        full_df, num_comments = analyze_emotions_in_comments(comments_df)
    st.success(f'Готово! Обработано {num_comments} комментариев.')
    st.write(full_df)
    st.markdown('***')

    # Show the heatmap of comments by hour and date.
    st.pyplot(plot_heatmap_from_dataframe(full_df))
    st.markdown('***')

    # Show the pie chart.
    st.pyplot(visualize_data(full_df))
requirements.txt ADDED
Binary file (2.87 kB). View file