Spaces:
Running
Running
molokhovdmitry
committed on
Commit
•
35581ce
1
Parent(s):
af5df29
Add batch prediction, max comment size
Browse files
main.py
CHANGED
@@ -8,6 +8,8 @@ from models import init_emotions_model
|
|
8 |
|
9 |
class Settings(BaseSettings):
|
10 |
YT_API_KEY: str
|
|
|
|
|
11 |
model_config = SettingsConfigDict(env_file='.env')
|
12 |
|
13 |
|
@@ -25,12 +27,21 @@ def home():
|
|
25 |
@app.get('/predict')
|
26 |
def predict(video_id):
|
27 |
# Get comments
|
28 |
-
comments = get_comments(
|
|
|
|
|
|
|
|
|
29 |
comments_df = pd.DataFrame(comments)
|
30 |
|
31 |
-
# Predict emotions
|
32 |
text_list = comments_df['text_display'].to_list()
|
33 |
-
|
|
|
|
|
|
|
|
|
|
|
34 |
|
35 |
# Add predictions to DataFrame
|
36 |
preds_df = []
|
|
|
8 |
|
9 |
class Settings(BaseSettings):
|
10 |
YT_API_KEY: str
|
11 |
+
PRED_BATCH_SIZE: int
|
12 |
+
MAX_COMMENT_SIZE: int
|
13 |
model_config = SettingsConfigDict(env_file='.env')
|
14 |
|
15 |
|
|
|
27 |
@app.get('/predict')
|
28 |
def predict(video_id):
|
29 |
# Get comments
|
30 |
+
comments = get_comments(
|
31 |
+
video_id,
|
32 |
+
settings.MAX_COMMENT_SIZE,
|
33 |
+
settings.YT_API_KEY
|
34 |
+
)
|
35 |
comments_df = pd.DataFrame(comments)
|
36 |
|
37 |
+
# Predict emotions in batches
|
38 |
text_list = comments_df['text_display'].to_list()
|
39 |
+
batch_size = settings.PRED_BATCH_SIZE
|
40 |
+
text_batches = [text_list[i:i + batch_size]
|
41 |
+
for i in range(0, len(text_list), batch_size)]
|
42 |
+
preds = []
|
43 |
+
for batch in text_batches:
|
44 |
+
preds.extend(emotions_clf(batch))
|
45 |
|
46 |
# Add predictions to DataFrame
|
47 |
preds_df = []
|
yt_api.py
CHANGED
@@ -1,19 +1,19 @@
|
|
1 |
import requests
|
2 |
-
|
3 |
|
4 |
|
5 |
-
def get_comments(video_id, api_key):
|
6 |
"""Returns the top-level comments of a YouTube video as a list, fetched page by page."""
|
7 |
|
8 |
-
# Get comments from the first page
|
9 |
response = get_response(video_id, api_key, max_results=100)
|
10 |
-
comment_list = response_to_comments(response)
|
11 |
|
12 |
-
# Get comments from the other pages
|
13 |
while 'nextPageToken' in response.keys():
|
14 |
response = get_response(
|
15 |
video_id, api_key, page_token=response['nextPageToken'])
|
16 |
-
comment_list += (response_to_comments(response))
|
17 |
|
18 |
return comment_list
|
19 |
|
@@ -32,16 +32,20 @@ def get_response(video_id, api_key, page_token=None, max_results=100):
|
|
32 |
return response.json()
|
33 |
|
34 |
|
35 |
-
def response_to_comments(response):
|
36 |
"""Converts a JSON response into `comment_list`, a list of comment dicts."""
|
37 |
comment_list = []
|
38 |
-
for
|
39 |
-
comment =
|
40 |
can_reply = comment['canReply']
|
41 |
total_reply_count = comment['totalReplyCount']
|
42 |
comment = comment['topLevelComment']
|
43 |
comment_id = comment['id']
|
44 |
comment = comment['snippet']
|
|
|
|
|
|
|
|
|
45 |
try:
|
46 |
comment_list.append({
|
47 |
'comment_id': comment_id,
|
@@ -57,7 +61,7 @@ def response_to_comments(response):
|
|
57 |
'total_reply_count': total_reply_count,
|
58 |
})
|
59 |
except Exception as e:
|
60 |
-
print(f"Error: {e}\nComment:
|
61 |
-
|
62 |
|
63 |
return comment_list
|
|
|
1 |
import requests
|
2 |
+
from pprint import pprint
|
3 |
|
4 |
|
5 |
+
def get_comments(video_id, max_comment_size, api_key):
|
6 |
"""Returns the top-level comments of a YouTube video as a list, fetched page by page."""
|
7 |
|
8 |
+
# Get comments from the first page
|
9 |
response = get_response(video_id, api_key, max_results=100)
|
10 |
+
comment_list = response_to_comments(response, max_comment_size)
|
11 |
|
12 |
+
# Get comments from the other pages
|
13 |
while 'nextPageToken' in response.keys():
|
14 |
response = get_response(
|
15 |
video_id, api_key, page_token=response['nextPageToken'])
|
16 |
+
comment_list += (response_to_comments(response, max_comment_size))
|
17 |
|
18 |
return comment_list
|
19 |
|
|
|
32 |
return response.json()
|
33 |
|
34 |
|
35 |
+
def response_to_comments(response, max_comment_size):
|
36 |
"""Converts a JSON response into `comment_list`, a list of comment dicts."""
|
37 |
comment_list = []
|
38 |
+
for full_comment in response['items']:
|
39 |
+
comment = full_comment['snippet']
|
40 |
can_reply = comment['canReply']
|
41 |
total_reply_count = comment['totalReplyCount']
|
42 |
comment = comment['topLevelComment']
|
43 |
comment_id = comment['id']
|
44 |
comment = comment['snippet']
|
45 |
+
|
46 |
+
# Skip if comment is too long
|
47 |
+
if len(comment['textDisplay']) > max_comment_size:
|
48 |
+
continue
|
49 |
try:
|
50 |
comment_list.append({
|
51 |
'comment_id': comment_id,
|
|
|
61 |
'total_reply_count': total_reply_count,
|
62 |
})
|
63 |
except Exception as e:
|
64 |
+
print(f"Error: {e}\nComment:")
|
65 |
+
pprint(full_comment)
|
66 |
|
67 |
return comment_list
|