Spaces:

molokhovdmitry
/

social-stat

Running

App Files Files Community

molokhovdmitry committed on Mar 3, 2024

Commit

35581ce

•

1 Parent(s): af5df29

Add batch prediction, max comment size

Browse files

Files changed (2) hide show

main.py +14 -3
yt_api.py +15 -11

main.py CHANGED Viewed

@@ -8,6 +8,8 @@ from models import init_emotions_model
 class Settings(BaseSettings):
     YT_API_KEY: str
     model_config = SettingsConfigDict(env_file='.env')
@@ -25,12 +27,21 @@ def home():
 @app.get('/predict')
 def predict(video_id):
     # Get comments
-    comments = get_comments(video_id, settings.YT_API_KEY)
     comments_df = pd.DataFrame(comments)
-    # Predict emotions
     text_list = comments_df['text_display'].to_list()
-    preds = emotions_clf(text_list)
     # Add predictions to DataFrame
     preds_df = []

 class Settings(BaseSettings):
     YT_API_KEY: str
+    PRED_BATCH_SIZE: int
+    MAX_COMMENT_SIZE: int
     model_config = SettingsConfigDict(env_file='.env')
 @app.get('/predict')
 def predict(video_id):
     # Get comments
+    comments = get_comments(
+        video_id,
+        settings.MAX_COMMENT_SIZE,
+        settings.YT_API_KEY
+    )
     comments_df = pd.DataFrame(comments)
+    # Predict emotions in batches
     text_list = comments_df['text_display'].to_list()
+    batch_size = settings.PRED_BATCH_SIZE
+    text_batches = [text_list[i:i + batch_size]
+                    for i in range(0, len(text_list), batch_size)]
+    preds = []
+    for batch in text_batches:
+        preds.extend(emotions_clf(batch))
     # Add predictions to DataFrame
     preds_df = []

yt_api.py CHANGED Viewed

@@ -1,19 +1,19 @@
 import requests
-# from pprint import pprint
-def get_comments(video_id, api_key):
     """Yields all `commentThreads` from a YouTube video in batches."""
-    # Get comments from the first page.
     response = get_response(video_id, api_key, max_results=100)
-    comment_list = response_to_comments(response)
-    # Get comments from the other pages.
     while 'nextPageToken' in response.keys():
         response = get_response(
             video_id, api_key, page_token=response['nextPageToken'])
-        comment_list += (response_to_comments(response))
     return comment_list
@@ -32,16 +32,20 @@ def get_response(video_id, api_key, page_token=None, max_results=100):
     return response.json()
-def response_to_comments(response):
     """Converts JSON response to `comment_list` dict."""
     comment_list = []
-    for comment in response['items']:
-        comment = comment['snippet']
         can_reply = comment['canReply']
         total_reply_count = comment['totalReplyCount']
         comment = comment['topLevelComment']
         comment_id = comment['id']
         comment = comment['snippet']
         try:
             comment_list.append({
                 'comment_id': comment_id,
@@ -57,7 +61,7 @@ def response_to_comments(response):
                 'total_reply_count': total_reply_count,
             })
         except Exception as e:
-            print(f"Error: {e}\nComment: {comment}")
-            continue
     return comment_list

 import requests
+from pprint import pprint
+def get_comments(video_id, max_comment_size, api_key):
     """Yields all `commentThreads` from a YouTube video in batches."""
+    # Get comments from the first page
     response = get_response(video_id, api_key, max_results=100)
+    comment_list = response_to_comments(response, max_comment_size)
+    # Get comments from the other pages
     while 'nextPageToken' in response.keys():
         response = get_response(
             video_id, api_key, page_token=response['nextPageToken'])
+        comment_list += (response_to_comments(response, max_comment_size))
     return comment_list
     return response.json()
+def response_to_comments(response, max_comment_size):
     """Converts JSON response to `comment_list` dict."""
     comment_list = []
+    for full_comment in response['items']:
+        comment = full_comment['snippet']
         can_reply = comment['canReply']
         total_reply_count = comment['totalReplyCount']
         comment = comment['topLevelComment']
         comment_id = comment['id']
         comment = comment['snippet']
+        # Skip if comment is too long
+        if len(comment['textDisplay']) > max_comment_size:
+            continue
         try:
             comment_list.append({
                 'comment_id': comment_id,
                 'total_reply_count': total_reply_count,
             })
         except Exception as e:
+            print(f"Error: {e}\nComment:")
+            pprint(full_comment)
     return comment_list