|
import gradio as gr |
|
import re |
|
import emoji |
|
import logging |
|
from typing import Tuple, Optional |
|
from functools import lru_cache |
|
from collections import Counter |
|
|
|
# Configure the root logger once at INFO level, then create the module-level
# logger that the functions below use to report failures.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
|
|
|
def count_emojis(text: str) -> int:
    """Count the emoji occurrences in ``text``.

    The count is delegated to ``emoji.emoji_count`` instead of testing every
    single code point against ``emoji.EMOJI_DATA``: a per-code-point test
    counts an emoji built from several code points (ZWJ sequences, skin-tone
    variants) more than once and can miss emoji whose individual code points
    are not keys of the table (e.g. flags).

    Args:
        text: Text to scan. An empty string or ``None`` yields ``0``.

    Returns:
        Number of emoji found; repeated emoji are counted each time.
    """
    if not text:
        return 0
    return emoji.emoji_count(text)
|
|
|
def extract_mentions(text: str) -> list:
    """Return the user names referenced in ``text`` as ``@name``.

    The name may contain dots between word characters (``@john.doe``), but a
    trailing dot is treated as punctuation and left out. An ``@`` that directly
    follows a word character or a dot is ignored so that e-mail addresses
    (``user@example.com``) are not reported as mentions.

    Args:
        text: Comment text to scan.

    Returns:
        The mentioned names without the leading ``@``, in order of appearance
        (duplicates are kept).
    """
    # (?<![\w.])  -> '@' must not be glued to a preceding word/dot (e-mail).
    # \w+(?:\.\w+)* -> word chunks joined by single dots, never ending in '.'.
    return re.findall(r'(?<![\w.])@(\w+(?:\.\w+)*)', text)
|
|
|
def is_spam(text: str) -> bool:
    """Tell whether ``text`` contains one of the known spam markers.

    A comment is flagged when it contains three fire emoji in a row, three
    heart emoji in a row, or the phrases "follow me" / "check my" in any
    letter case.

    The emoji variation selector (U+FE0F) is removed from both the text and
    the markers before comparing, so a run of hearts is recognised whether or
    not each heart carries the selector.

    Args:
        text: Comment text to check.

    Returns:
        ``True`` if any marker occurs in the text, otherwise ``False``.
    """
    variation_selector = '\ufe0f'
    spam_indicators = ['🔥' * 3, '❤️' * 3, 'follow me', 'check my']

    # Normalise once instead of lower-casing the text for every indicator.
    normalized = text.lower().replace(variation_selector, '')
    return any(
        indicator.replace(variation_selector, '') in normalized
        for indicator in spam_indicators
    )
|
|
|
def extract_comment_data(comment_text: str) -> Tuple[Optional[str], Optional[str], int, int]:
    """Parse one pasted comment block into ``(username, comment, likes, age)``.

    The block must start with a ``Фото профиля <username>`` line. The comment
    is the line that follows the first line containing an age marker
    (``<n> ч.`` or ``<n> нед.``); a trailing age marker or a trailing
    ``"Нравится": <n> Ответить`` footer is removed from it.

    Args:
        comment_text: Raw text of a single comment block.

    Returns:
        A 4-tuple ``(username, comment, likes, age)``:
        * ``username`` - text after ``Фото профиля`` on the header line;
        * ``comment`` - the extracted comment, ``""`` if no age line is found;
        * ``likes`` - number after ``"Нравится":``, ``0`` if absent;
        * ``age`` - number of the first age marker, ``0`` if absent (the unit,
          hours or weeks, is not part of the result).
        ``(None, None, 0, 0)`` is returned when no username can be found or
        when parsing fails with an exception (which is logged).
    """
    # Single source of truth for the age marker; it used to be spelled out
    # three times, which made it easy to update one copy and miss the others.
    age_pattern = r'(\d+)\s*(?:ч\.|нед\.)'

    try:
        username_match = re.search(r'Фото профиля\s+(.+?)\n', comment_text)
        username = username_match.group(1).strip() if username_match else None
        if not username:
            return None, None, 0, 0

        lines = comment_text.split('\n')
        comment = ""
        for index, line in enumerate(lines):
            if re.search(age_pattern, line):
                # The comment body is the line right after the age line.
                if index + 1 < len(lines):
                    comment = lines[index + 1].strip()
                break

        # Drop UI residue that may be glued to the end of the comment line.
        comment = re.sub(age_pattern + r'\s*$', '', comment)
        comment = re.sub(r'"Нравится":\s*\d+\s*Ответить\s*$', '', comment)

        likes_match = re.search(r'"Нравится":\s*(\d+)', comment_text)
        likes = int(likes_match.group(1)) if likes_match else 0

        # Named "age" rather than "time" to avoid shadowing the stdlib module.
        age_match = re.search(age_pattern, comment_text)
        age = int(age_match.group(1)) if age_match else 0

        return username, comment.strip(), likes, age

    except Exception as e:
        # Best-effort parser: report the failure with its traceback and let
        # the caller skip this block.
        logger.exception("Error extracting data: %s", e)
        return None, None, 0, 0
|
|
|
def _parse_comment_blocks(all_comments: str) -> list:
    """Split the pasted text at every ``Фото профиля`` header and parse each block into a dict."""
    parsed = []
    for block in re.split(r'(?=Фото профиля)', all_comments):
        block = block.strip()
        if not block:
            continue
        username, comment, likes, age = extract_comment_data(block)
        # Blocks without a user name or without comment text are skipped.
        if not (username and comment):
            continue
        parsed.append({
            'username': username,
            'comment': comment,
            'likes': likes,
            'time': age,
            'emoji_count': count_emojis(comment),
            'mentions': extract_mentions(comment),
            'is_spam': is_spam(comment),
        })
    return parsed


def _format_analytics(comments_data: list) -> str:
    """Aggregate the parsed comments and render the human-readable summary text."""
    total_comments = len(comments_data)
    unique_users = len({item['username'] for item in comments_data})
    total_likes = sum(item['likes'] for item in comments_data)
    avg_likes = total_likes / total_comments if total_comments > 0 else 0
    total_emojis = sum(item['emoji_count'] for item in comments_data)
    mention_count = sum(len(item['mentions']) for item in comments_data)
    spam_count = sum(1 for item in comments_data if item['is_spam'])

    top_commenters = Counter(item['username'] for item in comments_data).most_common(5)
    # Only users with more than one comment are listed as top commenters.
    top_lines = "\n".join(
        f'• {user}: {count} комментария'
        for user, count in top_commenters
        if count > 1
    )

    # The report is assembled from explicit lines so that its text does not
    # depend on how a multi-line literal happens to be indented in the source.
    lines = [
        "📊 Подробный анализ комментариев:",
        "Основные метрики:",
        f"• Всего комментариев: {total_comments}",
        f"• Уникальных пользователей: {unique_users}",
        f"• Общее количество лайков: {total_likes}",
        f"• Среднее количество лайков: {avg_likes:.1f}",
        "Дополнительная информация:",
        f"• Использовано эмодзи: {total_emojis}",
        f"• Количество упоминаний: {mention_count}",
        f"• Выявлено спам-комментариев: {spam_count}",
        "Топ комментаторы:",
        top_lines,
    ]
    return "\n" + "\n".join(lines) + "\n"


@lru_cache(maxsize=100)
def analyze_post(content_type: str, link: str, post_likes: int,
                 post_date: str, description: str, comment_count: int,
                 all_comments: str) -> Tuple[str, str, str, str, str]:
    """Analyze the pasted comments of a post.

    Results are memoised by ``lru_cache`` (up to 100 entries) keyed on all
    seven arguments, so every argument must be hashable.

    Args:
        content_type: Not used by the analysis.
        link: Not used by the analysis.
        post_likes: Not used by the analysis.
        post_date: Not used by the analysis.
        description: Not used by the analysis.
        comment_count: Not used by the analysis.
        all_comments: Pasted comment text; every comment block starts with
            ``Фото профиля``.

    Returns:
        A 5-tuple of strings: the analytics summary, the user names (one per
        line), the comments (one per line), the likes per comment (one per
        line) and the total number of likes over all parsed comments.
        If ``all_comments`` is empty or contains no ``Фото профиля`` marker,
        the first element is an error message and the rest are ``""``/``"0"``.
        If an exception occurs, it is logged and its text is returned as the
        first element.
    """
    try:
        if not all_comments or 'Фото профиля' not in all_comments:
            return "Ошибка: неверный формат данных", "", "", "", "0"

        comments_data = _parse_comment_blocks(all_comments)
        total_likes = sum(item['likes'] for item in comments_data)

        return (
            _format_analytics(comments_data),
            "\n".join(item['username'] for item in comments_data),
            "\n".join(item['comment'] for item in comments_data),
            "\n".join(str(item['likes']) for item in comments_data),
            str(total_likes)
        )

    except Exception as e:
        # Keep the UI responsive: log with traceback, show the message to the user.
        logger.exception("Analysis error: %s", e)
        return str(e), "", "", "", "0"
|
|
|
|
|
# Gradio form for analyze_post: the seven inputs are listed in the same order
# as the function's parameters, the five outputs in the order of its result.
iface = gr.Interface(
    fn=analyze_post,
    inputs=[
        gr.Radio(choices=["Photo", "Video"], label="Content Type", value="Photo"),
        gr.Textbox(label="Link to Post", placeholder="Вставьте ссылку на пост"),
        gr.Number(label="Likes", value=0, minimum=0),
        gr.Textbox(label="Post Date", placeholder="YYYY-MM-DD"),
        gr.Textbox(label="Description", lines=3, placeholder="Описание поста"),
        gr.Number(label="Comment Count", value=0, minimum=0),
        gr.Textbox(label="Comments", lines=10, placeholder="Вставьте комментарии"),
    ],
    outputs=[
        gr.Textbox(label="Analytics Summary", lines=15),
        gr.Textbox(label="Usernames"),
        gr.Textbox(label="Comments"),
        gr.Textbox(label="Likes Chronology"),
        gr.Textbox(label="Total Likes on Comments"),
    ],
    title="Enhanced Instagram Comment Analyzer",
    description="Анализатор комментариев Instagram с расширенной аналитикой",
    theme="default",
)
|
|
|
if __name__ == "__main__":
    # Start the interface only when the file is run as a script. A failure
    # during launch is logged together with its traceback, not propagated.
    launch_options = {"share": True, "debug": True, "show_error": True}
    try:
        iface.launch(**launch_options)
    except Exception as err:
        logger.error(f"Error launching interface: {err}", exc_info=True)
|
import re |
|
import emoji |
|
import gradio as gr |
|
from collections import defaultdict, Counter |
|
|
|
def extract_comment_data(comment_text: str) -> Optional[dict]:
    """Parse one pasted comment block into a dict.

    The user name is taken from the ``Фото профиля <username>`` header line.
    The comment is everything after the first line that contains an age
    marker (``<n> нед.`` or ``<n> ч.``); if no such line exists, the whole
    block is used as the comment.

    Likes and age are searched in the original block text. The input used to
    be overwritten with the comment body before those searches, which removed
    the age line from the searched text and left ``time`` as ``None``.

    Args:
        comment_text: Raw text of a single comment block.

    Returns:
        ``None`` if no user name is found, otherwise a dict with the keys
        ``username`` (str), ``comment`` (str), ``likes`` (int, ``0`` if
        absent) and ``time`` (int, or ``None`` if no age marker is present).
    """
    age_pattern = r"(\d+)\s*(?:нед\.|ч\.)"

    header = re.search(r"Фото профиля\s*(.+?)\n", comment_text)
    username = header.group(1).strip() if header else None
    if not username:
        return None

    lines = comment_text.splitlines()
    # Fallback when no age line exists: keep the whole block as the comment.
    body = "\n".join(lines).strip()
    for index, line in enumerate(lines):
        if re.search(age_pattern, line):
            body = "\n".join(lines[index + 1:]).strip()
            break

    likes_match = re.search(r'"Нравится":\s*(\d+)', comment_text)
    age_match = re.search(age_pattern, comment_text)

    return {
        "username": username,
        "comment": body,
        "likes": int(likes_match.group(1)) if likes_match else 0,
        "time": int(age_match.group(1)) if age_match else None,
    }
|
|
|
def analyze_comments(comments_text: str) -> Tuple[dict, list]:
    """Parse a pasted block of comments and compute summary statistics.

    The text is split in front of every ``Фото профиля`` header so that each
    block still contains its header when handed to ``extract_comment_data``.
    Splitting on a capturing group and taking every second piece passed only
    the bare separator to the parser, so no comment was ever recognised.

    Args:
        comments_text: Pasted comment text; ``None`` or empty text yields
            empty results.

    Returns:
        ``(analytics, comments)`` where ``comments`` is the list of dicts
        produced by ``extract_comment_data`` and ``analytics`` is a dict with
        the keys ``total_comments``, ``total_likes``, ``emojis``,
        ``unique_users``, ``avg_likes`` and ``top_commenters`` (user name ->
        number of comments for the five most active users).
    """
    comments = []
    for block in re.split(r'(?=Фото профиля)', comments_text or "", flags=re.IGNORECASE):
        block = block.strip()
        if not block:
            continue
        comment_data = extract_comment_data(block)
        if comment_data:
            comments.append(comment_data)

    usernames = [comment["username"] for comment in comments]
    total_comments = len(comments)
    total_likes = sum(comment["likes"] for comment in comments)

    # A plain dict (rather than a defaultdict) keeps the textual rendering of
    # the summary free of the "defaultdict(<class 'int'>, ...)" wrapper.
    analytics = {
        "total_comments": total_comments,
        "total_likes": total_likes,
        # Count emoji, not the character length of the demojized text.
        "emojis": sum(emoji.emoji_count(comment["comment"]) for comment in comments),
        "unique_users": len(set(usernames)),
        "avg_likes": total_likes / total_comments if total_comments > 0 else 0,
        "top_commenters": dict(Counter(usernames).most_common(5)),
    }
    return analytics, comments
|
|
|
|
|
# Gradio form for analyze_comments: one text area in, and the two elements of
# its result (summary and per-comment data) out.
iface = gr.Interface(
    fn=analyze_comments,
    inputs=gr.Textbox(lines=10, label="Instagram Comments (Paste here)"),
    outputs=[
        gr.Textbox(label="Analytics Summary"),
        gr.JSON(label="Individual Comment Data"),
    ],
    title="Enhanced Instagram Comment Analyzer",
    description="Improved analyzer for Instagram comments.",
)

# Launched unconditionally, i.e. also when this module is merely imported.
iface.launch(share=True)