# new-space / app.py
# boompack — Update app.py
# commit 2e66c7c (verified)
import gradio as gr
import re
import emoji
import logging
from typing import Tuple, Optional
from functools import lru_cache
from collections import Counter
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def count_emojis(text: str) -> int:
    """Return how many characters of *text* are emoji (per emoji.EMOJI_DATA)."""
    return sum(1 for ch in text if ch in emoji.EMOJI_DATA)
def extract_mentions(text: str) -> list:
    """Return the usernames mentioned as ``@handle`` in *text*.

    Generalized to Instagram-style handles: letters, digits, underscores and
    *inner* dots (e.g. ``@user.name``). A trailing dot is treated as sentence
    punctuation, not part of the handle. Plain ``\\w+`` handles still match
    exactly as before, so existing callers are unaffected.
    """
    return re.findall(r'@(\w+(?:\.\w+)*)', text)
def is_spam(text: str) -> bool:
    """Heuristic spam check: repeated fire/heart emoji or self-promo phrases."""
    lowered = text.lower()
    markers = ('🔥🔥🔥', '❤️❤️❤️', 'follow me', 'check my')
    for marker in markers:
        if marker in lowered:
            return True
    return False
def extract_comment_data(comment_text: str) -> Tuple[Optional[str], Optional[str], int, int]:
    """Pull (username, comment, likes, time) out of one raw comment block.

    The block is expected to look like pasted Instagram markup: a
    "Фото профиля <name>" line, a relative timestamp line ("N ч." hours /
    "N нед." weeks), then the comment body.  Returns (None, None, 0, 0)
    when no username is found or parsing fails.
    """
    try:
        # Username follows the profile-photo marker.
        user_match = re.search(r'Фото профиля\s+(.+?)\n', comment_text)
        username = user_match.group(1).strip() if user_match else None
        if not username:
            return None, None, 0, 0

        # The comment body is the line right after the timestamp line.
        lines = comment_text.split('\n')
        stamp_re = r'\d+\s*(?:ч\.|нед\.)'
        body = ""
        for idx, line in enumerate(lines):
            if re.search(stamp_re, line):
                if idx + 1 < len(lines):
                    body = lines[idx + 1].strip()
                break

        # Strip a trailing timestamp or "likes / reply" footer if it leaked in.
        body = re.sub(r'\d+\s*(?:ч\.|нед\.)\s*$', '', body)
        body = re.sub(r'"Нравится":\s*\d+\s*Ответить\s*$', '', body)

        likes_match = re.search(r'"Нравится":\s*(\d+)', comment_text)
        stamp_match = re.search(r'(\d+)\s*(?:ч\.|нед\.)', comment_text)
        return (
            username,
            body.strip(),
            int(likes_match.group(1)) if likes_match else 0,
            int(stamp_match.group(1)) if stamp_match else 0,
        )
    except Exception as e:
        logger.error(f"Error extracting data: {e}")
        return None, None, 0, 0
def analyze_post(content_type: str, link: str, post_likes: int,
                 post_date: str, description: str, comment_count: int,
                 all_comments: str) -> Tuple[str, str, str, str, str]:
    """Analyze pasted Instagram comments and build the Gradio output strings.

    Returns a 5-tuple: (analytics summary, usernames, comment texts,
    per-comment likes, total likes).  The post-level parameters
    (content_type, link, post_likes, post_date, description, comment_count)
    are accepted for the Gradio interface signature but not used in the
    analysis itself.

    NOTE: the previous @lru_cache(maxsize=100) was removed — memoizing a UI
    handler on the entire pasted comment text has essentially no hit rate,
    keeps up to 100 large strings alive, and raises TypeError if Gradio ever
    passes an unhashable value.
    """
    try:
        if not all_comments or 'Фото профиля' not in all_comments:
            return "Ошибка: неверный формат данных", "", "", "", "0"

        # Split into per-comment blocks; the lookahead keeps the marker
        # at the start of each block.
        blocks = re.split(r'(?=Фото профиля)', all_comments)
        blocks = [b.strip() for b in blocks if b.strip()]

        comments_data = []
        total_emojis = 0
        mentions = []
        spam_count = 0
        for block in blocks:
            username, comment, likes, time = extract_comment_data(block)
            if username and comment:
                emoji_count = count_emojis(comment)
                comment_mentions = extract_mentions(comment)
                is_spam_comment = is_spam(comment)
                comments_data.append({
                    'username': username,
                    'comment': comment,
                    'likes': likes,
                    'time': time,
                    'emoji_count': emoji_count,
                    'mentions': comment_mentions,
                    'is_spam': is_spam_comment
                })
                total_emojis += emoji_count
                mentions.extend(comment_mentions)
                if is_spam_comment:
                    spam_count += 1

        # Aggregate statistics.
        total_comments = len(comments_data)
        unique_users = len(set(item['username'] for item in comments_data))
        total_likes = sum(item['likes'] for item in comments_data)
        avg_likes = total_likes / total_comments if total_comments > 0 else 0

        # Top commenters (only those with more than one comment are shown).
        commenter_counts = Counter(item['username'] for item in comments_data)
        top_commenters = commenter_counts.most_common(5)

        analytics = f"""
📊 Подробный анализ комментариев:
Основные метрики:
• Всего комментариев: {total_comments}
• Уникальных пользователей: {unique_users}
• Общее количество лайков: {total_likes}
• Среднее количество лайков: {avg_likes:.1f}
Дополнительная информация:
• Использовано эмодзи: {total_emojis}
• Количество упоминаний: {len(mentions)}
• Выявлено спам-комментариев: {spam_count}
Топ комментаторы:
{chr(10).join(f'• {user}: {count} комментария' for user, count in top_commenters if count > 1)}
"""
        return (
            analytics,
            "\n".join(item['username'] for item in comments_data),
            "\n".join(item['comment'] for item in comments_data),
            "\n".join(str(item['likes']) for item in comments_data),
            str(total_likes)
        )
    except Exception as e:
        logger.error(f"Analysis error: {e}")
        return str(e), "", "", "", "0"
# Build the Gradio interface for analyze_post.
# NOTE(review): this whole interface is re-created by a second gr.Interface
# assignment further down the file, so only the last assignment is served.
iface = gr.Interface(
    fn=analyze_post,
    inputs=[
        gr.Radio(
            choices=["Photo", "Video"],
            label="Content Type",
            value="Photo"
        ),
        gr.Textbox(
            label="Link to Post",
            placeholder="Вставьте ссылку на пост"
        ),
        gr.Number(
            label="Likes",
            value=0,
            minimum=0
        ),
        gr.Textbox(
            label="Post Date",
            placeholder="YYYY-MM-DD"
        ),
        gr.Textbox(
            label="Description",
            lines=3,
            placeholder="Описание поста"
        ),
        gr.Number(
            label="Comment Count",
            value=0,
            minimum=0
        ),
        gr.Textbox(
            label="Comments",
            lines=10,
            placeholder="Вставьте комментарии"
        )
    ],
    outputs=[
        gr.Textbox(label="Analytics Summary", lines=15),
        gr.Textbox(label="Usernames"),
        gr.Textbox(label="Comments"),
        gr.Textbox(label="Likes Chronology"),
        gr.Textbox(label="Total Likes on Comments")
    ],
    title="Enhanced Instagram Comment Analyzer",
    description="Анализатор комментариев Instagram с расширенной аналитикой",
    theme="default"
)
# Launch the app only when run as a script, so importing the module is safe.
if __name__ == "__main__":
    try:
        iface.launch(
            share=True,       # create a public share link
            debug=True,       # enable debug mode
            show_error=True   # surface error details in the UI
        )
    except Exception as e:
        logger.error(f"Error launching interface: {e}", exc_info=True)
import re
import emoji
import gradio as gr
from collections import defaultdict, Counter
def extract_comment_data(comment_text: str) -> Optional[dict]:
    """Extract username, comment body, likes and timestamp from one raw block.

    Returns a dict with keys ``username``, ``comment``, ``likes``, ``time``,
    or None when no username can be found.

    NOTE(review): this redefines the earlier extract_comment_data in this
    file; at import time this later definition wins.
    """
    # Keep the untruncated block: the likes/timestamp markers live OUTSIDE
    # the comment body we are about to carve out.
    raw_text = comment_text
    comment_data = {}

    # Username extraction.
    match = re.search(r"Фото профиля\s*(.+?)\n", raw_text)
    comment_data["username"] = match.group(1).strip() if match else None
    if not comment_data["username"]:
        return None  # Skip if no username found

    # Comment body: everything after the relative-timestamp line
    # ("N нед." weeks / "N ч." hours); otherwise accumulate lines as before.
    lines = raw_text.splitlines()
    body = ""
    for i, line in enumerate(lines):
        if re.search(r"\d+\s*(?:нед\.|ч\.)", line):
            body = "\n".join(lines[i + 1:]).strip()
            break
        body += line + "\n"
    comment_data["comment"] = body.strip()

    # BUG FIX: search the ORIGINAL block, not the truncated body. The old
    # code reassigned `comment_text` to the body first, and since the
    # timestamp line is excluded from the body, `time` was always None.
    match = re.search(r'"Нравится":\s*(\d+)', raw_text)
    comment_data["likes"] = int(match.group(1)) if match else 0

    time_match = re.search(r"(\d+)\s*(?:нед\.|ч\.)", raw_text)
    comment_data["time"] = int(time_match.group(1)) if time_match else None

    return comment_data
def analyze_comments(comments_text: str) -> tuple:
    """Analyze a pasted block of Instagram comments.

    Returns (analytics, comments): ``analytics`` is a defaultdict with
    aggregate metrics, ``comments`` the list of per-comment dicts.
    (Annotation fixed: the function returns a tuple, not a dict.)
    """
    comments = []
    # re.split with a CAPTURING group yields:
    #   [prefix, delim, chunk, delim, chunk, ...]
    # BUG FIX: the old code passed blocks[i] — the bare "Фото профиля"
    # delimiter — to extract_comment_data, so no comment was ever parsed.
    # Rejoin each delimiter with the chunk that follows it.
    blocks = re.split(r'(Фото профиля)', comments_text, flags=re.IGNORECASE)
    for i in range(1, len(blocks) - 1, 2):
        comment_data = extract_comment_data(blocks[i] + blocks[i + 1])
        if comment_data:
            comments.append(comment_data)

    # Aggregate data.
    analytics = defaultdict(int)
    unique_users = set()
    top_commenters = Counter()
    for comment in comments:
        analytics["total_comments"] += 1
        unique_users.add(comment["username"])
        analytics["total_likes"] += comment["likes"]
        top_commenters[comment["username"]] += 1
        # BUG FIX: count actual emoji characters. The old
        # len(emoji.demojize(...)) measured the LENGTH of the demojized
        # string, not the number of emojis.
        analytics["emojis"] += sum(
            1 for ch in comment["comment"] if ch in emoji.EMOJI_DATA
        )

    analytics["unique_users"] = len(unique_users)
    analytics["avg_likes"] = (
        analytics["total_likes"] / analytics["total_comments"]
        if analytics["total_comments"] > 0 else 0
    )
    analytics["top_commenters"] = dict(top_commenters.most_common(5))
    return analytics, comments
# Second Gradio interface definition; it overwrites the `iface` built earlier
# in this file, so this is the interface actually served.
iface = gr.Interface(
    fn=analyze_comments,
    inputs=gr.Textbox(label="Instagram Comments (Paste here)", lines=10),
    outputs=[
        gr.Textbox(label="Analytics Summary"),
        gr.JSON(label="Individual Comment Data")
    ],
    title="Enhanced Instagram Comment Analyzer",
    description="Improved analyzer for Instagram comments.",
)

if __name__ == "__main__":
    # BUG FIX: the launch was unguarded, so merely importing this module
    # started a (public, share=True) server. Guard it like the earlier
    # launch block in this file.
    iface.launch(share=True)