Spaces:

boompack
/

new-space

Sleeping

File size: 10,350 Bytes

import gradio as gr
import re
import emoji
import logging
from typing import Tuple, Optional
from functools import lru_cache
from collections import Counter

# Configure the root logger at import time with INFO as the minimum level.
logging.basicConfig(level=logging.INFO)
# Module-level logger used by the functions below to report errors.
logger = logging.getLogger(__name__)

def count_emojis(text: str) -> int:
    """Count the emoji occurrences contained in ``text``.

    Every emoji is counted once, even when it is encoded as several code
    points (skin-tone modifiers, ZWJ sequences such as family emoji, flags,
    or a base character followed by a variation selector). Checking single
    characters against ``emoji.EMOJI_DATA`` miscounts such sequences, so the
    library's own sequence-aware counter is used instead.

    Args:
        text: Arbitrary text; may be empty.

    Returns:
        Number of emoji found (repeated emoji are counted each time), or 0
        for empty input.
    """
    if not text:
        return 0
    return emoji.emoji_count(text)

def extract_mentions(text: str) -> list:
    """Return the user names mentioned in ``text``.

    A mention is an ``@`` directly followed by one or more word characters.
    The returned names exclude the ``@``, keep their order of appearance,
    and may contain duplicates.
    """
    mention_pattern = re.compile(r'@(\w+)')
    return [found.group(1) for found in mention_pattern.finditer(text)]

def is_spam(text: str) -> bool:
    """Tell whether a comment looks like spam.

    The lower-cased text is checked for any of a fixed set of markers:
    three consecutive fire emoji, three consecutive heart emoji, or the
    phrases "follow me" / "check my".
    """
    lowered = text.lower()
    for marker in ('🔥' * 3, '❤️' * 3, 'follow me', 'check my'):
        if marker in lowered:
            return True
    return False

def extract_comment_data(comment_text: str) -> Tuple[Optional[str], Optional[str], int, int]:
    """Parse one pasted comment block into ``(username, comment, likes, time)``.

    The username is the text following the "Фото профиля" marker, up to the
    next line break. The comment is the single line right after the first
    line that carries an age stamp (a number followed by "ч." or "нед.").
    Likes are read from the first '"Нравится": N' marker and the time value
    is the number of the first age stamp; both default to 0 when missing.

    Returns:
        ``(username, comment, likes, time)``. When no username can be found,
        or any exception occurs while parsing (it is logged), the result is
        ``(None, None, 0, 0)``.
    """
    nothing_found = (None, None, 0, 0)
    age_stamp = r'\d+\s*(?:ч\.|нед\.)'

    try:
        # The name is only captured when a newline follows it.
        header = re.search(r'Фото профиля\s+(.+?)\n', comment_text)
        author = header.group(1).strip() if header is not None else None
        if not author:
            return nothing_found

        # Find the first line with an age stamp; the comment is the next line.
        rows = comment_text.split('\n')
        stamp_row = next(
            (idx for idx, row in enumerate(rows) if re.search(age_stamp, row)),
            None,
        )
        body = ""
        if stamp_row is not None and stamp_row + 1 < len(rows):
            body = rows[stamp_row + 1].strip()

        # Remove an age stamp or like/reply footer stuck to the end of the line.
        for trailer in (r'\d+\s*(?:ч\.|нед\.)\s*$',
                        r'"Нравится":\s*\d+\s*Ответить\s*$'):
            body = re.sub(trailer, '', body)

        # Likes and age are searched in the whole block, not just the comment.
        like_hit = re.search(r'"Нравится":\s*(\d+)', comment_text)
        likes = int(like_hit.group(1)) if like_hit else 0

        age_hit = re.search(r'(\d+)\s*(?:ч\.|нед\.)', comment_text)
        age = int(age_hit.group(1)) if age_hit else 0

        return author, body.strip(), likes, age

    except Exception as e:
        logger.error(f"Error extracting data: {e}")
        return nothing_found

@lru_cache(maxsize=100)
def analyze_post(content_type: str, link: str, post_likes: int,
                post_date: str, description: str, comment_count: int,
                all_comments: str) -> Tuple[str, str, str, str, str]:
    """Analyze the pasted comments of a post and build a text report.

    Only ``all_comments`` is read by the analysis; the remaining arguments
    are accepted but unused. Results are memoized per argument combination
    by ``lru_cache``.

    Returns:
        A 5-tuple of strings: the analytics summary, newline-joined
        usernames, newline-joined comment texts, newline-joined like counts,
        and the total number of likes. For input without the
        "Фото профиля" marker the first element is an error message; on an
        unexpected exception it is the exception text. In both cases the
        remaining elements are ``"", "", "", "0"``.
    """
    try:
        if not all_comments or 'Фото профиля' not in all_comments:
            return "Ошибка: неверный формат данных", "", "", "", "0"

        # Split right before every marker so each chunk holds one comment.
        chunks = [
            piece.strip()
            for piece in re.split(r'(?=Фото профиля)', all_comments)
            if piece.strip()
        ]

        # Keep only chunks that yield both a username and a comment text.
        records = []
        for chunk in chunks:
            username, comment, likes, time = extract_comment_data(chunk)
            if not (username and comment):
                continue
            records.append({
                'username': username,
                'comment': comment,
                'likes': likes,
                'time': time,
                'emoji_count': count_emojis(comment),
                'mentions': extract_mentions(comment),
                'is_spam': is_spam(comment),
            })

        # Aggregate the per-comment values.
        total_emojis = sum(rec['emoji_count'] for rec in records)
        mentions = [name for rec in records for name in rec['mentions']]
        spam_count = sum(1 for rec in records if rec['is_spam'])

        total_comments = len(records)
        unique_users = len({rec['username'] for rec in records})
        total_likes = sum(rec['likes'] for rec in records)
        avg_likes = total_likes / total_comments if total_comments > 0 else 0

        # Top five commenters; only users with more than one comment are listed.
        per_user = Counter(rec['username'] for rec in records)
        top_commenters = per_user.most_common(5)
        top_lines = "\n".join(
            f'• {user}: {count} комментария'
            for user, count in top_commenters
            if count > 1
        )

        analytics = f"""
        📊 Подробный анализ комментариев:
        Основные метрики:
        • Всего комментариев: {total_comments}
        • Уникальных пользователей: {unique_users}
        • Общее количество лайков: {total_likes}
        • Среднее количество лайков: {avg_likes:.1f}
        Дополнительная информация:
        • Использовано эмодзи: {total_emojis}
        • Количество упоминаний: {len(mentions)}
        • Выявлено спам-комментариев: {spam_count}
        Топ комментаторы:
        {top_lines}
        """

        usernames_column = "\n".join(rec['username'] for rec in records)
        comments_column = "\n".join(rec['comment'] for rec in records)
        likes_column = "\n".join(str(rec['likes']) for rec in records)

        return (
            analytics,
            usernames_column,
            comments_column,
            likes_column,
            str(total_likes)
        )

    except Exception as e:
        logger.error(f"Analysis error: {e}")
        return str(e), "", "", "", "0"

# Build the Gradio interface: the seven inputs are passed to analyze_post in
# order, and its five returned strings fill the five output text boxes.
iface = gr.Interface(
    fn=analyze_post,
    inputs=[
        gr.Radio(choices=["Photo", "Video"], label="Content Type", value="Photo"),
        gr.Textbox(label="Link to Post", placeholder="Вставьте ссылку на пост"),
        gr.Number(label="Likes", value=0, minimum=0),
        gr.Textbox(label="Post Date", placeholder="YYYY-MM-DD"),
        gr.Textbox(label="Description", lines=3, placeholder="Описание поста"),
        gr.Number(label="Comment Count", value=0, minimum=0),
        gr.Textbox(label="Comments", lines=10, placeholder="Вставьте комментарии"),
    ],
    outputs=[
        gr.Textbox(label="Analytics Summary", lines=15),
        gr.Textbox(label="Usernames"),
        gr.Textbox(label="Comments"),
        gr.Textbox(label="Likes Chronology"),
        gr.Textbox(label="Total Likes on Comments"),
    ],
    title="Enhanced Instagram Comment Analyzer",
    description="Анализатор комментариев Instagram с расширенной аналитикой",
    theme="default",
)

if __name__ == "__main__":
    try:
        # share=True creates a public link, debug=True enables debug mode,
        # show_error=True shows error details.
        iface.launch(share=True, debug=True, show_error=True)
    except Exception as e:
        # logger.exception logs at ERROR level with the traceback attached.
        logger.exception(f"Error launching interface: {e}")
import re
import emoji
import gradio as gr
from collections import defaultdict, Counter

def extract_comment_data(comment_text: str) -> "dict | None":
    """Extract username, comment text, likes and age from one comment block.

    The block is expected to start with the "Фото профиля" marker followed
    by the username. The first line containing an age stamp (a number
    followed by "нед." for weeks or "ч." for hours) separates the header
    from the comment body.

    Likes and age are always read from the original block. Reusing the
    ``comment_text`` name for the extracted body would hide the age stamp
    from the later searches, so the body is kept in a separate variable.

    Args:
        comment_text: Raw text of a single comment block.

    Returns:
        ``None`` when no username can be found; otherwise a dict with keys
        ``"username"`` (str), ``"comment"`` (str, possibly multi-line, with
        the like/reply footer removed), ``"likes"`` (int, 0 when absent) and
        ``"time"`` (int, or ``None`` when the block has no age stamp).
    """
    age_pattern = r"(\d+)\s*(?:нед\.|ч\.)"  # weeks or hours
    likes_pattern = r'"Нравится":\s*(\d+)'

    name_match = re.search(r"Фото профиля\s*(.+?)\n", comment_text)
    username = name_match.group(1).strip() if name_match else None
    if not username:
        return None  # Skip blocks without a recognizable username.

    # Everything after the first age-stamp line is the comment body; when no
    # such line exists, fall back to the whole block.
    lines = comment_text.splitlines()
    body_lines = lines
    for i, line in enumerate(lines):
        if re.search(age_pattern, line):
            body_lines = lines[i + 1:]
            break
    body = "\n".join(body_lines)

    # Cut the footer off the body: everything from the likes marker on, and
    # a trailing stand-alone "Ответить" (reply) line.
    footer = re.search(likes_pattern, body)
    if footer:
        body = body[:footer.start()]
    body = re.sub(r"(?:^|\n)\s*Ответить\s*$", "", body).strip()

    # Search the ORIGINAL block so the age stamp is still present.
    likes_match = re.search(likes_pattern, comment_text)
    time_match = re.search(age_pattern, comment_text)

    return {
        "username": username,
        "comment": body,
        "likes": int(likes_match.group(1)) if likes_match else 0,
        "time": int(time_match.group(1)) if time_match else None,
    }

def analyze_comments(comments_text: str) -> tuple:
    """Analyze a pasted block of comments.

    The text is split right before every "Фото профиля" marker so that each
    block keeps its marker, which ``extract_comment_data`` needs to find the
    username. Splitting on a capturing group instead would hand over only
    the marker itself, and no comment would ever be parsed.

    Args:
        comments_text: Raw pasted comments; may be empty.

    Returns:
        A tuple ``(analytics, comments)``. ``analytics`` is a
        ``defaultdict(int)`` with the keys ``"total_comments"``,
        ``"total_likes"``, ``"emojis"``, ``"unique_users"``, ``"avg_likes"``
        and ``"top_commenters"`` (a dict of the five most frequent usernames
        and their comment counts). ``comments`` is the list of per-comment
        dicts returned by ``extract_comment_data``.
    """
    comments = []
    # Zero-width lookahead: the marker stays at the start of each block.
    for block in re.split(r'(?=Фото профиля)', comments_text or "", flags=re.IGNORECASE):
        if not block.strip():
            continue
        comment_data = extract_comment_data(block)
        if comment_data:
            comments.append(comment_data)

    # Aggregate data
    analytics = defaultdict(int)
    top_commenters = Counter(comment["username"] for comment in comments)

    analytics["total_comments"] = len(comments)
    analytics["total_likes"] = sum(comment["likes"] for comment in comments)
    # Count actual emoji; the length of the demojized text is not a count.
    analytics["emojis"] = sum(
        emoji.emoji_count(comment["comment"]) for comment in comments
    )
    analytics["unique_users"] = len(top_commenters)
    analytics["avg_likes"] = (
        analytics["total_likes"] / analytics["total_comments"]
        if analytics["total_comments"] > 0
        else 0
    )
    analytics["top_commenters"] = dict(top_commenters.most_common(5))

    return analytics, comments


# Single-textbox UI: the pasted comments are passed to analyze_comments, and
# its two return values fill the summary text box and the JSON viewer.
iface = gr.Interface(
    fn=analyze_comments,
    inputs=gr.Textbox(
        label="Instagram Comments (Paste here)",
        lines=10,
    ),
    outputs=[
        gr.Textbox(
            label="Analytics Summary",
        ),
        gr.JSON(
            label="Individual Comment Data",
        ),
    ],
    title="Enhanced Instagram Comment Analyzer",
    description="Improved analyzer for Instagram comments.",
)

# Launched unconditionally at module level (no __main__ guard), requesting a
# public share link.
iface.launch(share=True)