# new-space / app.py
# boompack — Update app.py
# commit 2e66c7c (verified)
import gradio as gr
import re
import emoji
import logging
from typing import Tuple, Optional
from functools import lru_cache
from collections import Counter
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def count_emojis(text: str) -> int:
    """Return how many characters of *text* are emoji (per emoji.EMOJI_DATA)."""
    return sum(1 for ch in text if ch in emoji.EMOJI_DATA)
def extract_mentions(text: str) -> list:
    """Return the usernames mentioned as ``@handle`` in *text*.

    Generalized to Instagram-style handles: letters, digits, underscores and
    *inner* dots (e.g. ``@user.name``). A trailing dot is treated as sentence
    punctuation, not part of the handle. Plain ``\\w+`` handles still match
    exactly as before, so existing callers are unaffected.
    """
    return re.findall(r'@(\w+(?:\.\w+)*)', text)
def is_spam(text: str) -> bool:
    """Heuristic spam check: repeated fire/heart emoji or self-promo phrases."""
    lowered = text.lower()
    markers = ('🔥🔥🔥', '❤️❤️❤️', 'follow me', 'check my')
    for marker in markers:
        if marker in lowered:
            return True
    return False
def extract_comment_data(comment_text: str) -> Tuple[Optional[str], Optional[str], int, int]:
    """Pull (username, comment, likes, time) out of one raw comment block.

    The block is expected to look like pasted Instagram markup: a
    "Фото профиля <name>" line, a relative timestamp line ("N ч." hours /
    "N нед." weeks), then the comment body.  Returns (None, None, 0, 0)
    when no username is found or parsing fails.
    """
    try:
        # Username follows the profile-photo marker.
        user_match = re.search(r'Фото профиля\s+(.+?)\n', comment_text)
        username = user_match.group(1).strip() if user_match else None
        if not username:
            return None, None, 0, 0

        # The comment body is the line right after the timestamp line.
        lines = comment_text.split('\n')
        stamp_re = r'\d+\s*(?:ч\.|нед\.)'
        body = ""
        for idx, line in enumerate(lines):
            if re.search(stamp_re, line):
                if idx + 1 < len(lines):
                    body = lines[idx + 1].strip()
                break

        # Strip a trailing timestamp or "likes / reply" footer if it leaked in.
        body = re.sub(r'\d+\s*(?:ч\.|нед\.)\s*$', '', body)
        body = re.sub(r'"Нравится":\s*\d+\s*Ответить\s*$', '', body)

        likes_match = re.search(r'"Нравится":\s*(\d+)', comment_text)
        stamp_match = re.search(r'(\d+)\s*(?:ч\.|нед\.)', comment_text)
        return (
            username,
            body.strip(),
            int(likes_match.group(1)) if likes_match else 0,
            int(stamp_match.group(1)) if stamp_match else 0,
        )
    except Exception as e:
        logger.error(f"Error extracting data: {e}")
        return None, None, 0, 0
def analyze_post(content_type: str, link: str, post_likes: int,
                 post_date: str, description: str, comment_count: int,
                 all_comments: str) -> Tuple[str, str, str, str, str]:
    """Analyze pasted Instagram comments and build the Gradio output strings.

    Returns a 5-tuple: (analytics summary, usernames, comment texts,
    per-comment likes, total likes).  The post-level parameters
    (content_type, link, post_likes, post_date, description, comment_count)
    are accepted for the Gradio interface signature but not used in the
    analysis itself.

    NOTE: the previous @lru_cache(maxsize=100) was removed — memoizing a UI
    handler on the entire pasted comment text has essentially no hit rate,
    keeps up to 100 large strings alive, and raises TypeError if Gradio ever
    passes an unhashable value.
    """
    try:
        if not all_comments or 'Фото профиля' not in all_comments:
            return "Ошибка: неверный формат данных", "", "", "", "0"

        # Split into per-comment blocks; the lookahead keeps the marker
        # at the start of each block.
        blocks = re.split(r'(?=Фото профиля)', all_comments)
        blocks = [b.strip() for b in blocks if b.strip()]

        comments_data = []
        total_emojis = 0
        mentions = []
        spam_count = 0
        for block in blocks:
            username, comment, likes, time = extract_comment_data(block)
            if username and comment:
                emoji_count = count_emojis(comment)
                comment_mentions = extract_mentions(comment)
                is_spam_comment = is_spam(comment)
                comments_data.append({
                    'username': username,
                    'comment': comment,
                    'likes': likes,
                    'time': time,
                    'emoji_count': emoji_count,
                    'mentions': comment_mentions,
                    'is_spam': is_spam_comment
                })
                total_emojis += emoji_count
                mentions.extend(comment_mentions)
                if is_spam_comment:
                    spam_count += 1

        # Aggregate statistics.
        total_comments = len(comments_data)
        unique_users = len(set(item['username'] for item in comments_data))
        total_likes = sum(item['likes'] for item in comments_data)
        avg_likes = total_likes / total_comments if total_comments > 0 else 0

        # Top commenters (only those with more than one comment are shown).
        commenter_counts = Counter(item['username'] for item in comments_data)
        top_commenters = commenter_counts.most_common(5)

        analytics = f"""
📊 Подробный анализ комментариев:
Основные метрики:
• Всего комментариев: {total_comments}
• Уникальных пользователей: {unique_users}
• Общее количество лайков: {total_likes}
• Среднее количество лайков: {avg_likes:.1f}
Дополнительная информация:
• Использовано эмодзи: {total_emojis}
• Количество упоминаний: {len(mentions)}
• Выявлено спам-комментариев: {spam_count}
Топ комментаторы:
{chr(10).join(f'• {user}: {count} комментария' for user, count in top_commenters if count > 1)}
"""
        return (
            analytics,
            "\n".join(item['username'] for item in comments_data),
            "\n".join(item['comment'] for item in comments_data),
            "\n".join(str(item['likes']) for item in comments_data),
            str(total_likes)
        )
    except Exception as e:
        logger.error(f"Analysis error: {e}")
        return str(e), "", "", "", "0"
# Build the Gradio interface for analyze_post.
# NOTE(review): this whole interface is re-created by a second gr.Interface
# assignment further down the file, so only the last assignment is served.
iface = gr.Interface(
    fn=analyze_post,
    inputs=[
        gr.Radio(
            choices=["Photo", "Video"],
            label="Content Type",
            value="Photo"
        ),
        gr.Textbox(
            label="Link to Post",
            placeholder="Вставьте ссылку на пост"
        ),
        gr.Number(
            label="Likes",
            value=0,
            minimum=0
        ),
        gr.Textbox(
            label="Post Date",
            placeholder="YYYY-MM-DD"
        ),
        gr.Textbox(
            label="Description",
            lines=3,
            placeholder="Описание поста"
        ),
        gr.Number(
            label="Comment Count",
            value=0,
            minimum=0
        ),
        gr.Textbox(
            label="Comments",
            lines=10,
            placeholder="Вставьте комментарии"
        )
    ],
    outputs=[
        gr.Textbox(label="Analytics Summary", lines=15),
        gr.Textbox(label="Usernames"),
        gr.Textbox(label="Comments"),
        gr.Textbox(label="Likes Chronology"),
        gr.Textbox(label="Total Likes on Comments")
    ],
    title="Enhanced Instagram Comment Analyzer",
    description="Анализатор комментариев Instagram с расширенной аналитикой",
    theme="default"
)
# Launch the app only when run as a script, so importing the module is safe.
if __name__ == "__main__":
    try:
        iface.launch(
            share=True,       # create a public share link
            debug=True,       # enable debug mode
            show_error=True   # surface error details in the UI
        )
    except Exception as e:
        logger.error(f"Error launching interface: {e}", exc_info=True)
import re
import emoji
import gradio as gr
from collections import defaultdict, Counter
def extract_comment_data(comment_text: str) -> Optional[dict]:
    """Extract username, comment body, likes and timestamp from one raw block.

    Returns a dict with keys ``username``, ``comment``, ``likes``, ``time``,
    or None when no username can be found.

    NOTE(review): this redefines the earlier extract_comment_data in this
    file; at import time this later definition wins.
    """
    # Keep the untruncated block: the likes/timestamp markers live OUTSIDE
    # the comment body we are about to carve out.
    raw_text = comment_text
    comment_data = {}

    # Username extraction.
    match = re.search(r"Фото профиля\s*(.+?)\n", raw_text)
    comment_data["username"] = match.group(1).strip() if match else None
    if not comment_data["username"]:
        return None  # Skip if no username found

    # Comment body: everything after the relative-timestamp line
    # ("N нед." weeks / "N ч." hours); otherwise accumulate lines as before.
    lines = raw_text.splitlines()
    body = ""
    for i, line in enumerate(lines):
        if re.search(r"\d+\s*(?:нед\.|ч\.)", line):
            body = "\n".join(lines[i + 1:]).strip()
            break
        body += line + "\n"
    comment_data["comment"] = body.strip()

    # BUG FIX: search the ORIGINAL block, not the truncated body. The old
    # code reassigned `comment_text` to the body first, and since the
    # timestamp line is excluded from the body, `time` was always None.
    match = re.search(r'"Нравится":\s*(\d+)', raw_text)
    comment_data["likes"] = int(match.group(1)) if match else 0

    time_match = re.search(r"(\d+)\s*(?:нед\.|ч\.)", raw_text)
    comment_data["time"] = int(time_match.group(1)) if time_match else None

    return comment_data
def analyze_comments(comments_text: str) -> tuple:
    """Analyze a pasted block of Instagram comments.

    Returns (analytics, comments): ``analytics`` is a defaultdict with
    aggregate metrics, ``comments`` the list of per-comment dicts.
    (Annotation fixed: the function returns a tuple, not a dict.)
    """
    comments = []
    # re.split with a CAPTURING group yields:
    #   [prefix, delim, chunk, delim, chunk, ...]
    # BUG FIX: the old code passed blocks[i] — the bare "Фото профиля"
    # delimiter — to extract_comment_data, so no comment was ever parsed.
    # Rejoin each delimiter with the chunk that follows it.
    blocks = re.split(r'(Фото профиля)', comments_text, flags=re.IGNORECASE)
    for i in range(1, len(blocks) - 1, 2):
        comment_data = extract_comment_data(blocks[i] + blocks[i + 1])
        if comment_data:
            comments.append(comment_data)

    # Aggregate data.
    analytics = defaultdict(int)
    unique_users = set()
    top_commenters = Counter()
    for comment in comments:
        analytics["total_comments"] += 1
        unique_users.add(comment["username"])
        analytics["total_likes"] += comment["likes"]
        top_commenters[comment["username"]] += 1
        # BUG FIX: count actual emoji characters. The old
        # len(emoji.demojize(...)) measured the LENGTH of the demojized
        # string, not the number of emojis.
        analytics["emojis"] += sum(
            1 for ch in comment["comment"] if ch in emoji.EMOJI_DATA
        )

    analytics["unique_users"] = len(unique_users)
    analytics["avg_likes"] = (
        analytics["total_likes"] / analytics["total_comments"]
        if analytics["total_comments"] > 0 else 0
    )
    analytics["top_commenters"] = dict(top_commenters.most_common(5))
    return analytics, comments
# Second Gradio interface definition; it overwrites the `iface` built earlier
# in this file, so this is the interface actually served.
iface = gr.Interface(
    fn=analyze_comments,
    inputs=gr.Textbox(label="Instagram Comments (Paste here)", lines=10),
    outputs=[
        gr.Textbox(label="Analytics Summary"),
        gr.JSON(label="Individual Comment Data")
    ],
    title="Enhanced Instagram Comment Analyzer",
    description="Improved analyzer for Instagram comments.",
)

if __name__ == "__main__":
    # BUG FIX: the launch was unguarded, so merely importing this module
    # started a (public, share=True) server. Guard it like the earlier
    # launch block in this file.
    iface.launch(share=True)