leaderboard-hackaton-2025

Sleeping

App Files Files Community

leaderboard-hackaton-2025 / app.py

ouhenio

Update app.py

2b70f36 verified 2 months ago

raw

history blame

15.6 kB

	import gradio as gr
	import argilla as rg
	import pandas as pd
	import os
	import time
	from collections import defaultdict
	from fastapi import FastAPI
	from functools import lru_cache

	# Shared Argilla client; the endpoint and key come from the environment and
	# default to empty strings when the variables are not set.
	client = rg.Argilla(
	    api_url=os.environ.get("ARGILLA_API_URL", ""),
	    api_key=os.environ.get("ARGILLA_API_KEY", ""),
	)

	# Participating countries keyed by display name. Each entry holds the
	# three-letter code ("iso") and the flag ("emoji") that are combined with the
	# name to build the per-country Argilla dataset name.
	countries = {
	    "Argentina": {"iso": "ARG", "emoji": "🇦🇷"},
	    "Bolivia": {"iso": "BOL", "emoji": "🇧🇴"},
	    "Chile": {"iso": "CHL", "emoji": "🇨🇱"},
	    "Colombia": {"iso": "COL", "emoji": "🇨🇴"},
	    "Costa Rica": {"iso": "CRI", "emoji": "🇨🇷"},
	    "Cuba": {"iso": "CUB", "emoji": "🇨🇺"},
	    "Ecuador": {"iso": "ECU", "emoji": "🇪🇨"},
	    "El Salvador": {"iso": "SLV", "emoji": "🇸🇻"},
	    "España": {"iso": "ESP", "emoji": "🇪🇸"},
	    "Guatemala": {"iso": "GTM", "emoji": "🇬🇹"},
	    "Honduras": {"iso": "HND", "emoji": "🇭🇳"},
	    "México": {"iso": "MEX", "emoji": "🇲🇽"},
	    "Nicaragua": {"iso": "NIC", "emoji": "🇳🇮"},
	    "Panamá": {"iso": "PAN", "emoji": "🇵🇦"},
	    "Paraguay": {"iso": "PRY", "emoji": "🇵🇾"},
	    "Perú": {"iso": "PER", "emoji": "🇵🇪"},
	    "Puerto Rico": {"iso": "PRI", "emoji": "🇵🇷"},
	    "República Dominicana": {"iso": "DOM", "emoji": "🇩🇴"},
	    "Uruguay": {"iso": "URY", "emoji": "🇺🇾"},
	    "Venezuela": {"iso": "VEN", "emoji": "🇻🇪"},
	}

	def get_blend_es_data():
	    """Count "answer_1" responses per user across every country dataset.

	    For each entry in ``countries`` the dataset named
	    ``"{emoji} {country} - {iso} - Responder"`` is fetched through the
	    module-level Argilla ``client`` and every response under ``answer_1``
	    that carries a user id is counted.

	    Returns:
	        A ``(user_contributions, user_id_to_username)`` tuple.
	        ``user_contributions`` maps user id to a dict with ``"username"``,
	        ``"contributions"`` (total across countries) and
	        ``"country_contributions"`` (country name -> count).
	        ``user_id_to_username`` maps user id to the resolved username, or
	        to a ``"User-<first 8 chars of id>"`` placeholder when the lookup
	        fails.

	    A failure while processing one dataset is logged and that dataset is
	    skipped; the remaining countries are still processed.
	    """
	    user_contributions = defaultdict(
	        lambda: {"username": "", "contributions": 0, "country_contributions": {}}
	    )
	    user_id_to_username = {}

	    for country, info in countries.items():
	        iso = info["iso"]
	        emoji = info["emoji"]
	        dataset_name = f"{emoji} {country} - {iso} - Responder"

	        try:
	            print(f"Processing dataset: {dataset_name}")
	            dataset = client.datasets(dataset_name)
	            if dataset is None:
	                # Report a missing dataset explicitly instead of tripping an
	                # AttributeError on the next line.
	                print(f"Error processing dataset {dataset_name}: dataset not found")
	                continue

	            dataset_contributions = defaultdict(int)

	            # Iterate the records directly; no intermediate list is needed.
	            for record in dataset.records(with_responses=True):
	                responses = record.to_dict()["responses"]
	                if "answer_1" not in responses:
	                    continue

	                for answer in responses["answer_1"]:
	                    user_id = answer["user_id"]
	                    if not user_id:
	                        continue
	                    dataset_contributions[user_id] += 1

	                    if user_id in user_id_to_username:
	                        continue
	                    try:
	                        user_id_to_username[user_id] = client.users(id=user_id).username
	                    except Exception as e:
	                        print(f"Error getting username for {user_id}: {e}")
	                        # The id may not be a plain string (e.g. a UUID
	                        # object), so convert before slicing; slicing the raw
	                        # value here would raise inside the handler and abort
	                        # the whole dataset.
	                        user_id_to_username[user_id] = f"User-{str(user_id)[:8]}"

	            # Merge only after the dataset was read completely, so a failure
	            # midway never leaves partial counts in the totals.
	            for user_id, count in dataset_contributions.items():
	                entry = user_contributions[user_id]
	                entry["username"] = user_id_to_username.get(
	                    user_id, f"User-{str(user_id)[:8]}"
	                )
	                entry["contributions"] += count
	                entry["country_contributions"][country] = count

	        except Exception as e:
	            print(f"Error processing dataset {dataset_name}: {e}")

	    return user_contributions, user_id_to_username

	def get_include_data(username_mapping):
	    """Load INCLUDE question counts from ``include.csv`` and attribute them to users.

	    The CSV must contain the columns "Nombre en Discord / username" and
	    "Número de preguntas / number of questions"; counts are summed per
	    lower-cased Discord name.

	    Args:
	        username_mapping: Mapping of user id -> Argilla username.

	    Returns:
	        A ``defaultdict(int)`` mapping user id -> question count. Discord
	        names that cannot be tied to any Argilla username are stored under
	        the synthetic key ``"discord_<name>"``. The result is empty when the
	        file or its columns are missing.

	    Matching prefers an exact (case-insensitive) username match and only
	    then falls back to the substring match, so "anabel" is never credited
	    to a user called "ana" when a user "anabel" exists.
	    """
	    result = defaultdict(int)
	    name_col = "Nombre en Discord / username"
	    count_col = "Número de preguntas / number of questions"

	    try:
	        if not os.path.exists("include.csv"):
	            return result

	        include_df = pd.read_csv("include.csv")
	        if name_col not in include_df.columns or count_col not in include_df.columns:
	            return result

	        discord_users = defaultdict(int)
	        for username, questions in zip(include_df[name_col], include_df[count_col]):
	            if pd.isna(username) or pd.isna(questions):
	                continue
	            # str() guards against names pandas parsed as numbers.
	            discord_name = str(username).strip().lower()
	            if not discord_name:
	                continue
	            try:
	                discord_users[discord_name] += int(questions)
	            except (TypeError, ValueError):
	                # One malformed row must not discard the whole file.
	                print(f"Skipping include.csv row with invalid question count: {questions!r}")

	        reverse_mapping = {
	            str(username).lower(): user_id
	            for user_id, username in username_mapping.items()
	        }

	        for discord_name, questions in discord_users.items():
	            if discord_name in reverse_mapping:
	                result[reverse_mapping[discord_name]] += questions
	                continue

	            for argilla_name, user_id in reverse_mapping.items():
	                # An empty Argilla name is a substring of everything; skip it.
	                if argilla_name and (
	                    discord_name in argilla_name or argilla_name in discord_name
	                ):
	                    result[user_id] += questions
	                    break
	            else:
	                result[f"discord_{discord_name}"] += questions
	    except Exception as e:
	        print(f"Error loading include.csv: {e}")

	    return result

	def get_estereotipos_data(username_mapping):
	    """Load Estereotipos contribution counts and attribute them to users.

	    Reads ``token_id_counts.csv`` (columns ``token_id`` = e-mail and
	    ``count``) and, when present, ``mail_to_username.csv`` (columns
	    ``gmail`` and ``discord``) to translate e-mails into Discord names.

	    Args:
	        username_mapping: Mapping of user id -> Argilla username.

	    Returns:
	        A ``defaultdict(int)`` mapping user id -> count. Contributors that
	        cannot be tied to an Argilla username are stored under
	        ``"estereotipos_<discord name>"`` or, when the e-mail has no Discord
	        mapping, ``"estereotipos_<part of the e-mail before the @>"``.

	    Counts are always accumulated, so two e-mails that resolve to the same
	    key add up instead of overwriting each other. An exact username match
	    wins over the substring match.
	    """
	    result = defaultdict(int)
	    try:
	        mail_to_discord = {}
	        if os.path.exists("mail_to_username.csv"):
	            mapping_df = pd.read_csv("mail_to_username.csv")
	            if "gmail" in mapping_df.columns and "discord" in mapping_df.columns:
	                for mail, discord in zip(mapping_df["gmail"], mapping_df["discord"]):
	                    if pd.notna(mail) and pd.notna(discord):
	                        mail_to_discord[str(mail).strip().lower()] = str(discord).strip().lower()

	        if not os.path.exists("token_id_counts.csv"):
	            return result

	        counts_df = pd.read_csv("token_id_counts.csv")
	        if "token_id" not in counts_df.columns or "count" not in counts_df.columns:
	            return result

	        mail_counts = defaultdict(int)
	        for mail, count in zip(counts_df["token_id"], counts_df["count"]):
	            if pd.isna(mail) or pd.isna(count):
	                continue
	            try:
	                mail_counts[str(mail).strip().lower()] += int(count)
	            except (TypeError, ValueError):
	                # One malformed row must not discard the whole file.
	                print(f"Skipping token_id_counts.csv row with invalid count: {count!r}")

	        reverse_mapping = {
	            str(username).lower(): user_id
	            for user_id, username in username_mapping.items()
	        }

	        for mail, count in mail_counts.items():
	            discord_name = mail_to_discord.get(mail, "")
	            if not discord_name:
	                # Use just the username portion of the email (before the @)
	                result[f"estereotipos_{mail.partition('@')[0]}"] += count
	                continue

	            if discord_name in reverse_mapping:
	                result[reverse_mapping[discord_name]] += count
	                continue

	            for argilla_name, user_id in reverse_mapping.items():
	                # An empty Argilla name is a substring of everything; skip it.
	                if argilla_name and (
	                    discord_name in argilla_name or argilla_name in discord_name
	                ):
	                    result[user_id] += count
	                    break
	            else:
	                result[f"estereotipos_{discord_name}"] += count
	    except Exception as e:
	        print(f"Error loading estereotipos data: {e}")

	    return result

	def get_arena_data(username_mapping):
	    """Count Arena conversations per contributor and attribute them to users.

	    Reads ``arena.json`` (a mapping whose values are lists of conversation
	    objects; each conversation's ``"username"`` holds the contributor's
	    e-mail) and, when present, ``mail_to_username.csv`` (columns ``gmail``
	    and ``discord``) to translate e-mails into Discord names.

	    Args:
	        username_mapping: Mapping of user id -> Argilla username.

	    Returns:
	        A ``defaultdict(int)`` mapping user id -> number of conversations.
	        Contributors that cannot be tied to an Argilla username are stored
	        under ``"arena_<discord name>"`` or, when the e-mail has no Discord
	        mapping, ``"arena_<part of the e-mail before the @>"``.

	    Counts are always accumulated, so two e-mails that resolve to the same
	    key add up instead of overwriting each other. An exact username match
	    wins over the substring match.
	    """
	    result = defaultdict(int)
	    try:
	        mail_to_discord = {}
	        if os.path.exists("mail_to_username.csv"):
	            mapping_df = pd.read_csv("mail_to_username.csv")
	            if "gmail" in mapping_df.columns and "discord" in mapping_df.columns:
	                for mail, discord in zip(mapping_df["gmail"], mapping_df["discord"]):
	                    if pd.notna(mail) and pd.notna(discord):
	                        mail_to_discord[str(mail).strip().lower()] = str(discord).strip().lower()

	        if not os.path.exists("arena.json"):
	            return result

	        import json
	        with open("arena.json", "r", encoding="utf-8") as f:
	            arena_data = json.load(f)

	        mail_counts = defaultdict(int)
	        for conversations in arena_data.values():
	            for conversation in conversations:
	                # Skip malformed entries instead of aborting the whole load.
	                if not isinstance(conversation, dict):
	                    continue
	                mail = conversation.get("username")
	                if isinstance(mail, str) and mail.strip():
	                    mail_counts[mail.strip().lower()] += 1

	        reverse_mapping = {
	            str(username).lower(): user_id
	            for user_id, username in username_mapping.items()
	        }

	        for mail, count in mail_counts.items():
	            discord_name = mail_to_discord.get(mail, "")
	            if not discord_name:
	                # Use just the username portion of the email (before the @)
	                result[f"arena_{mail.partition('@')[0]}"] += count
	                continue

	            if discord_name in reverse_mapping:
	                result[reverse_mapping[discord_name]] += count
	                continue

	            for argilla_name, user_id in reverse_mapping.items():
	                # An empty Argilla name is a substring of everything; skip it.
	                if argilla_name and (
	                    discord_name in argilla_name or argilla_name in discord_name
	                ):
	                    result[user_id] += count
	                    break
	            else:
	                result[f"arena_{discord_name}"] += count
	    except Exception as e:
	        print(f"Error loading arena data: {e}")

	    return result

	# Only the most recent result is worth keeping: callers pass a timestamp that
	# keeps growing, so older keys are never requested again and a larger cache
	# would only pin stale DataFrames in memory.
	@lru_cache(maxsize=1)
	def get_user_contributions_cached(cache_buster: int):
	    """Return the consolidated leaderboard, memoized on ``cache_buster``.

	    Args:
	        cache_buster: Cache key; passing a new value forces the data to be
	            rebuilt via ``consolidate_all_data``.

	    Returns:
	        Whatever ``consolidate_all_data`` returns for this key.
	    """
	    return consolidate_all_data()

	def consolidate_all_data():
	    """Merge the four contribution sources into one leaderboard DataFrame.

	    Returns:
	        A DataFrame with the columns ``Username``, ``Total``, ``Blend-es``,
	        ``INCLUDE``, ``Estereotipos`` and ``Arena``, one row per user id,
	        sorted by ``Total`` in descending order. Ties are ordered by
	        username so the ranking is stable between refreshes. The columns are
	        present even when there are no rows.
	    """
	    blend_es_data, username_mapping = get_blend_es_data()
	    include_data = get_include_data(username_mapping)
	    estereotipos_data = get_estereotipos_data(username_mapping)
	    arena_data = get_arena_data(username_mapping)

	    all_user_ids = (
	        set(blend_es_data)
	        | set(include_data)
	        | set(estereotipos_data)
	        | set(arena_data)
	    )

	    # Keys with these prefixes stand for contributors that could not be
	    # matched to an Argilla user; the remainder of the key is the name.
	    synthetic_prefixes = ("discord_", "estereotipos_", "arena_")
	    columns = ["Username", "Total", "Blend-es", "INCLUDE", "Estereotipos", "Arena"]

	    rows = []
	    for user_id in all_user_ids:
	        blend_es_value = 0
	        username = user_id

	        if user_id in blend_es_data:
	            blend_es_value = blend_es_data[user_id]["contributions"]
	            username = blend_es_data[user_id]["username"]

	        include_value = include_data.get(user_id, 0)
	        estereotipos_value = estereotipos_data.get(user_id, 0)
	        arena_value = arena_data.get(user_id, 0)

	        if isinstance(user_id, str):
	            for prefix in synthetic_prefixes:
	                if user_id.startswith(prefix):
	                    # Strip only the leading prefix; str.replace would also
	                    # remove the same text from the middle of the name.
	                    username = user_id[len(prefix):]
	                    blend_es_value = 0
	                    break

	        rows.append({
	            "Username": username,
	            "Total": blend_es_value + include_value + estereotipos_value + arena_value,
	            "Blend-es": blend_es_value,
	            "INCLUDE": include_value,
	            "Estereotipos": estereotipos_value,
	            "Arena": arena_value,
	        })

	    # Set iteration order is arbitrary; order by name first so the stable
	    # sort below breaks ties deterministically.
	    rows.sort(key=lambda row: str(row["Username"]).lower())

	    df = pd.DataFrame(rows, columns=columns)

	    if not df.empty:
	        df = df.sort_values("Total", ascending=False, kind="mergesort")

	    return df

	# FastAPI application that the Gradio interface is mounted onto.
	app = FastAPI()

	# Module-level leaderboard cache: the last DataFrame that was built and the
	# time.time() value at which it was built (0 means "never").
	cached_data, last_update_time = None, 0

	def create_leaderboard_ui():
	    """Render the leaderboard as a styled HTML fragment.

	    The module-level cache is reused while it is less than 300 seconds old;
	    otherwise the data is rebuilt through ``get_user_contributions_cached``
	    and the cache and its timestamp are updated.

	    Returns:
	        An HTML string containing the last-update time, the table CSS and
	        the leaderboard table with a leading 1-based "Rank" column.
	    """
	    global cached_data, last_update_time
	    now = time.time()

	    cache_is_fresh = cached_data is not None and now - last_update_time < 300
	    if not cache_is_fresh:
	        cached_data = get_user_contributions_cached(int(now))
	        last_update_time = now
	    table = cached_data

	    if not table.empty:
	        # Work on a fresh frame so the cached one keeps its original index.
	        table = table.reset_index(drop=True)
	        table.index = table.index + 1
	        table = table.rename_axis("Rank").reset_index()

	    df_html = table.to_html(classes="leaderboard-table", border=0, index=False)
	    updated_at = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(last_update_time))

	    return f"""
	    <div style="margin: 20px 0;">
	    <p>Última Actualización: {updated_at}</p>
	    <style>
	    .leaderboard-table {{
	    width: 100%;
	    border-collapse: collapse;
	    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
	    box-shadow: 0 4px 8px rgba(0,0,0,0.1);
	    border-radius: 8px;
	    overflow: hidden;
	    }}
	    .leaderboard-table th {{
	    background-color: #1a1a2e;
	    color: white;
	    font-weight: bold;
	    text-align: left;
	    padding: 14px;
	    border-bottom: 2px solid #16213e;
	    }}
	    .leaderboard-table td {{
	    padding: 12px 14px;
	    border-bottom: 1px solid #333;
	    background-color: #222;
	    color: #fff;
	    }}
	    .leaderboard-table tr:hover td {{
	    background-color: #2a2a3a;
	    }}
	    .leaderboard-table tr:nth-child(1) td:first-child {{
	    background-color: #ffd700;
	    color: #333;
	    font-weight: bold;
	    text-align: center;
	    border-right: 1px solid #333;
	    }}
	    .leaderboard-table tr:nth-child(2) td:first-child {{
	    background-color: #c0c0c0;
	    color: #333;
	    font-weight: bold;
	    text-align: center;
	    border-right: 1px solid #333;
	    }}
	    .leaderboard-table tr:nth-child(3) td:first-child {{
	    background-color: #cd7f32;
	    color: #333;
	    font-weight: bold;
	    text-align: center;
	    border-right: 1px solid #333;
	    }}
	    .leaderboard-table tr:nth-child(1) td:nth-child(2) {{
	    font-weight: bold;
	    color: #ffd700;
	    }}
	    .leaderboard-table tr:nth-child(2) td:nth-child(2) {{
	    font-weight: bold;
	    color: #c0c0c0;
	    }}
	    .leaderboard-table tr:nth-child(3) td:nth-child(2) {{
	    font-weight: bold;
	    color: #cd7f32;
	    }}
	    </style>
	    {df_html}
	    </div>
	    """

	def refresh_data():
	    """Discard every cached leaderboard and render a freshly built one.

	    Returns:
	        The HTML string produced by ``create_leaderboard_ui``.
	    """
	    global cached_data, last_update_time
	    cached_data = None
	    last_update_time = 0
	    # The memoized loader is keyed on whole seconds; without clearing it, a
	    # refresh in the same second as the previous fetch would return the old
	    # result instead of refetching.
	    get_user_contributions_cached.cache_clear()
	    return create_leaderboard_ui()

	# Gradio page: a title, the leaderboard HTML and a manual refresh button.
	with gr.Blocks(theme=gr.themes.Default()) as demo, gr.Column(scale=1):
	    gr.Markdown("# 🏆 Hackaton Leaderboard")

	    # The function itself (not its result) is handed to gr.HTML.
	    leaderboard_html = gr.HTML(create_leaderboard_ui)

	    refresh_btn = gr.Button("🔄 Actualizar Datos", variant="primary")
	    refresh_btn.click(fn=refresh_data, outputs=leaderboard_html)

	# Serve the Gradio interface from the root path of the FastAPI app.
	gr.mount_gradio_app(app, demo, path="/")

	# Start a uvicorn server on port 7860, listening on all interfaces, when
	# the file is run as a script.
	if __name__ == "__main__":
	    import uvicorn

	    server_options = {"host": "0.0.0.0", "port": 7860}
	    uvicorn.run(app, **server_options)