import gradio as gr import argilla as rg import pandas as pd import os import time from collections import defaultdict from fastapi import FastAPI from functools import lru_cache client = rg.Argilla( api_url=os.getenv("ARGILLA_API_URL", ""), api_key=os.getenv("ARGILLA_API_KEY", "") ) countries = { "Argentina": { "iso": "ARG", "emoji": "🇦🇷" }, "Bolivia": { "iso": "BOL", "emoji": "🇧🇴" }, "Chile": { "iso": "CHL", "emoji": "🇨🇱" }, "Colombia": { "iso": "COL", "emoji": "🇨🇴" }, "Costa Rica": { "iso": "CRI", "emoji": "🇨🇷" }, "Cuba": { "iso": "CUB", "emoji": "🇨🇺" }, "Ecuador": { "iso": "ECU", "emoji": "🇪🇨" }, "El Salvador": { "iso": "SLV", "emoji": "🇸🇻" }, "España": { "iso": "ESP", "emoji": "🇪🇸" }, "Guatemala": { "iso": "GTM", "emoji": "🇬🇹" }, "Honduras": { "iso": "HND", "emoji": "🇭🇳" }, "México": { "iso": "MEX", "emoji": "🇲🇽" }, "Nicaragua": { "iso": "NIC", "emoji": "🇳🇮" }, "Panamá": { "iso": "PAN", "emoji": "🇵🇦" }, "Paraguay": { "iso": "PRY", "emoji": "🇵🇾" }, "Perú": { "iso": "PER", "emoji": "🇵🇪" }, "Puerto Rico": { "iso": "PRI", "emoji": "🇵🇷" }, "República Dominicana": { "iso": "DOM", "emoji": "🇩🇴" }, "Uruguay": { "iso": "URY", "emoji": "🇺🇾" }, "Venezuela": { "iso": "VEN", "emoji": "🇻🇪" } } def get_blend_es_data(): user_contributions = defaultdict(lambda: {"username": "", "contributions": 0, "country_contributions": {}}) user_id_to_username = {} for country in countries.keys(): iso = countries[country]["iso"] emoji = countries[country]["emoji"] dataset_name = f"{emoji} {country} - {iso} - Responder" try: print(f"Processing dataset: {dataset_name}") dataset = client.datasets(dataset_name) records = list(dataset.records(with_responses=True)) dataset_contributions = defaultdict(int) for record in records: record_dict = record.to_dict() if "answer_1" in record_dict["responses"]: for answer in record_dict["responses"]["answer_1"]: if answer["user_id"]: user_id = answer["user_id"] dataset_contributions[user_id] += 1 if user_id not in user_id_to_username: try: user = client.users(id=user_id) user_id_to_username[user_id] = user.username except Exception as e: print(f"Error getting username for {user_id}: {e}") user_id_to_username[user_id] = f"User-{user_id[:8]}" for user_id, count in dataset_contributions.items(): username = user_id_to_username.get(user_id, f"User-{user_id[:8]}") user_contributions[user_id]["username"] = username user_contributions[user_id]["contributions"] += count user_contributions[user_id]["country_contributions"][country] = count except Exception as e: print(f"Error processing dataset {dataset_name}: {e}") return user_contributions, user_id_to_username def get_include_data(username_mapping): result = defaultdict(int) try: if os.path.exists("include.csv"): include_df = pd.read_csv("include.csv") if "Nombre en Discord / username" in include_df.columns and "Número de preguntas / number of questions" in include_df.columns: discord_users = defaultdict(int) for _, row in include_df.iterrows(): username = row["Nombre en Discord / username"] questions = row["Número de preguntas / number of questions"] if pd.notna(username) and pd.notna(questions): discord_users[username.lower()] += int(questions) reverse_mapping = {} for user_id, username in username_mapping.items(): reverse_mapping[username.lower()] = user_id for discord_name, questions in discord_users.items(): matched = False for argilla_name in reverse_mapping: if discord_name in argilla_name or argilla_name in discord_name: user_id = reverse_mapping[argilla_name] result[user_id] += questions matched = True break if not matched: result[f"discord_{discord_name}"] = questions except Exception as e: print(f"Error loading include.csv: {e}") return result def get_estereotipos_data(username_mapping): result = defaultdict(int) try: mail_to_discord = {} if os.path.exists("mail_to_username.csv"): mapping_df = pd.read_csv("mail_to_username.csv") if "gmail" in mapping_df.columns and "discord" in mapping_df.columns: for _, row in mapping_df.iterrows(): mail = row["gmail"] discord = row["discord"] if pd.notna(mail) and pd.notna(discord): mail_to_discord[mail.lower()] = discord.lower() if os.path.exists("token_id_counts.csv"): counts_df = pd.read_csv("token_id_counts.csv") if "token_id" in counts_df.columns and "count" in counts_df.columns: mail_counts = defaultdict(int) for _, row in counts_df.iterrows(): mail = row["token_id"] count = row["count"] if pd.notna(mail) and pd.notna(count): mail_counts[mail.lower()] += int(count) reverse_mapping = {} for user_id, username in username_mapping.items(): reverse_mapping[username.lower()] = user_id for mail, count in mail_counts.items(): discord_name = mail_to_discord.get(mail, "") if discord_name: matched = False for argilla_name in reverse_mapping: if discord_name in argilla_name or argilla_name in discord_name: user_id = reverse_mapping[argilla_name] result[user_id] += count matched = True break if not matched: result[f"estereotipos_{discord_name}"] = count else: # Use just the username portion of the email (before the @) username_part = mail.split('@')[0] if '@' in mail else mail result[f"estereotipos_{username_part}"] = count except Exception as e: print(f"Error loading estereotipos data: {e}") return result def get_arena_data(username_mapping): result = defaultdict(int) try: mail_to_discord = {} if os.path.exists("mail_to_username.csv"): mapping_df = pd.read_csv("mail_to_username.csv") if "gmail" in mapping_df.columns and "discord" in mapping_df.columns: for _, row in mapping_df.iterrows(): mail = row["gmail"] discord = row["discord"] if pd.notna(mail) and pd.notna(discord): mail_to_discord[mail.lower()] = discord.lower() if os.path.exists("arena.json"): import json with open("arena.json", "r", encoding="utf-8") as f: arena_data = json.load(f) mail_counts = defaultdict(int) for country, conversations in arena_data.items(): for conversation in conversations: if "username" in conversation: mail = conversation["username"] if mail: mail_counts[mail.lower()] += 1 reverse_mapping = {} for user_id, username in username_mapping.items(): reverse_mapping[username.lower()] = user_id for mail, count in mail_counts.items(): discord_name = mail_to_discord.get(mail, "") if discord_name: matched = False for argilla_name in reverse_mapping: if discord_name in argilla_name or argilla_name in discord_name: user_id = reverse_mapping[argilla_name] result[user_id] += count matched = True break if not matched: result[f"arena_{discord_name}"] = count else: # Use just the username portion of the email (before the @) username_part = mail.split('@')[0] if '@' in mail else mail result[f"arena_{username_part}"] = count except Exception as e: print(f"Error loading arena data: {e}") return result @lru_cache(maxsize=32) def get_user_contributions_cached(cache_buster: int): return consolidate_all_data() def consolidate_all_data(): user_data = {} blend_es_data, username_mapping = get_blend_es_data() include_data = get_include_data(username_mapping) estereotipos_data = get_estereotipos_data(username_mapping) arena_data = get_arena_data(username_mapping) all_user_ids = set(blend_es_data.keys()) | set(include_data.keys()) | set(estereotipos_data.keys()) | set(arena_data.keys()) rows = [] for user_id in all_user_ids: blend_es_value = 0 username = user_id if user_id in blend_es_data: blend_es_value = blend_es_data[user_id]["contributions"] username = blend_es_data[user_id]["username"] include_value = include_data.get(user_id, 0) estereotipos_value = estereotipos_data.get(user_id, 0) arena_value = arena_data.get(user_id, 0) if isinstance(user_id, str): if user_id.startswith("discord_"): username = user_id.replace("discord_", "") blend_es_value = 0 elif user_id.startswith("estereotipos_"): username = user_id.replace("estereotipos_", "") blend_es_value = 0 elif user_id.startswith("arena_"): username = user_id.replace("arena_", "") blend_es_value = 0 row = { "Username": username, "Total": blend_es_value + include_value + estereotipos_value + arena_value, "Blend-es": blend_es_value, "INCLUDE": include_value, "Estereotipos": estereotipos_value, "Arena": arena_value } rows.append(row) df = pd.DataFrame(rows) if not df.empty: df = df.sort_values("Total", ascending=False) return df app = FastAPI() last_update_time = 0 cached_data = None def create_leaderboard_ui(): global cached_data, last_update_time current_time = time.time() if cached_data is not None and current_time - last_update_time < 300: df = cached_data else: cache_buster = int(current_time) df = get_user_contributions_cached(cache_buster) cached_data = df last_update_time = current_time if not df.empty: df = df.reset_index(drop=True) df.index = df.index + 1 df = df.rename_axis("Rank") df = df.reset_index() df_html = df.to_html(classes="leaderboard-table", border=0, index=False) styled_html = f"""
Última Actualización: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(last_update_time))}
{df_html}