Spaces:
Sleeping
Sleeping
import gradio as gr | |
import argilla as rg | |
import pandas as pd | |
import os | |
import time | |
from collections import defaultdict | |
from fastapi import FastAPI | |
from functools import lru_cache | |
# Shared Argilla client; the endpoint and API key come from the environment
# and fall back to empty strings when the variables are not set.
client = rg.Argilla(
    api_url=os.environ.get("ARGILLA_API_URL", ""),
    api_key=os.environ.get("ARGILLA_API_KEY", ""),
)
# Participating countries keyed by display name. Each entry carries the
# ISO 3166-1 alpha-3 code and the flag emoji used to build dataset names.
countries = {
    name: {"iso": iso, "emoji": emoji}
    for name, iso, emoji in (
        ("Argentina", "ARG", "🇦🇷"),
        ("Bolivia", "BOL", "🇧🇴"),
        ("Chile", "CHL", "🇨🇱"),
        ("Colombia", "COL", "🇨🇴"),
        ("Costa Rica", "CRI", "🇨🇷"),
        ("Cuba", "CUB", "🇨🇺"),
        ("Ecuador", "ECU", "🇪🇨"),
        ("El Salvador", "SLV", "🇸🇻"),
        ("España", "ESP", "🇪🇸"),
        ("Guatemala", "GTM", "🇬🇹"),
        ("Honduras", "HND", "🇭🇳"),
        ("México", "MEX", "🇲🇽"),
        ("Nicaragua", "NIC", "🇳🇮"),
        ("Panamá", "PAN", "🇵🇦"),
        ("Paraguay", "PRY", "🇵🇾"),
        ("Perú", "PER", "🇵🇪"),
        ("Puerto Rico", "PRI", "🇵🇷"),
        ("República Dominicana", "DOM", "🇩🇴"),
        ("Uruguay", "URY", "🇺🇾"),
        ("Venezuela", "VEN", "🇻🇪"),
    )
}
def get_blend_es_data():
    """Count ``answer_1`` responses per Argilla user across the country datasets.

    For every entry in ``countries`` the dataset named
    ``"<emoji> <country> - <iso> - Responder"`` is fetched through the shared
    ``client`` and each ``answer_1`` response carrying a user id is counted.

    Returns:
        A ``(user_contributions, user_id_to_username)`` tuple.
        ``user_contributions`` maps user id to a dict with the keys
        ``"username"``, ``"contributions"`` (total across datasets) and
        ``"country_contributions"`` (country name -> count).
        ``user_id_to_username`` maps every user id seen to its username, or to
        ``"User-<first 8 chars of the id>"`` when the lookup failed.

    A failure while processing one dataset is printed and that dataset is
    skipped; the remaining countries are still processed.
    """
    totals_by_user = defaultdict(
        lambda: {"username": "", "contributions": 0, "country_contributions": {}}
    )
    usernames_by_id = {}

    for country, info in countries.items():
        iso = info["iso"]
        emoji = info["emoji"]
        dataset_name = f"{emoji} {country} - {iso} - Responder"
        try:
            print(f"Processing dataset: {dataset_name}")
            dataset = client.datasets(dataset_name)
            records = list(dataset.records(with_responses=True))

            per_dataset = defaultdict(int)
            for record in records:
                responses = record.to_dict()["responses"]
                if "answer_1" not in responses:
                    continue
                for answer in responses["answer_1"]:
                    user_id = answer["user_id"]
                    if not user_id:
                        continue
                    per_dataset[user_id] += 1
                    if user_id in usernames_by_id:
                        continue
                    # Resolve each username only once; fall back to a
                    # placeholder derived from the id if the lookup fails.
                    try:
                        usernames_by_id[user_id] = client.users(id=user_id).username
                    except Exception as e:
                        print(f"Error getting username for {user_id}: {e}")
                        usernames_by_id[user_id] = f"User-{user_id[:8]}"

            # Fold this dataset's counts into the global totals only after
            # every record of the dataset has been read successfully.
            for user_id, count in per_dataset.items():
                entry = totals_by_user[user_id]
                entry["username"] = usernames_by_id.get(user_id, f"User-{user_id[:8]}")
                entry["contributions"] += count
                entry["country_contributions"][country] = count
        except Exception as e:
            print(f"Error processing dataset {dataset_name}: {e}")

    return totals_by_user, usernames_by_id
def get_include_data(username_mapping):
    """Load INCLUDE question counts from ``include.csv`` and attribute them to users.

    The CSV must contain the columns ``"Nombre en Discord / username"`` and
    ``"Número de preguntas / number of questions"``. Counts are summed per
    Discord name (trimmed, case-insensitive) and then attributed to an
    Argilla user.

    Args:
        username_mapping: Mapping of Argilla user id -> Argilla username.

    Returns:
        A ``defaultdict(int)`` mapping either an Argilla user id (when the
        Discord name matched a username) or ``"discord_<name>"`` (when it did
        not) to the number of questions. Empty when the file or its expected
        columns are missing.

    Matching prefers an exact (case-insensitive) username match and only then
    falls back to a substring match in either direction, so a Discord name
    such as ``"ana"`` is not credited to ``"mariana"`` when ``"ana"`` exists.
    Rows with an unusable count are skipped instead of aborting the import.
    Any other error is printed and whatever was collected so far is returned.
    """
    name_column = "Nombre en Discord / username"
    count_column = "Número de preguntas / number of questions"
    result = defaultdict(int)
    try:
        if not os.path.exists("include.csv"):
            return result
        include_df = pd.read_csv("include.csv")
        if name_column not in include_df.columns or count_column not in include_df.columns:
            return result

        discord_users = defaultdict(int)
        for raw_name, raw_count in zip(include_df[name_column], include_df[count_column]):
            if not (pd.notna(raw_name) and pd.notna(raw_count)):
                continue
            discord_name = str(raw_name).strip().lower()
            if not discord_name:
                continue
            # Convert before touching the dict so a bad row leaves no entry.
            try:
                questions = int(raw_count)
            except (TypeError, ValueError, OverflowError):
                print(f"Skipping include.csv row with invalid question count: {raw_count!r}")
                continue
            discord_users[discord_name] += questions

        # Empty or non-string usernames are dropped: an empty string is a
        # substring of every name and would swallow all contributions.
        reverse_mapping = {
            username.lower(): user_id
            for user_id, username in username_mapping.items()
            if isinstance(username, str) and username
        }

        def find_user_id(discord_name):
            # An exact username match always wins over a looser substring match.
            exact = reverse_mapping.get(discord_name)
            if exact is not None:
                return exact
            for argilla_name, candidate in reverse_mapping.items():
                if discord_name in argilla_name or argilla_name in discord_name:
                    return candidate
            return None

        for discord_name, questions in discord_users.items():
            user_id = find_user_id(discord_name)
            if user_id is None:
                result[f"discord_{discord_name}"] += questions
            else:
                result[user_id] += questions
    except Exception as e:
        # Best-effort import: report the problem and return what we have.
        print(f"Error loading include.csv: {e}")
    return result
def get_estereotipos_data(username_mapping):
    """Load Estereotipos contribution counts and attribute them to users.

    Reads ``token_id_counts.csv`` (columns ``token_id`` and ``count``; the
    token id is treated as an e-mail address) and, when available,
    ``mail_to_username.csv`` (columns ``gmail`` and ``discord``) to translate
    e-mail addresses into Discord names.

    Args:
        username_mapping: Mapping of Argilla user id -> Argilla username.

    Returns:
        A ``defaultdict(int)`` whose keys are one of:
        * an Argilla user id, when the Discord name matched a username;
        * ``"estereotipos_<discord name>"``, when it did not;
        * ``"estereotipos_<part of the e-mail before '@'>"``, when the e-mail
          has no known Discord name.
        Empty when ``token_id_counts.csv`` or its expected columns are missing.

    Matching prefers an exact (case-insensitive) username match before a
    substring match in either direction. Counts landing on the same key are
    summed, so two e-mails sharing a Discord name or local part do not
    overwrite each other. Rows with an unusable count are skipped. Any other
    error is printed and whatever was collected so far is returned.
    """
    result = defaultdict(int)
    try:
        mail_to_discord = {}
        if os.path.exists("mail_to_username.csv"):
            mapping_df = pd.read_csv("mail_to_username.csv")
            if "gmail" in mapping_df.columns and "discord" in mapping_df.columns:
                for raw_mail, raw_discord in zip(mapping_df["gmail"], mapping_df["discord"]):
                    if pd.notna(raw_mail) and pd.notna(raw_discord):
                        mail_to_discord[str(raw_mail).strip().lower()] = (
                            str(raw_discord).strip().lower()
                        )

        if not os.path.exists("token_id_counts.csv"):
            return result
        counts_df = pd.read_csv("token_id_counts.csv")
        if "token_id" not in counts_df.columns or "count" not in counts_df.columns:
            return result

        mail_counts = defaultdict(int)
        for raw_mail, raw_count in zip(counts_df["token_id"], counts_df["count"]):
            if not (pd.notna(raw_mail) and pd.notna(raw_count)):
                continue
            mail = str(raw_mail).strip().lower()
            if not mail:
                continue
            # Convert before touching the dict so a bad row leaves no entry.
            try:
                amount = int(raw_count)
            except (TypeError, ValueError, OverflowError):
                print(f"Skipping token_id_counts.csv row with invalid count: {raw_count!r}")
                continue
            mail_counts[mail] += amount

        # Empty or non-string usernames are dropped: an empty string is a
        # substring of every name and would swallow all contributions.
        reverse_mapping = {
            username.lower(): user_id
            for user_id, username in username_mapping.items()
            if isinstance(username, str) and username
        }

        def find_user_id(discord_name):
            # An exact username match always wins over a looser substring match.
            exact = reverse_mapping.get(discord_name)
            if exact is not None:
                return exact
            for argilla_name, candidate in reverse_mapping.items():
                if discord_name in argilla_name or argilla_name in discord_name:
                    return candidate
            return None

        for mail, count in mail_counts.items():
            discord_name = mail_to_discord.get(mail, "")
            if not discord_name:
                # Use just the username portion of the email (before the @)
                result[f"estereotipos_{mail.partition('@')[0]}"] += count
                continue
            user_id = find_user_id(discord_name)
            if user_id is None:
                result[f"estereotipos_{discord_name}"] += count
            else:
                result[user_id] += count
    except Exception as e:
        # Best-effort import: report the problem and return what we have.
        print(f"Error loading estereotipos data: {e}")
    return result
def get_arena_data(username_mapping):
    """Load Arena conversation counts and attribute them to users.

    Reads ``arena.json``, expected to be an object whose values are lists of
    conversation objects; each conversation with a non-empty ``"username"``
    (treated as an e-mail address) counts as one contribution. When available,
    ``mail_to_username.csv`` (columns ``gmail`` and ``discord``) translates
    e-mail addresses into Discord names.

    Args:
        username_mapping: Mapping of Argilla user id -> Argilla username.

    Returns:
        A ``defaultdict(int)`` whose keys are one of:
        * an Argilla user id, when the Discord name matched a username;
        * ``"arena_<discord name>"``, when it did not;
        * ``"arena_<part of the e-mail before '@'>"``, when the e-mail has no
          known Discord name.
        Empty when ``arena.json`` is missing.

    Matching prefers an exact (case-insensitive) username match before a
    substring match in either direction. Counts landing on the same key are
    summed, so two e-mails sharing a Discord name or local part do not
    overwrite each other. Conversations that are not objects, or whose
    username is not a string, are ignored. Any other error is printed and
    whatever was collected so far is returned.
    """
    result = defaultdict(int)
    try:
        mail_to_discord = {}
        if os.path.exists("mail_to_username.csv"):
            mapping_df = pd.read_csv("mail_to_username.csv")
            if "gmail" in mapping_df.columns and "discord" in mapping_df.columns:
                for raw_mail, raw_discord in zip(mapping_df["gmail"], mapping_df["discord"]):
                    if pd.notna(raw_mail) and pd.notna(raw_discord):
                        mail_to_discord[str(raw_mail).strip().lower()] = (
                            str(raw_discord).strip().lower()
                        )

        if not os.path.exists("arena.json"):
            return result

        # json is not imported at module level, so keep the import local.
        import json

        with open("arena.json", "r", encoding="utf-8") as f:
            arena_data = json.load(f)

        mail_counts = defaultdict(int)
        for conversations in arena_data.values():
            for conversation in conversations:
                if not isinstance(conversation, dict):
                    continue
                raw_mail = conversation.get("username")
                if not isinstance(raw_mail, str):
                    continue
                mail = raw_mail.strip().lower()
                if mail:
                    mail_counts[mail] += 1

        # Empty or non-string usernames are dropped: an empty string is a
        # substring of every name and would swallow all contributions.
        reverse_mapping = {
            username.lower(): user_id
            for user_id, username in username_mapping.items()
            if isinstance(username, str) and username
        }

        def find_user_id(discord_name):
            # An exact username match always wins over a looser substring match.
            exact = reverse_mapping.get(discord_name)
            if exact is not None:
                return exact
            for argilla_name, candidate in reverse_mapping.items():
                if discord_name in argilla_name or argilla_name in discord_name:
                    return candidate
            return None

        for mail, count in mail_counts.items():
            discord_name = mail_to_discord.get(mail, "")
            if not discord_name:
                # Use just the username portion of the email (before the @)
                result[f"arena_{mail.partition('@')[0]}"] += count
                continue
            user_id = find_user_id(discord_name)
            if user_id is None:
                result[f"arena_{discord_name}"] += count
            else:
                result[user_id] += count
    except Exception as e:
        # Best-effort import: report the problem and return what we have.
        print(f"Error loading arena data: {e}")
    return result
def get_user_contributions_cached(cache_buster: int):
    """Return the consolidated leaderboard DataFrame.

    ``cache_buster`` is accepted for the caller's benefit but is not read
    here; every call rebuilds the data via ``consolidate_all_data``.
    """
    leaderboard = consolidate_all_data()
    return leaderboard
def consolidate_all_data():
    """Merge all contribution sources into one leaderboard DataFrame.

    Collects the Blend-es (Argilla), INCLUDE, Estereotipos and Arena counts
    and emits one row per distinct key found in any of them.

    Returns:
        A DataFrame with the columns ``Username``, ``Total``, ``Blend-es``,
        ``INCLUDE``, ``Estereotipos`` and ``Arena``, sorted by ``Total`` in
        descending order. Ties are ordered by username (case-insensitive) so
        the ranking is stable between refreshes. The columns are present even
        when there are no rows.

    Keys starting with ``discord_``, ``estereotipos_`` or ``arena_`` denote
    contributors that could not be matched to an Argilla user; only that
    leading prefix is stripped to obtain the display name.
    """
    columns = ["Username", "Total", "Blend-es", "INCLUDE", "Estereotipos", "Arena"]
    unmatched_prefixes = ("discord_", "estereotipos_", "arena_")

    blend_es_data, username_mapping = get_blend_es_data()
    include_data = get_include_data(username_mapping)
    estereotipos_data = get_estereotipos_data(username_mapping)
    arena_data = get_arena_data(username_mapping)

    all_user_ids = (
        set(blend_es_data) | set(include_data) | set(estereotipos_data) | set(arena_data)
    )

    rows = []
    for user_id in all_user_ids:
        blend_es_value = 0
        # A user can be known to Argilla yet absent from blend_es_data (e.g.
        # their dataset failed midway); still show the real username then.
        username = username_mapping.get(user_id, user_id)
        if user_id in blend_es_data:
            blend_es_value = blend_es_data[user_id]["contributions"]
            username = blend_es_data[user_id]["username"]

        include_value = include_data.get(user_id, 0)
        estereotipos_value = estereotipos_data.get(user_id, 0)
        arena_value = arena_data.get(user_id, 0)

        if isinstance(user_id, str):
            for prefix in unmatched_prefixes:
                if user_id.startswith(prefix):
                    # Strip only the leading marker; str.replace would also
                    # remove later occurrences inside the name itself.
                    username = user_id[len(prefix):]
                    blend_es_value = 0
                    break

        rows.append({
            "Username": username,
            "Total": blend_es_value + include_value + estereotipos_value + arena_value,
            "Blend-es": blend_es_value,
            "INCLUDE": include_value,
            "Estereotipos": estereotipos_value,
            "Arena": arena_value,
        })

    # Set iteration order is arbitrary: pre-sort by name and use a stable sort
    # on the total so equal scores always appear in the same order.
    rows.sort(key=lambda row: str(row["Username"]).lower())
    df = pd.DataFrame(rows, columns=columns)
    if not df.empty:
        df = df.sort_values("Total", ascending=False, kind="mergesort")
    return df
# Module-level leaderboard cache: the last DataFrame that was built and the
# time.time() value at which it was built (0 means "never").
cached_data = None
last_update_time = 0

# FastAPI application that hosts the Gradio UI.
app = FastAPI()
def create_leaderboard_ui():
    """Render the leaderboard as a styled HTML fragment.

    The consolidated DataFrame is kept in the module-level ``cached_data`` and
    reused while it is younger than 300 seconds; otherwise it is rebuilt and
    ``last_update_time`` is set to the current time. A 1-based ``Rank`` column
    is prepended when there is at least one row.

    Returns:
        An HTML string containing the last-update timestamp (local time), the
        table stylesheet and the table itself.
    """
    global cached_data, last_update_time

    now = time.time()
    cache_is_fresh = cached_data is not None and now - last_update_time < 300
    if not cache_is_fresh:
        cached_data = get_user_contributions_cached(int(now))
        last_update_time = now
    df = cached_data

    if not df.empty:
        # Work on a re-indexed copy so the cached frame keeps its own index.
        ranked = df.reset_index(drop=True)
        ranked.index = ranked.index + 1
        df = ranked.rename_axis("Rank").reset_index()

    df_html = df.to_html(classes="leaderboard-table", border=0, index=False)
    updated_at = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(last_update_time))

    # Gold, silver and bronze highlight colours for the first three body rows.
    medal_colors = ((1, "#ffd700"), (2, "#c0c0c0"), (3, "#cd7f32"))
    css_rules = [
        (
            ".leaderboard-table",
            (
                "width: 100%",
                "border-collapse: collapse",
                "font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif",
                "box-shadow: 0 4px 8px rgba(0,0,0,0.1)",
                "border-radius: 8px",
                "overflow: hidden",
            ),
        ),
        (
            ".leaderboard-table th",
            (
                "background-color: #1a1a2e",
                "color: white",
                "font-weight: bold",
                "text-align: left",
                "padding: 14px",
                "border-bottom: 2px solid #16213e",
            ),
        ),
        (
            ".leaderboard-table td",
            (
                "padding: 12px 14px",
                "border-bottom: 1px solid #333",
                "background-color: #222",
                "color: #fff",
            ),
        ),
        (
            ".leaderboard-table tr:hover td",
            ("background-color: #2a2a3a",),
        ),
    ]
    # Rank cell (first column) of the top three rows.
    css_rules += [
        (
            f".leaderboard-table tr:nth-child({place}) td:first-child",
            (
                f"background-color: {color}",
                "color: #333",
                "font-weight: bold",
                "text-align: center",
                "border-right: 1px solid #333",
            ),
        )
        for place, color in medal_colors
    ]
    # Username cell (second column) of the top three rows.
    css_rules += [
        (
            f".leaderboard-table tr:nth-child({place}) td:nth-child(2)",
            ("font-weight: bold", f"color: {color}"),
        )
        for place, color in medal_colors
    ]
    stylesheet = "\n".join(
        selector + " {\n" + "".join(f"{declaration};\n" for declaration in declarations) + "}"
        for selector, declarations in css_rules
    )

    return (
        "\n"
        '<div style="margin: 20px 0;">\n'
        f"<p>Última Actualización: {updated_at}</p>\n"
        "<style>\n"
        f"{stylesheet}\n"
        "</style>\n"
        f"{df_html}\n"
        "</div>\n"
    )
def refresh_data():
    """Drop the cached leaderboard and return freshly rendered HTML."""
    global cached_data, last_update_time
    # Resetting both values makes the next render rebuild the data.
    cached_data, last_update_time = None, 0
    return create_leaderboard_ui()
# Gradio UI: a single column with the title, the leaderboard table (rendered
# by calling create_leaderboard_ui) and a button that forces a refresh.
with gr.Blocks(theme=gr.themes.Default()) as demo:
    with gr.Column(scale=1):
        gr.Markdown("# 🏆 Hackaton Leaderboard")
        leaderboard_html = gr.HTML(value=create_leaderboard_ui)
        refresh_btn = gr.Button(value="🔄 Actualizar Datos", variant="primary")
    refresh_btn.click(fn=refresh_data, outputs=leaderboard_html)

# Serve the Gradio UI from the root path of the FastAPI application.
gr.mount_gradio_app(app, demo, path="/")

if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)