ouhenio's picture
Update app.py
2b70f36 verified
raw
history blame
15.6 kB
import gradio as gr
import argilla as rg
import pandas as pd
import os
import time
from collections import defaultdict
from fastapi import FastAPI
from functools import lru_cache
# Shared Argilla client used by the dataset readers in this module. The
# endpoint URL and API key come from the environment; each falls back to an
# empty string when its variable is not set.
client = rg.Argilla(
    api_url=os.environ.get("ARGILLA_API_URL", ""),
    api_key=os.environ.get("ARGILLA_API_KEY", ""),
)
# Participating countries, keyed by display name. Each entry carries the
# three-letter code ("iso") and flag emoji ("emoji") that are combined with
# the name to build the per-country dataset names.
countries = {
    "Argentina": {"iso": "ARG", "emoji": "🇦🇷"},
    "Bolivia": {"iso": "BOL", "emoji": "🇧🇴"},
    "Chile": {"iso": "CHL", "emoji": "🇨🇱"},
    "Colombia": {"iso": "COL", "emoji": "🇨🇴"},
    "Costa Rica": {"iso": "CRI", "emoji": "🇨🇷"},
    "Cuba": {"iso": "CUB", "emoji": "🇨🇺"},
    "Ecuador": {"iso": "ECU", "emoji": "🇪🇨"},
    "El Salvador": {"iso": "SLV", "emoji": "🇸🇻"},
    "España": {"iso": "ESP", "emoji": "🇪🇸"},
    "Guatemala": {"iso": "GTM", "emoji": "🇬🇹"},
    "Honduras": {"iso": "HND", "emoji": "🇭🇳"},
    "México": {"iso": "MEX", "emoji": "🇲🇽"},
    "Nicaragua": {"iso": "NIC", "emoji": "🇳🇮"},
    "Panamá": {"iso": "PAN", "emoji": "🇵🇦"},
    "Paraguay": {"iso": "PRY", "emoji": "🇵🇾"},
    "Perú": {"iso": "PER", "emoji": "🇵🇪"},
    "Puerto Rico": {"iso": "PRI", "emoji": "🇵🇷"},
    "República Dominicana": {"iso": "DOM", "emoji": "🇩🇴"},
    "Uruguay": {"iso": "URY", "emoji": "🇺🇾"},
    "Venezuela": {"iso": "VEN", "emoji": "🇻🇪"},
}
def get_blend_es_data():
    """Count ``answer_1`` responses per user across the per-country datasets.

    For every entry in ``countries`` the Argilla dataset named
    ``"{emoji} {country} - {iso} - Responder"`` is read, and each
    ``answer_1`` response that carries a truthy ``user_id`` counts as one
    contribution for that user in that country.

    Returns:
        A ``(user_contributions, user_id_to_username)`` tuple.

        ``user_contributions`` maps a user id to a dict with the keys
        ``"username"``, ``"contributions"`` (total over all datasets) and
        ``"country_contributions"`` (country name -> count).

        ``user_id_to_username`` maps every user id seen to its username, or
        to ``"User-<first 8 chars of the id>"`` when the lookup failed.

    Any exception raised while processing one dataset is printed and that
    dataset contributes nothing to ``user_contributions``; the remaining
    datasets are still processed.
    """
    user_contributions = defaultdict(
        lambda: {"username": "", "contributions": 0, "country_contributions": {}}
    )
    user_id_to_username = {}

    for country, info in countries.items():
        iso = info["iso"]
        emoji = info["emoji"]
        dataset_name = f"{emoji} {country} - {iso} - Responder"

        try:
            print(f"Processing dataset: {dataset_name}")
            dataset = client.datasets(dataset_name)

            # Counts for this dataset only; merged into the totals once the
            # whole dataset has been read without error.
            dataset_contributions = defaultdict(int)

            for record in dataset.records(with_responses=True):
                responses = record.to_dict()["responses"]
                if "answer_1" not in responses:
                    continue

                for answer in responses["answer_1"]:
                    user_id = answer["user_id"]
                    if not user_id:
                        continue
                    dataset_contributions[user_id] += 1

                    if user_id in user_id_to_username:
                        continue
                    try:
                        user = client.users(id=user_id)
                        user_id_to_username[user_id] = user.username
                    except Exception as e:
                        print(f"Error getting username for {user_id}: {e}")
                        # str() keeps the fallback usable for ids that are
                        # not plain strings (slicing them would raise here).
                        user_id_to_username[user_id] = f"User-{str(user_id)[:8]}"

            for user_id, count in dataset_contributions.items():
                entry = user_contributions[user_id]
                entry["username"] = user_id_to_username.get(
                    user_id, f"User-{str(user_id)[:8]}"
                )
                entry["contributions"] += count
                entry["country_contributions"][country] = count

        except Exception as e:
            print(f"Error processing dataset {dataset_name}: {e}")

    return user_contributions, user_id_to_username
def get_include_data(username_mapping):
result = defaultdict(int)
try:
if os.path.exists("include.csv"):
include_df = pd.read_csv("include.csv")
if "Nombre en Discord / username" in include_df.columns and "Número de preguntas / number of questions" in include_df.columns:
discord_users = defaultdict(int)
for _, row in include_df.iterrows():
username = row["Nombre en Discord / username"]
questions = row["Número de preguntas / number of questions"]
if pd.notna(username) and pd.notna(questions):
discord_users[username.lower()] += int(questions)
reverse_mapping = {}
for user_id, username in username_mapping.items():
reverse_mapping[username.lower()] = user_id
for discord_name, questions in discord_users.items():
matched = False
for argilla_name in reverse_mapping:
if discord_name in argilla_name or argilla_name in discord_name:
user_id = reverse_mapping[argilla_name]
result[user_id] += questions
matched = True
break
if not matched:
result[f"discord_{discord_name}"] = questions
except Exception as e:
print(f"Error loading include.csv: {e}")
return result
def get_estereotipos_data(username_mapping):
    """Sum the Estereotipos counts from ``token_id_counts.csv`` per user.

    ``token_id_counts.csv`` (columns ``token_id`` and ``count``) holds a
    count per mail address. ``mail_to_username.csv`` (columns ``gmail`` and
    ``discord``), when present, translates a mail address to a Discord name.
    Both files are read from the current working directory.

    Args:
        username_mapping: Mapping of user id -> username, used to attribute
            Discord names to known users.

    Returns:
        A ``defaultdict(int)`` mapping a key to its accumulated count. The
        key is the matched user id, ``"estereotipos_<discord name>"`` when
        the mail has a Discord name that matches no user, or
        ``"estereotipos_<part of the mail before '@'>"`` when the mail has
        no Discord name. Counts that end up under the same key are added
        together.

    Matching is case-insensitive. An exact username match is preferred; only
    when there is none does the first username that contains, or is contained
    in, the Discord name win. Rows with a missing value are ignored and rows
    whose count is not an integer are reported and skipped. Any other error
    is printed and whatever was collected so far is returned.
    """
    result = defaultdict(int)

    try:
        # Lowercased mail -> lowercased Discord name.
        mail_to_discord = {}
        if os.path.exists("mail_to_username.csv"):
            mapping_df = pd.read_csv("mail_to_username.csv")
            if "gmail" in mapping_df.columns and "discord" in mapping_df.columns:
                for mail, discord in zip(mapping_df["gmail"], mapping_df["discord"]):
                    if pd.notna(mail) and pd.notna(discord):
                        mail_to_discord[str(mail).lower()] = str(discord).lower()

        if not os.path.exists("token_id_counts.csv"):
            return result

        counts_df = pd.read_csv("token_id_counts.csv")
        if "token_id" not in counts_df.columns or "count" not in counts_df.columns:
            return result

        # Total count per lowercased mail address.
        mail_counts = defaultdict(int)
        for mail, count in zip(counts_df["token_id"], counts_df["count"]):
            if pd.isna(mail) or pd.isna(count):
                continue
            try:
                mail_counts[str(mail).lower()] += int(count)
            except (TypeError, ValueError):
                # One bad row must not discard every other contribution.
                print(f"Skipping malformed token_id_counts.csv row: {mail!r}, {count!r}")

        # Lowercased username -> user id. Empty usernames are left out
        # because "" is a substring of every name and would match everything.
        reverse_mapping = {
            str(username).lower(): user_id
            for user_id, username in username_mapping.items()
            if username
        }

        for mail, count in mail_counts.items():
            discord_name = mail_to_discord.get(mail, "")

            if not discord_name:
                # No Discord name known: fall back to the part of the mail
                # before the "@" (the whole string when there is no "@").
                username_part = mail.partition("@")[0]
                result[f"estereotipos_{username_part}"] += count
                continue

            if discord_name in reverse_mapping:
                key = reverse_mapping[discord_name]
            else:
                key = next(
                    (
                        user_id
                        for argilla_name, user_id in reverse_mapping.items()
                        if discord_name in argilla_name or argilla_name in discord_name
                    ),
                    f"estereotipos_{discord_name}",
                )
            # "+=" so several mails resolving to one key add up instead of
            # the last one overwriting the others.
            result[key] += count

    except Exception as e:
        print(f"Error loading estereotipos data: {e}")

    return result
def get_arena_data(username_mapping):
    """Count Arena conversations from ``arena.json`` per user.

    ``arena.json`` is expected to be an object whose values are lists of
    conversation objects; every conversation with a non-empty ``"username"``
    (treated as a mail address) counts as one contribution.
    ``mail_to_username.csv`` (columns ``gmail`` and ``discord``), when
    present, translates a mail address to a Discord name. Both files are
    read from the current working directory.

    Args:
        username_mapping: Mapping of user id -> username, used to attribute
            Discord names to known users.

    Returns:
        A ``defaultdict(int)`` mapping a key to its number of conversations.
        The key is the matched user id, ``"arena_<discord name>"`` when the
        mail has a Discord name that matches no user, or
        ``"arena_<part of the mail before '@'>"`` when the mail has no
        Discord name. Counts that end up under the same key are added
        together.

    Matching is case-insensitive. An exact username match is preferred; only
    when there is none does the first username that contains, or is contained
    in, the Discord name win. Any error is printed and whatever was collected
    so far is returned.
    """
    # Kept local: json is not among the module-level imports.
    import json

    result = defaultdict(int)

    try:
        # Lowercased mail -> lowercased Discord name.
        mail_to_discord = {}
        if os.path.exists("mail_to_username.csv"):
            mapping_df = pd.read_csv("mail_to_username.csv")
            if "gmail" in mapping_df.columns and "discord" in mapping_df.columns:
                for mail, discord in zip(mapping_df["gmail"], mapping_df["discord"]):
                    if pd.notna(mail) and pd.notna(discord):
                        mail_to_discord[str(mail).lower()] = str(discord).lower()

        if not os.path.exists("arena.json"):
            return result

        with open("arena.json", "r", encoding="utf-8") as f:
            arena_data = json.load(f)

        # Number of conversations per lowercased mail address.
        mail_counts = defaultdict(int)
        for conversations in arena_data.values():
            for conversation in conversations:
                # Entries that are not objects cannot carry a username.
                if not isinstance(conversation, dict):
                    continue
                mail = conversation.get("username")
                if mail:
                    mail_counts[str(mail).lower()] += 1

        # Lowercased username -> user id. Empty usernames are left out
        # because "" is a substring of every name and would match everything.
        reverse_mapping = {
            str(username).lower(): user_id
            for user_id, username in username_mapping.items()
            if username
        }

        for mail, count in mail_counts.items():
            discord_name = mail_to_discord.get(mail, "")

            if not discord_name:
                # No Discord name known: fall back to the part of the mail
                # before the "@" (the whole string when there is no "@").
                username_part = mail.partition("@")[0]
                result[f"arena_{username_part}"] += count
                continue

            if discord_name in reverse_mapping:
                key = reverse_mapping[discord_name]
            else:
                key = next(
                    (
                        user_id
                        for argilla_name, user_id in reverse_mapping.items()
                        if discord_name in argilla_name or argilla_name in discord_name
                    ),
                    f"arena_{discord_name}",
                )
            # "+=" so several mails resolving to one key add up instead of
            # the last one overwriting the others.
            result[key] += count

    except Exception as e:
        print(f"Error loading arena data: {e}")

    return result
# Only the most recent result is kept: the caller passes a timestamp as the
# key, so older entries would practically never be requested again and a
# larger cache would just keep stale DataFrames alive.
@lru_cache(maxsize=1)
def get_user_contributions_cached(cache_buster: int):
    """Return the consolidated leaderboard table, memoised on ``cache_buster``.

    Args:
        cache_buster: Cache key. Calling again with the same value returns
            the stored table; a new value triggers a fresh
            ``consolidate_all_data()`` run.

    Returns:
        Whatever ``consolidate_all_data()`` returns.
    """
    return consolidate_all_data()
def consolidate_all_data():
    """Merge all contribution sources into one leaderboard table.

    Reads the Blend-es data first (it also supplies the user id -> username
    mapping), then the INCLUDE, Estereotipos and Arena data, and builds one
    row per key found in any of them.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Username``, ``Total``,
        ``Blend-es``, ``INCLUDE``, ``Estereotipos`` and ``Arena``, sorted by
        ``Total`` in descending order. The frame is empty (no columns) when
        no source produced any key.
    """
    blend_es_data, username_mapping = get_blend_es_data()
    include_data = get_include_data(username_mapping)
    estereotipos_data = get_estereotipos_data(username_mapping)
    arena_data = get_arena_data(username_mapping)

    all_user_ids = (
        set(blend_es_data) | set(include_data) | set(estereotipos_data) | set(arena_data)
    )

    # Markers the other readers put in front of names they could not attribute
    # to a known user.
    external_prefixes = ("discord_", "estereotipos_", "arena_")

    rows = []
    # Iterate in a fixed order so that, together with the stable sort below,
    # users with equal totals keep the same relative order on every rebuild
    # instead of following set iteration order.
    for user_id in sorted(all_user_ids, key=str):
        blend_es_value = 0
        username = user_id

        if user_id in blend_es_data:
            blend_es_value = blend_es_data[user_id]["contributions"]
            username = blend_es_data[user_id]["username"]

        include_value = include_data.get(user_id, 0)
        estereotipos_value = estereotipos_data.get(user_id, 0)
        arena_value = arena_data.get(user_id, 0)

        if isinstance(user_id, str):
            for prefix in external_prefixes:
                if user_id.startswith(prefix):
                    # Strip only the leading marker; str.replace would also
                    # remove the same text from the middle of the name.
                    username = user_id[len(prefix):]
                    blend_es_value = 0
                    break

        rows.append(
            {
                "Username": username,
                "Total": blend_es_value + include_value + estereotipos_value + arena_value,
                "Blend-es": blend_es_value,
                "INCLUDE": include_value,
                "Estereotipos": estereotipos_value,
                "Arena": arena_value,
            }
        )

    df = pd.DataFrame(rows)
    if not df.empty:
        # mergesort is stable, so ties keep the deterministic row order above.
        df = df.sort_values("Total", ascending=False, kind="mergesort")

    return df
# ASGI application that the Gradio interface is mounted on.
app = FastAPI()

# In-process cache shared by create_leaderboard_ui() and refresh_data().
cached_data = None    # most recent leaderboard DataFrame, or None if nothing is cached
last_update_time = 0  # time.time() value stored with cached_data; 0 means "never"
def create_leaderboard_ui():
    """Build the leaderboard HTML fragment, reusing cached data when recent.

    The cached DataFrame is reused while it is less than 300 seconds old;
    otherwise the data is rebuilt through ``get_user_contributions_cached``
    (keyed on the current time in whole seconds) and the module-level cache
    and timestamp are updated.

    Returns:
        A string containing a ``<div>`` with the last-update time (server
        local time), the table styles, and the table itself with a leading
        1-based ``Rank`` column.
    """
    global cached_data, last_update_time

    now = time.time()
    cache_is_fresh = cached_data is not None and now - last_update_time < 300

    if cache_is_fresh:
        df = cached_data
    else:
        df = get_user_contributions_cached(int(now))
        cached_data, last_update_time = df, now

    if not df.empty:
        # Turn the positional order into an explicit 1-based "Rank" column.
        ranked = df.reset_index(drop=True)
        ranked.index = ranked.index + 1
        df = ranked.rename_axis("Rank").reset_index()

    df_html = df.to_html(classes="leaderboard-table", border=0, index=False)
    updated_at = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(last_update_time))

    styled_html = f"""
<div style="margin: 20px 0;">
<p>Última Actualización: {updated_at}</p>
<style>
.leaderboard-table {{
width: 100%;
border-collapse: collapse;
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
box-shadow: 0 4px 8px rgba(0,0,0,0.1);
border-radius: 8px;
overflow: hidden;
}}
.leaderboard-table th {{
background-color: #1a1a2e;
color: white;
font-weight: bold;
text-align: left;
padding: 14px;
border-bottom: 2px solid #16213e;
}}
.leaderboard-table td {{
padding: 12px 14px;
border-bottom: 1px solid #333;
background-color: #222;
color: #fff;
}}
.leaderboard-table tr:hover td {{
background-color: #2a2a3a;
}}
.leaderboard-table tr:nth-child(1) td:first-child {{
background-color: #ffd700;
color: #333;
font-weight: bold;
text-align: center;
border-right: 1px solid #333;
}}
.leaderboard-table tr:nth-child(2) td:first-child {{
background-color: #c0c0c0;
color: #333;
font-weight: bold;
text-align: center;
border-right: 1px solid #333;
}}
.leaderboard-table tr:nth-child(3) td:first-child {{
background-color: #cd7f32;
color: #333;
font-weight: bold;
text-align: center;
border-right: 1px solid #333;
}}
.leaderboard-table tr:nth-child(1) td:nth-child(2) {{
font-weight: bold;
color: #ffd700;
}}
.leaderboard-table tr:nth-child(2) td:nth-child(2) {{
font-weight: bold;
color: #c0c0c0;
}}
.leaderboard-table tr:nth-child(3) td:nth-child(2) {{
font-weight: bold;
color: #cd7f32;
}}
</style>
{df_html}
</div>
"""
    return styled_html
def refresh_data():
    """Discard the cached leaderboard and return freshly rendered HTML.

    Resets the module-level cache and its timestamp so that the following
    ``create_leaderboard_ui()`` call cannot reuse the previous data.
    """
    global cached_data, last_update_time

    cached_data, last_update_time = None, 0
    return create_leaderboard_ui()
# Page layout: a title, the leaderboard table and a manual refresh button.
with gr.Blocks(theme=gr.themes.Default()) as demo:
    with gr.Column(scale=1):
        gr.Markdown("# 🏆 Hackaton Leaderboard")

        # The function itself (not its result) is passed as the value, so the
        # HTML is produced by calling create_leaderboard_ui rather than being
        # fixed at import time -- presumably on each page load; confirm
        # against the Gradio docs.
        leaderboard_html = gr.HTML(create_leaderboard_ui)

        refresh_btn = gr.Button("🔄 Actualizar Datos", variant="primary")
        # Clicking the button drops the cache and replaces the table.
        refresh_btn.click(fn=refresh_data, outputs=leaderboard_html)

# Serve the Gradio UI from the root path of the FastAPI application.
gr.mount_gradio_app(app, demo, path="/")
if __name__ == "__main__":
    # Script entry point: serve the FastAPI app (with the mounted Gradio UI)
    # on all interfaces, port 7860. uvicorn is imported here so that merely
    # importing this module does not require it.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)