leaderboard-hackaton-2025

Running

App Files Community

ouhenio committed 3 days ago

Commit

6067055

verified ·

1 Parent(s): 2b70f36

Update app.py

Browse files

Files changed (1) hide show

app.py +88 -120

app.py CHANGED Viewed

@@ -96,8 +96,7 @@ countries = {
 }
 def get_blend_es_data():
-    user_contributions = defaultdict(lambda: {"username": "", "contributions": 0, "country_contributions": {}})
-    user_id_to_username = {}
     for country in countries.keys():
         iso = countries[country]["iso"]
@@ -111,6 +110,7 @@ def get_blend_es_data():
             records = list(dataset.records(with_responses=True))
             dataset_contributions = defaultdict(int)
             for record in records:
                 record_dict = record.to_dict()
@@ -120,27 +120,29 @@ def get_blend_es_data():
                             user_id = answer["user_id"]
                             dataset_contributions[user_id] += 1
-                            if user_id not in user_id_to_username:
                                 try:
                                     user = client.users(id=user_id)
-                                    user_id_to_username[user_id] = user.username
                                 except Exception as e:
                                     print(f"Error getting username for {user_id}: {e}")
-                                    user_id_to_username[user_id] = f"User-{user_id[:8]}"
             for user_id, count in dataset_contributions.items():
-                username = user_id_to_username.get(user_id, f"User-{user_id[:8]}")
-                user_contributions[user_id]["username"] = username
-                user_contributions[user_id]["contributions"] += count
-                user_contributions[user_id]["country_contributions"][country] = count
         except Exception as e:
             print(f"Error processing dataset {dataset_name}: {e}")
-    return user_contributions, user_id_to_username
-def get_include_data(username_mapping):
-    result = defaultdict(int)
     try:
         if os.path.exists("include.csv"):
             include_df = pd.read_csv("include.csv")
@@ -152,30 +154,20 @@ def get_include_data(username_mapping):
                     if pd.notna(username) and pd.notna(questions):
                         discord_users[username.lower()] += int(questions)
-                reverse_mapping = {}
-                for user_id, username in username_mapping.items():
-                    reverse_mapping[username.lower()] = user_id
-                for discord_name, questions in discord_users.items():
-                    matched = False
-                    for argilla_name in reverse_mapping:
-                        if discord_name in argilla_name or argilla_name in discord_name:
-                            user_id = reverse_mapping[argilla_name]
-                            result[user_id] += questions
-                            matched = True
-                            break
-                    if not matched:
-                        result[f"discord_{discord_name}"] = questions
     except Exception as e:
         print(f"Error loading include.csv: {e}")
-    return result
-def get_estereotipos_data(username_mapping):
-    result = defaultdict(int)
     try:
-        mail_to_discord = {}
         if os.path.exists("mail_to_username.csv"):
             mapping_df = pd.read_csv("mail_to_username.csv")
             if "gmail" in mapping_df.columns and "discord" in mapping_df.columns:
@@ -184,7 +176,16 @@ def get_estereotipos_data(username_mapping):
                     discord = row["discord"]
                     if pd.notna(mail) and pd.notna(discord):
                         mail_to_discord[mail.lower()] = discord.lower()
         if os.path.exists("token_id_counts.csv"):
             counts_df = pd.read_csv("token_id_counts.csv")
             if "token_id" in counts_df.columns and "count" in counts_df.columns:
@@ -195,45 +196,26 @@ def get_estereotipos_data(username_mapping):
                     if pd.notna(mail) and pd.notna(count):
                         mail_counts[mail.lower()] += int(count)
-                reverse_mapping = {}
-                for user_id, username in username_mapping.items():
-                    reverse_mapping[username.lower()] = user_id
                 for mail, count in mail_counts.items():
-                    discord_name = mail_to_discord.get(mail, "")
-                    if discord_name:
-                        matched = False
-                        for argilla_name in reverse_mapping:
-                            if discord_name in argilla_name or argilla_name in discord_name:
-                                user_id = reverse_mapping[argilla_name]
-                                result[user_id] += count
-                                matched = True
-                                break
-                        if not matched:
-                            result[f"estereotipos_{discord_name}"] = count
-                    else:
-                        # Use just the username portion of the email (before the @)
-                        username_part = mail.split('@')[0] if '@' in mail else mail
-                        result[f"estereotipos_{username_part}"] = count
     except Exception as e:
         print(f"Error loading estereotipos data: {e}")
-    return result
-def get_arena_data(username_mapping):
-    result = defaultdict(int)
     try:
-        mail_to_discord = {}
-        if os.path.exists("mail_to_username.csv"):
-            mapping_df = pd.read_csv("mail_to_username.csv")
-            if "gmail" in mapping_df.columns and "discord" in mapping_df.columns:
-                for _, row in mapping_df.iterrows():
-                    mail = row["gmail"]
-                    discord = row["discord"]
-                    if pd.notna(mail) and pd.notna(discord):
-                        mail_to_discord[mail.lower()] = discord.lower()
         if os.path.exists("arena.json"):
             import json
             with open("arena.json", "r", encoding="utf-8") as f:
@@ -248,77 +230,63 @@ def get_arena_data(username_mapping):
                         if mail:
                             mail_counts[mail.lower()] += 1
-            reverse_mapping = {}
-            for user_id, username in username_mapping.items():
-                reverse_mapping[username.lower()] = user_id
             for mail, count in mail_counts.items():
-                discord_name = mail_to_discord.get(mail, "")
-                if discord_name:
-                    matched = False
-                    for argilla_name in reverse_mapping:
-                        if discord_name in argilla_name or argilla_name in discord_name:
-                            user_id = reverse_mapping[argilla_name]
-                            result[user_id] += count
-                            matched = True
-                            break
-                    if not matched:
-                        result[f"arena_{discord_name}"] = count
-                else:
-                    # Use just the username portion of the email (before the @)
-                    username_part = mail.split('@')[0] if '@' in mail else mail
-                    result[f"arena_{username_part}"] = count
     except Exception as e:
         print(f"Error loading arena data: {e}")
-    return result
 @lru_cache(maxsize=32)
 def get_user_contributions_cached(cache_buster: int):
     return consolidate_all_data()
 def consolidate_all_data():
-    user_data = {}
-    blend_es_data, username_mapping = get_blend_es_data()
-    include_data = get_include_data(username_mapping)
-    estereotipos_data = get_estereotipos_data(username_mapping)
-    arena_data = get_arena_data(username_mapping)
-    all_user_ids = set(blend_es_data.keys()) | set(include_data.keys()) | set(estereotipos_data.keys()) | set(arena_data.keys())
-    rows = []
-    for user_id in all_user_ids:
-        blend_es_value = 0
-        username = user_id
-        if user_id in blend_es_data:
-            blend_es_value = blend_es_data[user_id]["contributions"]
-            username = blend_es_data[user_id]["username"]
-        include_value = include_data.get(user_id, 0)
-        estereotipos_value = estereotipos_data.get(user_id, 0)
-        arena_value = arena_data.get(user_id, 0)
-        if isinstance(user_id, str):
-            if user_id.startswith("discord_"):
-                username = user_id.replace("discord_", "")
-                blend_es_value = 0
-            elif user_id.startswith("estereotipos_"):
-                username = user_id.replace("estereotipos_", "")
-                blend_es_value = 0
-            elif user_id.startswith("arena_"):
-                username = user_id.replace("arena_", "")
-                blend_es_value = 0
         row = {
-            "Username": username,
-            "Total": blend_es_value + include_value + estereotipos_value + arena_value,
-            "Blend-es": blend_es_value,
-            "INCLUDE": include_value,
-            "Estereotipos": estereotipos_value,
-            "Arena": arena_value
         }
         rows.append(row)

 }
 def get_blend_es_data():
+    data = []
     for country in countries.keys():
         iso = countries[country]["iso"]
             records = list(dataset.records(with_responses=True))
             dataset_contributions = defaultdict(int)
+            user_mapping = {}
             for record in records:
                 record_dict = record.to_dict()
                             user_id = answer["user_id"]
                             dataset_contributions[user_id] += 1
+                            if user_id not in user_mapping:
                                 try:
                                     user = client.users(id=user_id)
+                                    user_mapping[user_id] = user.username
                                 except Exception as e:
                                     print(f"Error getting username for {user_id}: {e}")
+                                    user_mapping[user_id] = f"User-{user_id[:8]}"
             for user_id, count in dataset_contributions.items():
+                username = user_mapping.get(user_id, f"User-{user_id[:8]}")
+                data.append({
+                    "source": "blend-es",
+                    "username": username,
+                    "count": count
+                })
         except Exception as e:
             print(f"Error processing dataset {dataset_name}: {e}")
+    return data
+def get_include_data():
+    data = []
     try:
         if os.path.exists("include.csv"):
             include_df = pd.read_csv("include.csv")
                     if pd.notna(username) and pd.notna(questions):
                         discord_users[username.lower()] += int(questions)
+                for username, count in discord_users.items():
+                    data.append({
+                        "source": "include",
+                        "username": username,
+                        "count": count
+                    })
     except Exception as e:
         print(f"Error loading include.csv: {e}")
+    return data
+def get_mail_to_username_mapping():
+    mail_to_discord = {}
     try:
         if os.path.exists("mail_to_username.csv"):
             mapping_df = pd.read_csv("mail_to_username.csv")
             if "gmail" in mapping_df.columns and "discord" in mapping_df.columns:
                     discord = row["discord"]
                     if pd.notna(mail) and pd.notna(discord):
                         mail_to_discord[mail.lower()] = discord.lower()
+    except Exception as e:
+        print(f"Error loading mail_to_username.csv: {e}")
+    return mail_to_discord
+def get_estereotipos_data():
+    data = []
+    mail_to_discord = get_mail_to_username_mapping()
+    try:
         if os.path.exists("token_id_counts.csv"):
             counts_df = pd.read_csv("token_id_counts.csv")
             if "token_id" in counts_df.columns and "count" in counts_df.columns:
                     if pd.notna(mail) and pd.notna(count):
                         mail_counts[mail.lower()] += int(count)
                 for mail, count in mail_counts.items():
+                    username = mail_to_discord.get(mail.lower(), "")
+                    if not username:
+                        username = mail.split('@')[0] if '@' in mail else mail
+                    data.append({
+                        "source": "estereotipos",
+                        "username": username,
+                        "count": count
+                    })
     except Exception as e:
         print(f"Error loading estereotipos data: {e}")
+    return data
+def get_arena_data():
+    data = []
+    mail_to_discord = get_mail_to_username_mapping()
     try:
         if os.path.exists("arena.json"):
             import json
             with open("arena.json", "r", encoding="utf-8") as f:
                         if mail:
                             mail_counts[mail.lower()] += 1
             for mail, count in mail_counts.items():
+                username = mail_to_discord.get(mail.lower(), "")
+                if not username:
+                    username = mail.split('@')[0] if '@' in mail else mail
+                data.append({
+                    "source": "arena",
+                    "username": username,
+                    "count": count
+                })
     except Exception as e:
         print(f"Error loading arena data: {e}")
+    return data
 @lru_cache(maxsize=32)
 def get_user_contributions_cached(cache_buster: int):
     return consolidate_all_data()
 def consolidate_all_data():
+    all_data = []
+    all_data.extend(get_blend_es_data())
+    all_data.extend(get_include_data())
+    all_data.extend(get_estereotipos_data())
+    all_data.extend(get_arena_data())
+    user_contributions = defaultdict(lambda: {"username": "", "blend_es": 0, "include": 0, "estereotipos": 0, "arena": 0})
+    for item in all_data:
+        source = item["source"]
+        username = item["username"]
+        count = item["count"]
+        user_key = username.lower()
+        if not user_contributions[user_key]["username"]:
+            user_contributions[user_key]["username"] = username
+        if source == "blend-es":
+            user_contributions[user_key]["blend_es"] += count
+        elif source == "include":
+            user_contributions[user_key]["include"] += count
+        elif source == "estereotipos":
+            user_contributions[user_key]["estereotipos"] += count
+        elif source == "arena":
+            user_contributions[user_key]["arena"] += count
+    rows = []
+    for _, data in user_contributions.items():
+        total = data["blend_es"] + data["include"] + data["estereotipos"] + data["arena"]
         row = {
+            "Username": data["username"],
+            "Total": total,
+            "Blend-es": data["blend_es"],
+            "INCLUDE": data["include"],
+            "Estereotipos": data["estereotipos"],
+            "Arena": data["arena"]
         }
         rows.append(row)