ouhenio committed on
Commit
6067055
·
verified ·
1 Parent(s): 2b70f36

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +88 -120
app.py CHANGED
@@ -96,8 +96,7 @@ countries = {
96
  }
97
 
98
  def get_blend_es_data():
99
- user_contributions = defaultdict(lambda: {"username": "", "contributions": 0, "country_contributions": {}})
100
- user_id_to_username = {}
101
 
102
  for country in countries.keys():
103
  iso = countries[country]["iso"]
@@ -111,6 +110,7 @@ def get_blend_es_data():
111
  records = list(dataset.records(with_responses=True))
112
 
113
  dataset_contributions = defaultdict(int)
 
114
 
115
  for record in records:
116
  record_dict = record.to_dict()
@@ -120,27 +120,29 @@ def get_blend_es_data():
120
  user_id = answer["user_id"]
121
  dataset_contributions[user_id] += 1
122
 
123
- if user_id not in user_id_to_username:
124
  try:
125
  user = client.users(id=user_id)
126
- user_id_to_username[user_id] = user.username
127
  except Exception as e:
128
  print(f"Error getting username for {user_id}: {e}")
129
- user_id_to_username[user_id] = f"User-{user_id[:8]}"
130
 
131
  for user_id, count in dataset_contributions.items():
132
- username = user_id_to_username.get(user_id, f"User-{user_id[:8]}")
133
- user_contributions[user_id]["username"] = username
134
- user_contributions[user_id]["contributions"] += count
135
- user_contributions[user_id]["country_contributions"][country] = count
 
 
136
 
137
  except Exception as e:
138
  print(f"Error processing dataset {dataset_name}: {e}")
139
 
140
- return user_contributions, user_id_to_username
141
 
142
- def get_include_data(username_mapping):
143
- result = defaultdict(int)
144
  try:
145
  if os.path.exists("include.csv"):
146
  include_df = pd.read_csv("include.csv")
@@ -152,30 +154,20 @@ def get_include_data(username_mapping):
152
  if pd.notna(username) and pd.notna(questions):
153
  discord_users[username.lower()] += int(questions)
154
 
155
- reverse_mapping = {}
156
- for user_id, username in username_mapping.items():
157
- reverse_mapping[username.lower()] = user_id
158
-
159
- for discord_name, questions in discord_users.items():
160
- matched = False
161
- for argilla_name in reverse_mapping:
162
- if discord_name in argilla_name or argilla_name in discord_name:
163
- user_id = reverse_mapping[argilla_name]
164
- result[user_id] += questions
165
- matched = True
166
- break
167
-
168
- if not matched:
169
- result[f"discord_{discord_name}"] = questions
170
  except Exception as e:
171
  print(f"Error loading include.csv: {e}")
172
 
173
- return result
174
 
175
- def get_estereotipos_data(username_mapping):
176
- result = defaultdict(int)
177
  try:
178
- mail_to_discord = {}
179
  if os.path.exists("mail_to_username.csv"):
180
  mapping_df = pd.read_csv("mail_to_username.csv")
181
  if "gmail" in mapping_df.columns and "discord" in mapping_df.columns:
@@ -184,7 +176,16 @@ def get_estereotipos_data(username_mapping):
184
  discord = row["discord"]
185
  if pd.notna(mail) and pd.notna(discord):
186
  mail_to_discord[mail.lower()] = discord.lower()
187
-
 
 
 
 
 
 
 
 
 
188
  if os.path.exists("token_id_counts.csv"):
189
  counts_df = pd.read_csv("token_id_counts.csv")
190
  if "token_id" in counts_df.columns and "count" in counts_df.columns:
@@ -195,45 +196,26 @@ def get_estereotipos_data(username_mapping):
195
  if pd.notna(mail) and pd.notna(count):
196
  mail_counts[mail.lower()] += int(count)
197
 
198
- reverse_mapping = {}
199
- for user_id, username in username_mapping.items():
200
- reverse_mapping[username.lower()] = user_id
201
-
202
  for mail, count in mail_counts.items():
203
- discord_name = mail_to_discord.get(mail, "")
204
- if discord_name:
205
- matched = False
206
- for argilla_name in reverse_mapping:
207
- if discord_name in argilla_name or argilla_name in discord_name:
208
- user_id = reverse_mapping[argilla_name]
209
- result[user_id] += count
210
- matched = True
211
- break
212
-
213
- if not matched:
214
- result[f"estereotipos_{discord_name}"] = count
215
- else:
216
- # Use just the username portion of the email (before the @)
217
- username_part = mail.split('@')[0] if '@' in mail else mail
218
- result[f"estereotipos_{username_part}"] = count
219
  except Exception as e:
220
  print(f"Error loading estereotipos data: {e}")
221
 
222
- return result
223
 
224
- def get_arena_data(username_mapping):
225
- result = defaultdict(int)
 
 
226
  try:
227
- mail_to_discord = {}
228
- if os.path.exists("mail_to_username.csv"):
229
- mapping_df = pd.read_csv("mail_to_username.csv")
230
- if "gmail" in mapping_df.columns and "discord" in mapping_df.columns:
231
- for _, row in mapping_df.iterrows():
232
- mail = row["gmail"]
233
- discord = row["discord"]
234
- if pd.notna(mail) and pd.notna(discord):
235
- mail_to_discord[mail.lower()] = discord.lower()
236
-
237
  if os.path.exists("arena.json"):
238
  import json
239
  with open("arena.json", "r", encoding="utf-8") as f:
@@ -248,77 +230,63 @@ def get_arena_data(username_mapping):
248
  if mail:
249
  mail_counts[mail.lower()] += 1
250
 
251
- reverse_mapping = {}
252
- for user_id, username in username_mapping.items():
253
- reverse_mapping[username.lower()] = user_id
254
-
255
  for mail, count in mail_counts.items():
256
- discord_name = mail_to_discord.get(mail, "")
257
- if discord_name:
258
- matched = False
259
- for argilla_name in reverse_mapping:
260
- if discord_name in argilla_name or argilla_name in discord_name:
261
- user_id = reverse_mapping[argilla_name]
262
- result[user_id] += count
263
- matched = True
264
- break
265
-
266
- if not matched:
267
- result[f"arena_{discord_name}"] = count
268
- else:
269
- # Use just the username portion of the email (before the @)
270
- username_part = mail.split('@')[0] if '@' in mail else mail
271
- result[f"arena_{username_part}"] = count
272
  except Exception as e:
273
  print(f"Error loading arena data: {e}")
274
 
275
- return result
276
 
277
  @lru_cache(maxsize=32)
278
  def get_user_contributions_cached(cache_buster: int):
279
  return consolidate_all_data()
280
 
281
  def consolidate_all_data():
282
- user_data = {}
 
 
 
 
283
 
284
- blend_es_data, username_mapping = get_blend_es_data()
285
- include_data = get_include_data(username_mapping)
286
- estereotipos_data = get_estereotipos_data(username_mapping)
287
- arena_data = get_arena_data(username_mapping)
288
 
289
- all_user_ids = set(blend_es_data.keys()) | set(include_data.keys()) | set(estereotipos_data.keys()) | set(arena_data.keys())
290
-
291
- rows = []
292
- for user_id in all_user_ids:
293
- blend_es_value = 0
294
- username = user_id
295
 
296
- if user_id in blend_es_data:
297
- blend_es_value = blend_es_data[user_id]["contributions"]
298
- username = blend_es_data[user_id]["username"]
299
 
300
- include_value = include_data.get(user_id, 0)
301
- estereotipos_value = estereotipos_data.get(user_id, 0)
302
- arena_value = arena_data.get(user_id, 0)
303
-
304
- if isinstance(user_id, str):
305
- if user_id.startswith("discord_"):
306
- username = user_id.replace("discord_", "")
307
- blend_es_value = 0
308
- elif user_id.startswith("estereotipos_"):
309
- username = user_id.replace("estereotipos_", "")
310
- blend_es_value = 0
311
- elif user_id.startswith("arena_"):
312
- username = user_id.replace("arena_", "")
313
- blend_es_value = 0
314
 
 
 
 
 
 
 
 
 
 
 
 
 
315
  row = {
316
- "Username": username,
317
- "Total": blend_es_value + include_value + estereotipos_value + arena_value,
318
- "Blend-es": blend_es_value,
319
- "INCLUDE": include_value,
320
- "Estereotipos": estereotipos_value,
321
- "Arena": arena_value
322
  }
323
  rows.append(row)
324
 
 
96
  }
97
 
98
  def get_blend_es_data():
99
+ data = []
 
100
 
101
  for country in countries.keys():
102
  iso = countries[country]["iso"]
 
110
  records = list(dataset.records(with_responses=True))
111
 
112
  dataset_contributions = defaultdict(int)
113
+ user_mapping = {}
114
 
115
  for record in records:
116
  record_dict = record.to_dict()
 
120
  user_id = answer["user_id"]
121
  dataset_contributions[user_id] += 1
122
 
123
+ if user_id not in user_mapping:
124
  try:
125
  user = client.users(id=user_id)
126
+ user_mapping[user_id] = user.username
127
  except Exception as e:
128
  print(f"Error getting username for {user_id}: {e}")
129
+ user_mapping[user_id] = f"User-{user_id[:8]}"
130
 
131
  for user_id, count in dataset_contributions.items():
132
+ username = user_mapping.get(user_id, f"User-{user_id[:8]}")
133
+ data.append({
134
+ "source": "blend-es",
135
+ "username": username,
136
+ "count": count
137
+ })
138
 
139
  except Exception as e:
140
  print(f"Error processing dataset {dataset_name}: {e}")
141
 
142
+ return data
143
 
144
+ def get_include_data():
145
+ data = []
146
  try:
147
  if os.path.exists("include.csv"):
148
  include_df = pd.read_csv("include.csv")
 
154
  if pd.notna(username) and pd.notna(questions):
155
  discord_users[username.lower()] += int(questions)
156
 
157
+ for username, count in discord_users.items():
158
+ data.append({
159
+ "source": "include",
160
+ "username": username,
161
+ "count": count
162
+ })
 
 
 
 
 
 
 
 
 
163
  except Exception as e:
164
  print(f"Error loading include.csv: {e}")
165
 
166
+ return data
167
 
168
+ def get_mail_to_username_mapping():
169
+ mail_to_discord = {}
170
  try:
 
171
  if os.path.exists("mail_to_username.csv"):
172
  mapping_df = pd.read_csv("mail_to_username.csv")
173
  if "gmail" in mapping_df.columns and "discord" in mapping_df.columns:
 
176
  discord = row["discord"]
177
  if pd.notna(mail) and pd.notna(discord):
178
  mail_to_discord[mail.lower()] = discord.lower()
179
+ except Exception as e:
180
+ print(f"Error loading mail_to_username.csv: {e}")
181
+
182
+ return mail_to_discord
183
+
184
+ def get_estereotipos_data():
185
+ data = []
186
+ mail_to_discord = get_mail_to_username_mapping()
187
+
188
+ try:
189
  if os.path.exists("token_id_counts.csv"):
190
  counts_df = pd.read_csv("token_id_counts.csv")
191
  if "token_id" in counts_df.columns and "count" in counts_df.columns:
 
196
  if pd.notna(mail) and pd.notna(count):
197
  mail_counts[mail.lower()] += int(count)
198
 
 
 
 
 
199
  for mail, count in mail_counts.items():
200
+ username = mail_to_discord.get(mail.lower(), "")
201
+ if not username:
202
+ username = mail.split('@')[0] if '@' in mail else mail
203
+
204
+ data.append({
205
+ "source": "estereotipos",
206
+ "username": username,
207
+ "count": count
208
+ })
 
 
 
 
 
 
 
209
  except Exception as e:
210
  print(f"Error loading estereotipos data: {e}")
211
 
212
+ return data
213
 
214
+ def get_arena_data():
215
+ data = []
216
+ mail_to_discord = get_mail_to_username_mapping()
217
+
218
  try:
 
 
 
 
 
 
 
 
 
 
219
  if os.path.exists("arena.json"):
220
  import json
221
  with open("arena.json", "r", encoding="utf-8") as f:
 
230
  if mail:
231
  mail_counts[mail.lower()] += 1
232
 
 
 
 
 
233
  for mail, count in mail_counts.items():
234
+ username = mail_to_discord.get(mail.lower(), "")
235
+ if not username:
236
+ username = mail.split('@')[0] if '@' in mail else mail
237
+
238
+ data.append({
239
+ "source": "arena",
240
+ "username": username,
241
+ "count": count
242
+ })
 
 
 
 
 
 
 
243
  except Exception as e:
244
  print(f"Error loading arena data: {e}")
245
 
246
+ return data
247
 
248
  @lru_cache(maxsize=32)
249
  def get_user_contributions_cached(cache_buster: int):
250
  return consolidate_all_data()
251
 
252
  def consolidate_all_data():
253
+ all_data = []
254
+ all_data.extend(get_blend_es_data())
255
+ all_data.extend(get_include_data())
256
+ all_data.extend(get_estereotipos_data())
257
+ all_data.extend(get_arena_data())
258
 
259
+ user_contributions = defaultdict(lambda: {"username": "", "blend_es": 0, "include": 0, "estereotipos": 0, "arena": 0})
 
 
 
260
 
261
+ for item in all_data:
262
+ source = item["source"]
263
+ username = item["username"]
264
+ count = item["count"]
 
 
265
 
266
+ user_key = username.lower()
 
 
267
 
268
+ if not user_contributions[user_key]["username"]:
269
+ user_contributions[user_key]["username"] = username
 
 
 
 
 
 
 
 
 
 
 
 
270
 
271
+ if source == "blend-es":
272
+ user_contributions[user_key]["blend_es"] += count
273
+ elif source == "include":
274
+ user_contributions[user_key]["include"] += count
275
+ elif source == "estereotipos":
276
+ user_contributions[user_key]["estereotipos"] += count
277
+ elif source == "arena":
278
+ user_contributions[user_key]["arena"] += count
279
+
280
+ rows = []
281
+ for _, data in user_contributions.items():
282
+ total = data["blend_es"] + data["include"] + data["estereotipos"] + data["arena"]
283
  row = {
284
+ "Username": data["username"],
285
+ "Total": total,
286
+ "Blend-es": data["blend_es"],
287
+ "INCLUDE": data["include"],
288
+ "Estereotipos": data["estereotipos"],
289
+ "Arena": data["arena"]
290
  }
291
  rows.append(row)
292