djstrong committed on
Commit
a8b01d2
1 Parent(s): 23c87c8
Files changed (1) hide show
  1. src/leaderboard/read_evals.py +4 -4
src/leaderboard/read_evals.py CHANGED
@@ -205,12 +205,12 @@ class EvalResult:
205
 
206
  baselines = {task.value.benchmark: task.value.baseline*100 for task in Tasks}
207
 
208
- average_old = sum([v for task, v in self.results.items() if v is not None and task in all_tasks_wo_polqa]) / len(all_tasks_wo_polqa)
209
 
210
  average = sum([(self.results.get(task,0) - baselines.get(task, 0)) / (100 - baselines.get(task, 0)) * 100 for task in all_tasks]) / len(all_tasks)
211
- average_g = sum([(self.results.get(task,0) - baselines.get(task, 0)) / (100 - baselines.get(task, 0)) * 100 for task in g_tasks]) / len(g_tasks)
212
- average_mc = sum([(self.results.get(task,0) - baselines.get(task, 0)) / (100 - baselines.get(task, 0)) * 100 for task in mc_tasks]) / len(mc_tasks)
213
- average_rag = sum([(self.results.get(task,0) - baselines.get(task, 0)) / (100 - baselines.get(task, 0)) * 100 for task in rag_tasks]) / len(rag_tasks)
214
 
215
  data_dict = {}
216
  # data_dict = {
 
205
 
206
  baselines = {task.value.benchmark: task.value.baseline*100 for task in Tasks}
207
 
208
+ # average_old = sum([v for task, v in self.results.items() if v is not None and task in all_tasks_wo_polqa]) / len(all_tasks_wo_polqa)
209
 
210
  average = sum([(self.results.get(task,0) - baselines.get(task, 0)) / (100 - baselines.get(task, 0)) * 100 for task in all_tasks]) / len(all_tasks)
211
+ # average_g = sum([(self.results.get(task,0) - baselines.get(task, 0)) / (100 - baselines.get(task, 0)) * 100 for task in g_tasks]) / len(g_tasks)
212
+ # average_mc = sum([(self.results.get(task,0) - baselines.get(task, 0)) / (100 - baselines.get(task, 0)) * 100 for task in mc_tasks]) / len(mc_tasks)
213
+ # average_rag = sum([(self.results.get(task,0) - baselines.get(task, 0)) / (100 - baselines.get(task, 0)) * 100 for task in rag_tasks]) / len(rag_tasks)
214
 
215
  data_dict = {}
216
  # data_dict = {