Spaces:
Runtime error
Runtime error
fix
Browse files
src/leaderboard/read_evals.py
CHANGED
@@ -205,12 +205,12 @@ class EvalResult:
205 |
206 |   baselines = {task.value.benchmark: task.value.baseline*100 for task in Tasks}
207 |
208 | - average_old = sum([v for task, v in self.results.items() if v is not None and task in all_tasks_wo_polqa]) / len(all_tasks_wo_polqa)
209 |
210 |   average = sum([(self.results.get(task,0) - baselines.get(task, 0)) / (100 - baselines.get(task, 0)) * 100 for task in all_tasks]) / len(all_tasks)
211 | - average_g = sum([(self.results.get(task,0) - baselines.get(task, 0)) / (100 - baselines.get(task, 0)) * 100 for task in g_tasks]) / len(g_tasks)
212 | - average_mc = sum([(self.results.get(task,0) - baselines.get(task, 0)) / (100 - baselines.get(task, 0)) * 100 for task in mc_tasks]) / len(mc_tasks)
213 | - average_rag = sum([(self.results.get(task,0) - baselines.get(task, 0)) / (100 - baselines.get(task, 0)) * 100 for task in rag_tasks]) / len(rag_tasks)
214 |
215 |   data_dict = {}
216 |   # data_dict = {
205 |
206 |   baselines = {task.value.benchmark: task.value.baseline*100 for task in Tasks}
207 |
208 | + # average_old = sum([v for task, v in self.results.items() if v is not None and task in all_tasks_wo_polqa]) / len(all_tasks_wo_polqa)
209 |
210 |   average = sum([(self.results.get(task,0) - baselines.get(task, 0)) / (100 - baselines.get(task, 0)) * 100 for task in all_tasks]) / len(all_tasks)
211 | + # average_g = sum([(self.results.get(task,0) - baselines.get(task, 0)) / (100 - baselines.get(task, 0)) * 100 for task in g_tasks]) / len(g_tasks)
212 | + # average_mc = sum([(self.results.get(task,0) - baselines.get(task, 0)) / (100 - baselines.get(task, 0)) * 100 for task in mc_tasks]) / len(mc_tasks)
213 | + # average_rag = sum([(self.results.get(task,0) - baselines.get(task, 0)) / (100 - baselines.get(task, 0)) * 100 for task in rag_tasks]) / len(rag_tasks)
214 |
215 |   data_dict = {}
216 |   # data_dict = {