Spaces:
Restarting
Restarting
Include PoQuAD (polish_poquad_open_book) in the RAG average
Browse files
src/leaderboard/read_evals.py
CHANGED
@@ -170,7 +170,7 @@ class EvalResult:
|
|
170 |
"""Converts the Eval Result to a dict compatible with our dataframe display"""
|
171 |
g_tasks = [task.value.benchmark for task in Tasks if task.value.type == "generate_until"]
|
172 |
mc_tasks = [task.value.benchmark for task in Tasks if task.value.type == "multiple_choice"]
|
173 |
-
rag_tasks = ['polish_polqa_reranking_multiple_choice', 'polish_polqa_open_book']
|
174 |
all_tasks = g_tasks + mc_tasks
|
175 |
all_tasks_wo_polqa = [task for task in all_tasks if 'polqa' not in task]
|
176 |
|
|
|
170 |
"""Converts the Eval Result to a dict compatible with our dataframe display"""
|
171 |
g_tasks = [task.value.benchmark for task in Tasks if task.value.type == "generate_until"]
|
172 |
mc_tasks = [task.value.benchmark for task in Tasks if task.value.type == "multiple_choice"]
|
173 |
+
rag_tasks = ['polish_polqa_reranking_multiple_choice', 'polish_polqa_open_book', 'polish_poquad_open_book']
|
174 |
all_tasks = g_tasks + mc_tasks
|
175 |
all_tasks_wo_polqa = [task for task in all_tasks if 'polqa' not in task]
|
176 |
|