polish_medical_leaderboard

Restarting

djstrong committed on Jun 6, 2024

Commit

008f87f

1 Parent(s): eb0a895

poquad in rag avg

Files changed (1) hide show

src/leaderboard/read_evals.py CHANGED Viewed

@@ -170,7 +170,7 @@ class EvalResult:
         """Converts the Eval Result to a dict compatible with our dataframe display"""
         g_tasks = [task.value.benchmark for task in Tasks if task.value.type == "generate_until"]
         mc_tasks = [task.value.benchmark for task in Tasks if task.value.type == "multiple_choice"]
-        rag_tasks = ['polish_polqa_reranking_multiple_choice', 'polish_polqa_open_book']
         all_tasks = g_tasks + mc_tasks
         all_tasks_wo_polqa = [task for task in all_tasks if 'polqa' not in task]

         """Converts the Eval Result to a dict compatible with our dataframe display"""
         g_tasks = [task.value.benchmark for task in Tasks if task.value.type == "generate_until"]
         mc_tasks = [task.value.benchmark for task in Tasks if task.value.type == "multiple_choice"]
+        rag_tasks = ['polish_polqa_reranking_multiple_choice', 'polish_polqa_open_book', 'polish_poquad_open_book']
         all_tasks = g_tasks + mc_tasks
         all_tasks_wo_polqa = [task for task in all_tasks if 'polqa' not in task]