djstrong committed on
Commit
008f87f
·
1 Parent(s): eb0a895

Include poquad in the RAG average

Browse files
Files changed (1) hide show
  1. src/leaderboard/read_evals.py +1 -1
src/leaderboard/read_evals.py CHANGED
@@ -170,7 +170,7 @@ class EvalResult:
170
  """Converts the Eval Result to a dict compatible with our dataframe display"""
171
  g_tasks = [task.value.benchmark for task in Tasks if task.value.type == "generate_until"]
172
  mc_tasks = [task.value.benchmark for task in Tasks if task.value.type == "multiple_choice"]
173
- rag_tasks = ['polish_polqa_reranking_multiple_choice', 'polish_polqa_open_book']
174
  all_tasks = g_tasks + mc_tasks
175
  all_tasks_wo_polqa = [task for task in all_tasks if 'polqa' not in task]
176
 
 
170
  """Converts the Eval Result to a dict compatible with our dataframe display"""
171
  g_tasks = [task.value.benchmark for task in Tasks if task.value.type == "generate_until"]
172
  mc_tasks = [task.value.benchmark for task in Tasks if task.value.type == "multiple_choice"]
173
+ rag_tasks = ['polish_polqa_reranking_multiple_choice', 'polish_polqa_open_book', 'polish_poquad_open_book']
174
  all_tasks = g_tasks + mc_tasks
175
  all_tasks_wo_polqa = [task for task in all_tasks if 'polqa' not in task]
176