djstrong committed on
Commit
6b50f19
·
1 Parent(s): 81f000c
Files changed (1) hide show
  1. src/leaderboard/read_evals.py +5 -0
src/leaderboard/read_evals.py CHANGED
@@ -447,6 +447,7 @@ def get_raw_eval_results(results_path: str, requests_path: str, metadata) -> lis
447
  all_models = []
448
  missing_results_for_task = {}
449
  missing_metadata = []
 
450
  for v in eval_results.values():
451
  r = v.to_dict()
452
  for task in Tasks:
@@ -454,8 +455,12 @@ def get_raw_eval_results(results_path: str, requests_path: str, metadata) -> lis
454
  task_name = f"{r['n_shot']}|{task.value.benchmark}"
455
  if task_name in missing_results_for_task:
456
  missing_results_for_task[task_name].append(f"{v.full_model}|{v.org_and_model}")
 
 
457
  else:
458
  missing_results_for_task[task_name] = [f"{v.full_model}|{v.org_and_model}"]
 
 
459
  if r[AutoEvalColumn.lang.name] is None or r[AutoEvalColumn.lang.name] == "?":
460
  missing_metadata.append(f"{v.full_model}")
461
  all_models.append((v.full_model, v.num_params, v.still_on_hub))
 
447
  all_models = []
448
  missing_results_for_task = {}
449
  missing_metadata = []
450
+ for_run=[]
451
  for v in eval_results.values():
452
  r = v.to_dict()
453
  for task in Tasks:
 
455
  task_name = f"{r['n_shot']}|{task.value.benchmark}"
456
  if task_name in missing_results_for_task:
457
  missing_results_for_task[task_name].append(f"{v.full_model}|{v.org_and_model}")
458
+ if v.still_on_hub and task.value.benchmark in all_tasks:
459
+ print(f'batch start.sh "bash eval_model_task_bs1.sh {r["n_shot"]} {task.value.benchmark} {v.full_model}"')
460
  else:
461
  missing_results_for_task[task_name] = [f"{v.full_model}|{v.org_and_model}"]
462
+ if v.still_on_hub and task.value.benchmark in all_tasks:
463
+ print(f'batch start.sh "bash eval_model_task_bs1.sh {r["n_shot"]} {task.value.benchmark} {v.full_model}"')
464
  if r[AutoEvalColumn.lang.name] is None or r[AutoEvalColumn.lang.name] == "?":
465
  missing_metadata.append(f"{v.full_model}")
466
  all_models.append((v.full_model, v.num_params, v.still_on_hub))