Spaces:

babylm
/

leaderboard-2024

Running

Aaron Mueller committed on Nov 23, 2024

Commit

93233b6

1 Parent(s): f21ebe8

testing

Files changed (2) hide show

src/leaderboard/read_evals.py CHANGED Viewed

@@ -71,14 +71,11 @@ class EvalResult:
                     results[task.benchmark] = task_result
         else:
             for task in Tasks:
-                print("task:", task)
                 task = task.value
-                print("task.value:", task)
                 task_result = _get_task_results(task)
                 if task_result is not None:
                     results[task.benchmark] = task_result
-        print(results)
         return self(
             eval_name=eval_name,
             full_model=full_model,
@@ -148,7 +145,6 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
     for root, _, files in os.walk(results_path):
         # We should only have json files in model results
-        print(files)
         if len(files) == 0 or any([not f.endswith(".json") for f in files]):
             continue
@@ -182,4 +178,5 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
         except KeyError:  # not all eval values present
             continue
     return results

                     results[task.benchmark] = task_result
         else:
             for task in Tasks:
                 task = task.value
                 task_result = _get_task_results(task)
                 if task_result is not None:
                     results[task.benchmark] = task_result
         return self(
             eval_name=eval_name,
             full_model=full_model,
     for root, _, files in os.walk(results_path):
         # We should only have json files in model results
         if len(files) == 0 or any([not f.endswith(".json") for f in files]):
             continue
         except KeyError:  # not all eval values present
             continue
+    print(results, len(results))
     return results

src/populate.py CHANGED Viewed

@@ -12,6 +12,13 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
     """Creates a dataframe from all the individual experiment results"""
     raw_data = get_raw_eval_results(results_path, requests_path)
     all_data_json = [v.to_dict() for v in raw_data]
     print(all_data_json)
     df = pd.DataFrame.from_records(all_data_json)

     """Creates a dataframe from all the individual experiment results"""
     raw_data = get_raw_eval_results(results_path, requests_path)
     all_data_json = [v.to_dict() for v in raw_data]
+    all_data_json_filtered = []
+    for item in all_data_json:
+        if "VQA" in benchmark_cols and "VQA" in item:
+            all_data_json_filtered.append(item)
+        if "VQA" not in benchmark_cols and "VQA" not in item:
+            all_data_json_filtered.append(item)
+    all_data_json = all_data_json_filtered
     print(all_data_json)
     df = pd.DataFrame.from_records(all_data_json)