Aaron Mueller committed on
Commit
81490fa
·
1 Parent(s): 4ca4431

separate txt and vision avgs

Browse files
Files changed (2) hide show
  1. src/leaderboard/read_evals.py +7 -6
  2. src/populate.py +2 -2
src/leaderboard/read_evals.py CHANGED
@@ -8,7 +8,7 @@ import dateutil
8
  import numpy as np
9
 
10
  from src.display.formatting import make_clickable_model
11
- from src.display.utils import AutoEvalColumn, Tasks, TasksMultimodal
12
  from src.submission.check_validity import is_model_on_hub
13
 
14
 
@@ -100,20 +100,21 @@ class EvalResult:
100
 
101
  def to_dict(self):
102
  """Converts the Eval Result to a dict compatible with our dataframe display"""
 
103
  vision_tasks = ("VQA", "Winoground", "DevBench")
104
  text_average = sum([v for k, v in self.results.items() if v is not None and k not in vision_tasks]) / len(Tasks)
105
  data_dict = {
106
  "eval_name": self.eval_name, # not a column, just a save name,
107
- AutoEvalColumn.model.name: make_clickable_model(self.full_model),
108
- AutoEvalColumn.revision.name: self.revision,
109
- AutoEvalColumn.text_average.name: text_average,
110
- AutoEvalColumn.still_on_hub.name: self.still_on_hub,
111
  }
112
 
113
  if self.track.lower() == "multimodal":
114
  taskset = TasksMultimodal
115
  vision_average = sum([v for k, v in self.results.items() if v is not None and k in vision_tasks]) / len(Tasks)
116
- data_dict[AutoEvalColumn.vision_average.name] = vision_average
117
  else:
118
  taskset = Tasks
119
  for task in taskset:
 
8
  import numpy as np
9
 
10
  from src.display.formatting import make_clickable_model
11
+ from src.display.utils import AutoEvalColumn, AutoEvalColumnMultimodal, Tasks, TasksMultimodal
12
  from src.submission.check_validity import is_model_on_hub
13
 
14
 
 
100
 
101
  def to_dict(self):
102
  """Converts the Eval Result to a dict compatible with our dataframe display"""
103
+ eval_column = AutoEvalColumnMultimodal if self.track.lower() == "multimodal" else AutoEvalColumn
104
  vision_tasks = ("VQA", "Winoground", "DevBench")
105
  text_average = sum([v for k, v in self.results.items() if v is not None and k not in vision_tasks]) / len(Tasks)
106
  data_dict = {
107
  "eval_name": self.eval_name, # not a column, just a save name,
108
+ eval_column.model.name: make_clickable_model(self.full_model),
109
+ eval_column.revision.name: self.revision,
110
+ eval_column.text_average.name: text_average,
111
+ eval_column.still_on_hub.name: self.still_on_hub,
112
  }
113
 
114
  if self.track.lower() == "multimodal":
115
  taskset = TasksMultimodal
116
  vision_average = sum([v for k, v in self.results.items() if v is not None and k in vision_tasks]) / len(Tasks)
117
+ data_dict[eval_column.vision_average.name] = vision_average
118
  else:
119
  taskset = Tasks
120
  for task in taskset:
src/populate.py CHANGED
@@ -4,7 +4,7 @@ import os
4
  import pandas as pd
5
 
6
  from src.display.formatting import has_no_nan_values, make_clickable_model
7
- from src.display.utils import AutoEvalColumn, EvalQueueColumn
8
  from src.leaderboard.read_evals import get_raw_eval_results
9
 
10
 
@@ -23,7 +23,7 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
23
 
24
  df = pd.DataFrame.from_records(all_data_json)
25
  print(df)
26
- df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
27
  df = df[cols].round(decimals=1)
28
 
29
  # filter out if any of the benchmarks have not been produced
 
4
  import pandas as pd
5
 
6
  from src.display.formatting import has_no_nan_values, make_clickable_model
7
+ from src.display.utils import AutoEvalColumn, AutoEvalColumnMultimodal, EvalQueueColumn
8
  from src.leaderboard.read_evals import get_raw_eval_results
9
 
10
 
 
23
 
24
  df = pd.DataFrame.from_records(all_data_json)
25
  print(df)
26
+ df = df.sort_values(by=[AutoEvalColumn.text_average.name], ascending=False)
27
  df = df[cols].round(decimals=1)
28
 
29
  # filter out if any of the benchmarks have not been produced