Spaces:
Running
Running
Aaron Mueller
committed on
Commit
·
4ca4431
1
Parent(s):
6a3b9c1
separate txt and vision averages
Browse files
src/leaderboard/read_evals.py
CHANGED
@@ -100,17 +100,20 @@ class EvalResult:
|
|
100 |
|
101 |
def to_dict(self):
|
102 |
"""Converts the Eval Result to a dict compatible with our dataframe display"""
|
103 |
-
|
|
|
104 |
data_dict = {
|
105 |
"eval_name": self.eval_name, # not a column, just a save name,
|
106 |
AutoEvalColumn.model.name: make_clickable_model(self.full_model),
|
107 |
AutoEvalColumn.revision.name: self.revision,
|
108 |
-
AutoEvalColumn.
|
109 |
AutoEvalColumn.still_on_hub.name: self.still_on_hub,
|
110 |
}
|
111 |
|
112 |
if self.track.lower() == "multimodal":
|
113 |
taskset = TasksMultimodal
|
|
|
|
|
114 |
else:
|
115 |
taskset = Tasks
|
116 |
for task in taskset:
|
|
|
100 |
|
101 |
def to_dict(self):
|
102 |
"""Converts the Eval Result to a dict compatible with our dataframe display"""
|
103 |
+
vision_tasks = ("VQA", "Winoground", "DevBench")
|
104 |
+
text_average = sum([v for k, v in self.results.items() if v is not None and k not in vision_tasks]) / len(Tasks)
|
105 |
data_dict = {
|
106 |
"eval_name": self.eval_name, # not a column, just a save name,
|
107 |
AutoEvalColumn.model.name: make_clickable_model(self.full_model),
|
108 |
AutoEvalColumn.revision.name: self.revision,
|
109 |
+
AutoEvalColumn.text_average.name: text_average,
|
110 |
AutoEvalColumn.still_on_hub.name: self.still_on_hub,
|
111 |
}
|
112 |
|
113 |
if self.track.lower() == "multimodal":
|
114 |
taskset = TasksMultimodal
|
115 |
+
vision_average = sum([v for k, v in self.results.items() if v is not None and k in vision_tasks]) / len(Tasks)
|
116 |
+
data_dict[AutoEvalColumn.vision_average.name] = vision_average
|
117 |
else:
|
118 |
taskset = Tasks
|
119 |
for task in taskset:
|