Spaces:
Runtime error
Runtime error
- src/leaderboard/read_evals.py +87 -16
src/leaderboard/read_evals.py
CHANGED
@@ -108,22 +108,93 @@ class EvalResult:
|
|
108 |
def to_dict(self):
|
109 |
"""Converts the Eval Result to a dict compatible with our dataframe display"""
|
110 |
average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
|
111 |
-
data_dict = {
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
127 |
|
128 |
for task in Tasks:
|
129 |
data_dict[task.value.col_name] = self.results[task.value.benchmark]
|
|
|
108 |
def to_dict(self):
|
109 |
"""Converts the Eval Result to a dict compatible with our dataframe display"""
|
110 |
average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
|
111 |
+
# data_dict = {
|
112 |
+
# "eval_name": self.eval_name, # not a column, just a save name,
|
113 |
+
# AutoEvalColumn.precision.name: self.precision.value.name,
|
114 |
+
# AutoEvalColumn.model_type.name: self.model_type.value.name,
|
115 |
+
# AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
|
116 |
+
# AutoEvalColumn.weight_type.name: self.weight_type.value.name,
|
117 |
+
# AutoEvalColumn.architecture.name: self.architecture,
|
118 |
+
# AutoEvalColumn.model.name: make_clickable_model(self.full_model),
|
119 |
+
# AutoEvalColumn.dummy.name: self.full_model,
|
120 |
+
# AutoEvalColumn.revision.name: self.revision,
|
121 |
+
# AutoEvalColumn.average.name: average,
|
122 |
+
# AutoEvalColumn.license.name: self.license,
|
123 |
+
# AutoEvalColumn.likes.name: self.likes,
|
124 |
+
# AutoEvalColumn.params.name: self.num_params,
|
125 |
+
# AutoEvalColumn.still_on_hub.name: self.still_on_hub,
|
126 |
+
# }
|
127 |
+
try:
|
128 |
+
data_dict["eval_name"] = self.eval_name
|
129 |
+
except KeyError:
|
130 |
+
print(f"Could not find eval name")
|
131 |
+
|
132 |
+
try:
|
133 |
+
data_dict[AutoEvalColumn.precision.name] = self.precision.value.name
|
134 |
+
except KeyError:
|
135 |
+
print(f"Could not find precision")
|
136 |
+
|
137 |
+
try:
|
138 |
+
data_dict[AutoEvalColumn.model_type.name] = self.model_type.value.name
|
139 |
+
except KeyError:
|
140 |
+
print(f"Could not find model type")
|
141 |
+
|
142 |
+
try:
|
143 |
+
data_dict[AutoEvalColumn.model_type_symbol.name] = self.model_type.value.symbol
|
144 |
+
except KeyError:
|
145 |
+
print(f"Could not find model type symbol")
|
146 |
+
|
147 |
+
try:
|
148 |
+
data_dict[AutoEvalColumn.weight_type.name] = self.weight_type.value.name
|
149 |
+
except KeyError:
|
150 |
+
print(f"Could not find weight type")
|
151 |
+
|
152 |
+
try:
|
153 |
+
data_dict[AutoEvalColumn.architecture.name] = self.architecture
|
154 |
+
except KeyError:
|
155 |
+
print(f"Could not find architecture")
|
156 |
+
|
157 |
+
try:
|
158 |
+
data_dict[AutoEvalColumn.model.name] = make_clickable_model(self.full_model)
|
159 |
+
except KeyError:
|
160 |
+
print(f"Could not find model")
|
161 |
+
|
162 |
+
try:
|
163 |
+
data_dict[AutoEvalColumn.dummy.name] = self.full_model
|
164 |
+
except KeyError:
|
165 |
+
print(f"Could not find dummy")
|
166 |
+
|
167 |
+
try:
|
168 |
+
data_dict[AutoEvalColumn.revision.name] = self.revision
|
169 |
+
except KeyError:
|
170 |
+
print(f"Could not find revision")
|
171 |
+
|
172 |
+
try:
|
173 |
+
data_dict[AutoEvalColumn.average.name] = average
|
174 |
+
except KeyError:
|
175 |
+
print(f"Could not find average")
|
176 |
+
|
177 |
+
try:
|
178 |
+
data_dict[AutoEvalColumn.license.name] = self.license
|
179 |
+
except KeyError:
|
180 |
+
print(f"Could not find license")
|
181 |
+
|
182 |
+
try:
|
183 |
+
data_dict[AutoEvalColumn.likes.name] = self.likes
|
184 |
+
except KeyError:
|
185 |
+
print(f"Could not find likes")
|
186 |
+
|
187 |
+
try:
|
188 |
+
data_dict[AutoEvalColumn.params.name] = self.num_params
|
189 |
+
except KeyError:
|
190 |
+
print(f"Could not find params")
|
191 |
+
|
192 |
+
try:
|
193 |
+
data_dict[AutoEvalColumn.still_on_hub.name] = self.still_on_hub
|
194 |
+
except KeyError:
|
195 |
+
print(f"Could not find still on hub")
|
196 |
+
|
197 |
+
|
198 |
|
199 |
for task in Tasks:
|
200 |
data_dict[task.value.col_name] = self.results[task.value.benchmark]
|