add 5-shot
Browse files
- src/leaderboard/read_evals.py +14 -13
src/leaderboard/read_evals.py
CHANGED
@@ -33,7 +33,7 @@ class EvalResult:
|
|
33 |
still_on_hub: bool = False
|
34 |
|
35 |
@classmethod
|
36 |
-
def init_from_json_file(self, json_filepath):
|
37 |
"""Inits the result from the specific model result file"""
|
38 |
with open(json_filepath) as fp:
|
39 |
data = json.load(fp)
|
@@ -74,7 +74,7 @@ class EvalResult:
|
|
74 |
task = task.value
|
75 |
|
76 |
# We average all scores of a given metric (not all metrics are present in all files)
|
77 |
-
accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if task.benchmark == k and n_shot.get(k, -1) ==
|
78 |
if accs.size == 0 or any([acc is None for acc in accs]):
|
79 |
continue
|
80 |
|
@@ -253,17 +253,18 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
|
|
253 |
model_result_filepaths.append(os.path.join(root, file))
|
254 |
|
255 |
eval_results = {}
|
256 |
-
for
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
-
|
266 |
-
|
|
|
267 |
|
268 |
results = []
|
269 |
for v in eval_results.values():
|
|
|
33 |
still_on_hub: bool = False
|
34 |
|
35 |
@classmethod
|
36 |
+
def init_from_json_file(self, json_filepath, n_shot_num):
|
37 |
"""Inits the result from the specific model result file"""
|
38 |
with open(json_filepath) as fp:
|
39 |
data = json.load(fp)
|
|
|
74 |
task = task.value
|
75 |
|
76 |
# We average all scores of a given metric (not all metrics are present in all files)
|
77 |
+
accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if task.benchmark == k and n_shot.get(k, -1) == n_shot_num])
|
78 |
if accs.size == 0 or any([acc is None for acc in accs]):
|
79 |
continue
|
80 |
|
|
|
253 |
model_result_filepaths.append(os.path.join(root, file))
|
254 |
|
255 |
eval_results = {}
|
256 |
+
for n_shot in [0,5]:
|
257 |
+
for model_result_filepath in model_result_filepaths:
|
258 |
+
# Creation of result
|
259 |
+
eval_result = EvalResult.init_from_json_file(model_result_filepath, n_shot_num=n_shot)
|
260 |
+
eval_result.update_with_request_file(requests_path)
|
261 |
+
|
262 |
+
# Store results of same eval together
|
263 |
+
eval_name = f"{eval_result.eval_name}_{n_shot}-shot"
|
264 |
+
if eval_name in eval_results.keys():
|
265 |
+
eval_results[eval_name].results.update({k: v for k, v in eval_result.results.items() if v is not None})
|
266 |
+
else:
|
267 |
+
eval_results[eval_name] = eval_result
|
268 |
|
269 |
results = []
|
270 |
for v in eval_results.values():
|