Clémentine
committed on
Commit
•
97b27da
1
Parent(s):
256c5d3
Small fix: we do not want to display models evaluated with the old MMLU version alongside models evaluated with the new MMLU version. However, since the version is displayed in the results, we keep the files.
Browse files
src/auto_leaderboard/load_results.py
CHANGED
@@ -55,6 +55,10 @@ class EvalResult:
|
|
55 |
def parse_eval_result(json_filepath: str) -> Tuple[str, list[dict]]:
|
56 |
with open(json_filepath) as fp:
|
57 |
data = json.load(fp)
|
|
|
|
|
|
|
|
|
58 |
|
59 |
config = data["config"]
|
60 |
model = config.get("model_name", None)
|
|
|
55 |
def parse_eval_result(json_filepath: str) -> Tuple[str, list[dict]]:
|
56 |
with open(json_filepath) as fp:
|
57 |
data = json.load(fp)
|
58 |
+
|
59 |
+
for mmlu_k in ["harness|hendrycksTest-abstract_algebra|5", "hendrycksTest-abstract_algebra"]:
|
60 |
+
if mmlu_k in data["versions"] and data["versions"][mmlu_k] == 0:
|
61 |
+
return None, [] # we skip models with the wrong version
|
62 |
|
63 |
config = data["config"]
|
64 |
model = config.get("model_name", None)
|