Spaces:
Running
Running
Aaron Mueller
committed on
Commit
·
e996d92
1
Parent(s):
fb22d4b
testing now
Browse files
src/leaderboard/read_evals.py
CHANGED
@@ -16,7 +16,7 @@ from src.submission.check_validity import is_model_on_hub
|
|
16 |
class EvalResult:
|
17 |
"""Represents one full evaluation. Built from a combination of the result and request file for a given run.
|
18 |
"""
|
19 |
-
eval_name: str #
|
20 |
full_model: str # org/model (path on hub)
|
21 |
org: str
|
22 |
model: str
|
@@ -33,6 +33,7 @@ class EvalResult:
|
|
33 |
|
34 |
config = data.get("config")
|
35 |
track = data.get("track")
|
|
|
36 |
|
37 |
# Get model and org
|
38 |
org_and_model = config.get("model_name", config.get("model_args", None))
|
@@ -45,7 +46,7 @@ class EvalResult:
|
|
45 |
org = org_and_model[0]
|
46 |
model = org_and_model[1]
|
47 |
full_model = "/".join(org_and_model)
|
48 |
-
eval_name =
|
49 |
|
50 |
still_on_hub, _, model_config = is_model_on_hub(
|
51 |
full_model, config.get("model_sha", "main"), trust_remote_code=True, test_tokenizer=False
|
@@ -88,14 +89,14 @@ class EvalResult:
|
|
88 |
|
89 |
def update_with_request_file(self, requests_path):
|
90 |
"""Finds the relevant request file for the current model and updates info with it"""
|
91 |
-
request_file = get_request_file_for_model(requests_path, self.full_model, self.
|
92 |
|
93 |
try:
|
94 |
with open(request_file, "r") as f:
|
95 |
request = json.load(f)
|
96 |
self.date = request.get("submitted_time", "")
|
97 |
except Exception:
|
98 |
-
print(f"Could not find request file for {self.org}/{self.model}
|
99 |
|
100 |
def to_dict(self):
|
101 |
"""Converts the Eval Result to a dict compatible with our dataframe display"""
|
@@ -108,13 +109,17 @@ class EvalResult:
|
|
108 |
AutoEvalColumn.still_on_hub.name: self.still_on_hub,
|
109 |
}
|
110 |
|
111 |
-
|
|
|
|
|
|
|
|
|
112 |
data_dict[task.value.col_name] = self.results[task.value.benchmark]
|
113 |
|
114 |
return data_dict
|
115 |
|
116 |
|
117 |
-
def get_request_file_for_model(requests_path, model_name, precision):
|
118 |
"""Selects the correct request file for a given model. Only keeps runs tagged as FINISHED"""
|
119 |
request_files = os.path.join(
|
120 |
requests_path,
|
@@ -122,7 +127,7 @@ def get_request_file_for_model(requests_path, model_name, precision):
|
|
122 |
)
|
123 |
request_files = glob.glob(request_files)
|
124 |
|
125 |
-
# Select correct request file (precision)
|
126 |
request_file = ""
|
127 |
request_files = sorted(request_files, reverse=True)
|
128 |
for tmp_request_file in request_files:
|
@@ -130,7 +135,7 @@ def get_request_file_for_model(requests_path, model_name, precision):
|
|
130 |
req_content = json.load(f)
|
131 |
if (
|
132 |
req_content["status"] in ["FINISHED"]
|
133 |
-
and req_content["
|
134 |
):
|
135 |
request_file = tmp_request_file
|
136 |
return request_file
|
@@ -175,4 +180,4 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
|
|
175 |
except KeyError: # not all eval values present
|
176 |
continue
|
177 |
|
178 |
-
return results
|
|
|
16 |
class EvalResult:
|
17 |
"""Represents one full evaluation. Built from a combination of the result and request file for a given run.
|
18 |
"""
|
19 |
+
eval_name: str # org_model_track (uid)
|
20 |
full_model: str # org/model (path on hub)
|
21 |
org: str
|
22 |
model: str
|
|
|
33 |
|
34 |
config = data.get("config")
|
35 |
track = data.get("track")
|
36 |
+
self.track = track
|
37 |
|
38 |
# Get model and org
|
39 |
org_and_model = config.get("model_name", config.get("model_args", None))
|
|
|
46 |
org = org_and_model[0]
|
47 |
model = org_and_model[1]
|
48 |
full_model = "/".join(org_and_model)
|
49 |
+
eval_name = "_".join(org_and_model) + f"_{track}"
|
50 |
|
51 |
still_on_hub, _, model_config = is_model_on_hub(
|
52 |
full_model, config.get("model_sha", "main"), trust_remote_code=True, test_tokenizer=False
|
|
|
89 |
|
90 |
def update_with_request_file(self, requests_path):
|
91 |
"""Finds the relevant request file for the current model and updates info with it"""
|
92 |
+
request_file = get_request_file_for_model(requests_path, self.full_model, self.track)
|
93 |
|
94 |
try:
|
95 |
with open(request_file, "r") as f:
|
96 |
request = json.load(f)
|
97 |
self.date = request.get("submitted_time", "")
|
98 |
except Exception:
|
99 |
+
print(f"Could not find request file for {self.org}/{self.model}")
|
100 |
|
101 |
def to_dict(self):
|
102 |
"""Converts the Eval Result to a dict compatible with our dataframe display"""
|
|
|
109 |
AutoEvalColumn.still_on_hub.name: self.still_on_hub,
|
110 |
}
|
111 |
|
112 |
+
if self.track.lower() == "multimodal":
|
113 |
+
taskset = TasksMultimodal
|
114 |
+
else:
|
115 |
+
taskset = Tasks
|
116 |
+
for task in taskset:
|
117 |
data_dict[task.value.col_name] = self.results[task.value.benchmark]
|
118 |
|
119 |
return data_dict
|
120 |
|
121 |
|
122 |
+
def get_request_file_for_model(requests_path, model_name, track):
|
123 |
"""Selects the correct request file for a given model. Only keeps runs tagged as FINISHED"""
|
124 |
request_files = os.path.join(
|
125 |
requests_path,
|
|
|
127 |
)
|
128 |
request_files = glob.glob(request_files)
|
129 |
|
130 |
+
# Select correct request file (track)
|
131 |
request_file = ""
|
132 |
request_files = sorted(request_files, reverse=True)
|
133 |
for tmp_request_file in request_files:
|
|
|
135 |
req_content = json.load(f)
|
136 |
if (
|
137 |
req_content["status"] in ["FINISHED"]
|
138 |
+
and req_content["track"] == track
|
139 |
):
|
140 |
request_file = tmp_request_file
|
141 |
return request_file
|
|
|
180 |
except KeyError: # not all eval values present
|
181 |
continue
|
182 |
|
183 |
+
return results
|