Spaces:
Sleeping
Sleeping
Update src/leaderboard/read_evals.py
Browse files
src/leaderboard/read_evals.py
CHANGED
@@ -1,4 +1,3 @@
|
|
1 |
-
import glob
|
2 |
import json
|
3 |
import math
|
4 |
import os
|
@@ -197,7 +196,6 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
|
|
197 |
for result in eval_results.values():
|
198 |
result.average = np.mean(list(result.results.values()))
|
199 |
sorted_results = sorted(eval_results.values(), key=lambda r: r.average, reverse=True)
|
200 |
-
print(f"SORTED RESULTS HERE: \n{sorted_results}")
|
201 |
for i,v in enumerate(sorted_results):
|
202 |
try:
|
203 |
v.to_dict(i) # we test if the dict version is complete
|
|
|
|
|
1 |
import json
|
2 |
import math
|
3 |
import os
|
|
|
196 |
for result in eval_results.values():
|
197 |
result.average = np.mean(list(result.results.values()))
|
198 |
sorted_results = sorted(eval_results.values(), key=lambda r: r.average, reverse=True)
|
|
|
199 |
for i,v in enumerate(sorted_results):
|
200 |
try:
|
201 |
v.to_dict(i) # we test if the dict version is complete
|