warning
Browse files
- app.py +3 -0
- src/leaderboard/read_evals.py +12 -10
app.py
CHANGED
@@ -63,6 +63,8 @@ leaderboard_df = original_df.copy()
|
|
63 |
leaderboard_df = leaderboard_df[leaderboard_df[AutoEvalColumn.still_on_hub.name] == True]
|
64 |
# leaderboard_df = leaderboard_df[('speakleash' not in leaderboard_df['model_name_for_query']) | ('Bielik' in leaderboard_df['model_name_for_query'])]
|
65 |
|
|
|
|
|
66 |
(
|
67 |
finished_eval_queue_df,
|
68 |
running_eval_queue_df,
|
@@ -400,6 +402,7 @@ with demo:
|
|
400 |
elem_id="citation-button",
|
401 |
show_copy_button=True,
|
402 |
)
|
|
|
403 |
|
404 |
scheduler = BackgroundScheduler()
|
405 |
scheduler.add_job(restart_space, "interval", seconds=1800)
|
|
|
63 |
leaderboard_df = leaderboard_df[leaderboard_df[AutoEvalColumn.still_on_hub.name] == True]
|
64 |
# leaderboard_df = leaderboard_df[('speakleash' not in leaderboard_df['model_name_for_query']) | ('Bielik' in leaderboard_df['model_name_for_query'])]
|
65 |
|
66 |
+
original_df.to_csv("output.csv")
|
67 |
+
|
68 |
(
|
69 |
finished_eval_queue_df,
|
70 |
running_eval_queue_df,
|
|
|
402 |
elem_id="citation-button",
|
403 |
show_copy_button=True,
|
404 |
)
|
405 |
+
csv = gr.File(interactive=False, value="output.csv")
|
406 |
|
407 |
scheduler = BackgroundScheduler()
|
408 |
scheduler.add_job(restart_space, "interval", seconds=1800)
|
src/leaderboard/read_evals.py
CHANGED
@@ -433,16 +433,7 @@ def get_raw_eval_results(results_path: str, requests_path: str, metadata) -> lis
|
|
433 |
for k,v in eval_results.items():
|
434 |
v.results = {k: v for k, (v, start_date) in v.results.items()}
|
435 |
|
436 |
-
|
437 |
-
for v in eval_results.values():
|
438 |
-
try:
|
439 |
-
print(v)
|
440 |
-
v.to_dict() # we test if the dict version is complete
|
441 |
-
# if v.results:
|
442 |
-
results.append(v)
|
443 |
-
except KeyError: # not all eval values present
|
444 |
-
print(f"not all eval values present {v.eval_name} {v.full_model}")
|
445 |
-
continue
|
446 |
|
447 |
all_models = []
|
448 |
missing_results_for_task = {}
|
@@ -473,6 +464,17 @@ def get_raw_eval_results(results_path: str, requests_path: str, metadata) -> lis
|
|
473 |
missing_metadata.append(f"{v.full_model}")
|
474 |
all_models.append((v.full_model, v.num_params, v.still_on_hub))
|
475 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
476 |
print(f"Missing sbatch results:")
|
477 |
for r in for_run:
|
478 |
if r[0]==5 and r[1] in ['polish_eq_bench']: continue
|
|
|
433 |
for k,v in eval_results.items():
|
434 |
v.results = {k: v for k, (v, start_date) in v.results.items()}
|
435 |
|
436 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
437 |
|
438 |
all_models = []
|
439 |
missing_results_for_task = {}
|
|
|
464 |
missing_metadata.append(f"{v.full_model}")
|
465 |
all_models.append((v.full_model, v.num_params, v.still_on_hub))
|
466 |
|
467 |
+
results = []
|
468 |
+
for v in eval_results.values():
|
469 |
+
try:
|
470 |
+
print(v)
|
471 |
+
v.to_dict() # we test if the dict version is complete
|
472 |
+
# if v.results:
|
473 |
+
results.append(v)
|
474 |
+
except KeyError: # not all eval values present
|
475 |
+
print(f"not all eval values present {v.eval_name} {v.full_model}")
|
476 |
+
continue
|
477 |
+
|
478 |
print(f"Missing sbatch results:")
|
479 |
for r in for_run:
|
480 |
if r[0]==5 and r[1] in ['polish_eq_bench']: continue
|