Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Gregor Betz
committed on
add dashboard
Browse files
- src/display/formatting.py +8 -6
- src/display/utils.py +21 -18
- src/envs.py +4 -2
- src/leaderboard/read_evals.py +4 -4
src/display/formatting.py
CHANGED
@@ -1,12 +1,8 @@
|
|
1 |
-
|
2 |
-
from datetime import datetime, timezone
|
3 |
|
4 |
-
from
|
5 |
-
from huggingface_hub.hf_api import ModelInfo
|
6 |
|
7 |
|
8 |
-
API = HfApi()
|
9 |
-
|
10 |
def model_hyperlink(link, model_name):
|
11 |
return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
|
12 |
|
@@ -16,6 +12,12 @@ def make_clickable_model(model_name):
|
|
16 |
return model_hyperlink(link, model_name)
|
17 |
|
18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
def styled_error(error):
|
20 |
return f"<p style='color: red; font-size: 20px; text-align: center;'>{error}</p>"
|
21 |
|
|
|
1 |
+
# utility functions for formatting text and data for display in the leaderboard
|
|
|
2 |
|
3 |
+
from src.envs import DASHBOARD_LINK
|
|
|
4 |
|
5 |
|
|
|
|
|
6 |
def model_hyperlink(link, model_name):
|
7 |
return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
|
8 |
|
|
|
12 |
return model_hyperlink(link, model_name)
|
13 |
|
14 |
|
15 |
+
def model_dashboard_hyperlink(model_name):
|
16 |
+
link = DASHBOARD_LINK.format(model_id=model_name)
|
17 |
+
html_link = f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">show in 📊</a>'
|
18 |
+
return html_link
|
19 |
+
|
20 |
+
|
21 |
def styled_error(error):
|
22 |
return f"<p style='color: red; font-size: 20px; text-align: center;'>{error}</p>"
|
23 |
|
src/display/utils.py
CHANGED
@@ -1,7 +1,8 @@
|
|
1 |
from dataclasses import dataclass, make_dataclass
|
2 |
from enum import Enum
|
|
|
3 |
|
4 |
-
import pandas as pd
|
5 |
|
6 |
from src.display.about import Tasks
|
7 |
|
@@ -22,26 +23,28 @@ class ColumnContent:
|
|
22 |
dummy: bool = False
|
23 |
|
24 |
## Leaderboard columns
|
25 |
-
auto_eval_column_dict = []
|
26 |
# Init
|
27 |
-
auto_eval_column_dict.append(
|
28 |
-
auto_eval_column_dict.append(
|
29 |
-
#Scores
|
30 |
-
auto_eval_column_dict.append(
|
31 |
for task in Tasks:
|
32 |
-
auto_eval_column_dict.append(
|
|
|
|
|
33 |
# Model information
|
34 |
-
auto_eval_column_dict.append(
|
35 |
-
auto_eval_column_dict.append(
|
36 |
-
auto_eval_column_dict.append(
|
37 |
-
auto_eval_column_dict.append(
|
38 |
-
auto_eval_column_dict.append(
|
39 |
-
auto_eval_column_dict.append(
|
40 |
-
auto_eval_column_dict.append(
|
41 |
-
auto_eval_column_dict.append(
|
42 |
-
auto_eval_column_dict.append(
|
43 |
# Dummy column for the search bar (hidden by the custom CSS)
|
44 |
-
auto_eval_column_dict.append(
|
45 |
|
46 |
# We use make dataclass to dynamically fill the scores from Tasks
|
47 |
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
|
@@ -53,7 +56,7 @@ class EvalQueueColumn: # Queue column
|
|
53 |
revision = ColumnContent("revision", "str", True)
|
54 |
private = ColumnContent("private", "bool", True)
|
55 |
precision = ColumnContent("precision", "str", True)
|
56 |
-
weight_type = ColumnContent("weight_type", "str",
|
57 |
status = ColumnContent("status", "str", True)
|
58 |
|
59 |
## All the model information that we might need
|
|
|
1 |
from dataclasses import dataclass, make_dataclass
|
2 |
from enum import Enum
|
3 |
+
from typing import Any
|
4 |
|
5 |
+
import pandas as pd # type: ignore
|
6 |
|
7 |
from src.display.about import Tasks
|
8 |
|
|
|
23 |
dummy: bool = False
|
24 |
|
25 |
## Leaderboard columns
|
26 |
+
auto_eval_column_dict: list[tuple[str, type, Any]] = []
|
27 |
# Init
|
28 |
+
auto_eval_column_dict.append(("model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)))
|
29 |
+
auto_eval_column_dict.append(("model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)))
|
30 |
+
# Scores
|
31 |
+
auto_eval_column_dict.append(("average", ColumnContent, ColumnContent("Average ⬆️", "number", True)))
|
32 |
for task in Tasks:
|
33 |
+
auto_eval_column_dict.append((task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)))
|
34 |
+
# Dashboard
|
35 |
+
auto_eval_column_dict.append(("dashboard_link", ColumnContent, ColumnContent("Dashboard", "markdown", False)))
|
36 |
# Model information
|
37 |
+
auto_eval_column_dict.append(("model_type", ColumnContent, ColumnContent("Type", "str", False)))
|
38 |
+
auto_eval_column_dict.append(("architecture", ColumnContent, ColumnContent("Architecture", "str", False)))
|
39 |
+
auto_eval_column_dict.append(("weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)))
|
40 |
+
auto_eval_column_dict.append(("precision", ColumnContent, ColumnContent("Precision", "str", False)))
|
41 |
+
auto_eval_column_dict.append(("license", ColumnContent, ColumnContent("Hub License", "str", False)))
|
42 |
+
auto_eval_column_dict.append(("params", ColumnContent, ColumnContent("#Params (B)", "number", False)))
|
43 |
+
auto_eval_column_dict.append(("likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)))
|
44 |
+
auto_eval_column_dict.append(("still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)))
|
45 |
+
auto_eval_column_dict.append(("revision", ColumnContent, ColumnContent("Model sha", "str", False, False)))
|
46 |
# Dummy column for the search bar (hidden by the custom CSS)
|
47 |
+
auto_eval_column_dict.append(("dummy", ColumnContent, ColumnContent("model_name_for_query", "str", False, dummy=True)))
|
48 |
|
49 |
# We use make dataclass to dynamically fill the scores from Tasks
|
50 |
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
|
|
|
56 |
revision = ColumnContent("revision", "str", True)
|
57 |
private = ColumnContent("private", "bool", True)
|
58 |
precision = ColumnContent("precision", "str", True)
|
59 |
+
weight_type = ColumnContent("weight_type", "str", True)
|
60 |
status = ColumnContent("status", "str", True)
|
61 |
|
62 |
## All the model information that we might need
|
src/envs.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
import os
|
2 |
|
3 |
-
from huggingface_hub import HfApi
|
4 |
|
5 |
# clone / pull the lmeh eval data
|
6 |
TOKEN = os.environ.get("TOKEN", None)
|
@@ -11,9 +11,11 @@ REPO_ID = f"{OWNER}/open_cot_leaderboard"
|
|
11 |
QUEUE_REPO = f"{DATA_OWNER}/cot-leaderboard-requests"
|
12 |
RESULTS_REPO = f"{DATA_OWNER}/cot-leaderboard-results"
|
13 |
|
14 |
-
|
15 |
CACHE_PATH=os.getenv("HF_HOME", ".")
|
16 |
|
|
|
|
|
|
|
17 |
# Local caches
|
18 |
EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "eval-queue")
|
19 |
EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results")
|
|
|
1 |
import os
|
2 |
|
3 |
+
from huggingface_hub import HfApi # type: ignore
|
4 |
|
5 |
# clone / pull the lmeh eval data
|
6 |
TOKEN = os.environ.get("TOKEN", None)
|
|
|
11 |
QUEUE_REPO = f"{DATA_OWNER}/cot-leaderboard-requests"
|
12 |
RESULTS_REPO = f"{DATA_OWNER}/cot-leaderboard-results"
|
13 |
|
|
|
14 |
CACHE_PATH=os.getenv("HF_HOME", ".")
|
15 |
|
16 |
+
# Dashboard
|
17 |
+
DASHBOARD_LINK = "https://huggingface.co/cot-leaderboard/open-cot-dashboard?model={model_id}"
|
18 |
+
|
19 |
# Local caches
|
20 |
EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "eval-queue")
|
21 |
EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results")
|
src/leaderboard/read_evals.py
CHANGED
@@ -1,13 +1,12 @@
|
|
1 |
import glob
|
2 |
import json
|
3 |
-
import math
|
4 |
import os
|
5 |
from dataclasses import dataclass
|
6 |
|
7 |
-
import dateutil
|
8 |
import numpy as np
|
9 |
|
10 |
-
from src.display.formatting import make_clickable_model
|
11 |
from src.display.utils import AutoEvalColumn, ModelType, Tasks, Precision, WeightType
|
12 |
from src.submission.check_validity import is_model_on_hub
|
13 |
from src.envs import TOKEN
|
@@ -117,6 +116,7 @@ class EvalResult:
|
|
117 |
AutoEvalColumn.weight_type.name: self.weight_type.value.name,
|
118 |
AutoEvalColumn.architecture.name: self.architecture,
|
119 |
AutoEvalColumn.model.name: make_clickable_model(self.full_model),
|
|
|
120 |
AutoEvalColumn.dummy.name: self.full_model,
|
121 |
AutoEvalColumn.revision.name: self.revision,
|
122 |
AutoEvalColumn.average.name: average,
|
@@ -172,7 +172,7 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
|
|
172 |
for file in files:
|
173 |
model_result_filepaths.append(os.path.join(root, file))
|
174 |
|
175 |
-
eval_results = {}
|
176 |
for model_result_filepath in model_result_filepaths:
|
177 |
# Creation of result
|
178 |
eval_result = EvalResult.init_from_json_file(model_result_filepath)
|
|
|
1 |
import glob
|
2 |
import json
|
|
|
3 |
import os
|
4 |
from dataclasses import dataclass
|
5 |
|
6 |
+
import dateutil # type: ignore
|
7 |
import numpy as np
|
8 |
|
9 |
+
from src.display.formatting import make_clickable_model, model_dashboard_hyperlink
|
10 |
from src.display.utils import AutoEvalColumn, ModelType, Tasks, Precision, WeightType
|
11 |
from src.submission.check_validity import is_model_on_hub
|
12 |
from src.envs import TOKEN
|
|
|
116 |
AutoEvalColumn.weight_type.name: self.weight_type.value.name,
|
117 |
AutoEvalColumn.architecture.name: self.architecture,
|
118 |
AutoEvalColumn.model.name: make_clickable_model(self.full_model),
|
119 |
+
AutoEvalColumn.dashboard_link.name: model_dashboard_hyperlink(self.full_model),
|
120 |
AutoEvalColumn.dummy.name: self.full_model,
|
121 |
AutoEvalColumn.revision.name: self.revision,
|
122 |
AutoEvalColumn.average.name: average,
|
|
|
172 |
for file in files:
|
173 |
model_result_filepaths.append(os.path.join(root, file))
|
174 |
|
175 |
+
eval_results: dict[str, EvalResult] = {}
|
176 |
for model_result_filepath in model_result_filepaths:
|
177 |
# Creation of result
|
178 |
eval_result = EvalResult.init_from_json_file(model_result_filepath)
|