Gregor Betz committed
Commit c848631
1 Parent(s): 60b3a41

add dashboard

src/display/formatting.py CHANGED
@@ -1,12 +1,8 @@
-import os
-from datetime import datetime, timezone
+# utility functions for formatting text and data for display in the leaderboard
 
-from huggingface_hub import HfApi
-from huggingface_hub.hf_api import ModelInfo
-
-
-API = HfApi()
+from src.envs import DASHBOARD_LINK
+
 
 def model_hyperlink(link, model_name):
     return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
@@ -16,6 +12,12 @@ def make_clickable_model(model_name):
     return model_hyperlink(link, model_name)
 
 
+def model_dashboard_hyperlink(model_name):
+    link = DASHBOARD_LINK.format(model_id=model_name)
+    html_link = f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">show in 📊</a>'
+    return html_link
+
+
 def styled_error(error):
     return f"<p style='color: red; font-size: 20px; text-align: center;'>{error}</p>"
 
src/display/utils.py CHANGED
@@ -1,7 +1,8 @@
 from dataclasses import dataclass, make_dataclass
 from enum import Enum
+from typing import Any
 
-import pandas as pd
+import pandas as pd  # type: ignore
 
 from src.display.about import Tasks
 
@@ -22,26 +23,28 @@ class ColumnContent:
     dummy: bool = False
 
 ## Leaderboard columns
-auto_eval_column_dict = []
+auto_eval_column_dict: list[tuple[str, type, Any]] = []
 # Init
-auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
-auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
-#Scores
-auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
+auto_eval_column_dict.append(("model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)))
+auto_eval_column_dict.append(("model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)))
+# Scores
+auto_eval_column_dict.append(("average", ColumnContent, ColumnContent("Average ⬆️", "number", True)))
 for task in Tasks:
-    auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
-
+    auto_eval_column_dict.append((task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)))
+# Dashboard
+auto_eval_column_dict.append(("dashboard_link", ColumnContent, ColumnContent("Dashboard", "markdown", False)))
 # Model information
-auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
-auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
-auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
-auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
-auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
-auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
-auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)])
-auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
-auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
+auto_eval_column_dict.append(("model_type", ColumnContent, ColumnContent("Type", "str", False)))
+auto_eval_column_dict.append(("architecture", ColumnContent, ColumnContent("Architecture", "str", False)))
+auto_eval_column_dict.append(("weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)))
+auto_eval_column_dict.append(("precision", ColumnContent, ColumnContent("Precision", "str", False)))
+auto_eval_column_dict.append(("license", ColumnContent, ColumnContent("Hub License", "str", False)))
+auto_eval_column_dict.append(("params", ColumnContent, ColumnContent("#Params (B)", "number", False)))
+auto_eval_column_dict.append(("likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)))
+auto_eval_column_dict.append(("still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)))
+auto_eval_column_dict.append(("revision", ColumnContent, ColumnContent("Model sha", "str", False, False)))
 # Dummy column for the search bar (hidden by the custom CSS)
-auto_eval_column_dict.append(["dummy", ColumnContent, ColumnContent("model_name_for_query", "str", False, dummy=True)])
+auto_eval_column_dict.append(("dummy", ColumnContent, ColumnContent("model_name_for_query", "str", False, dummy=True)))
 
 # We use make dataclass to dynamically fill the scores from Tasks
 AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
@@ -53,7 +56,7 @@ class EvalQueueColumn:  # Queue column
     revision = ColumnContent("revision", "str", True)
     private = ColumnContent("private", "bool", True)
     precision = ColumnContent("precision", "str", True)
-    weight_type = ColumnContent("weight_type", "str", "Original")
+    weight_type = ColumnContent("weight_type", "str", True)
     status = ColumnContent("status", "str", True)
 
 ## All the model information that we might need
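A minimal, self-contained sketch of why the switch from list entries to tuples (plus the Any annotation) still satisfies make_dataclass, which accepts (name, type, default) triples; ColumnContent is simplified here and the two columns are only illustrative:

from dataclasses import dataclass, make_dataclass
from typing import Any

@dataclass(frozen=True)
class ColumnContent:
    name: str
    type: str
    displayed_by_default: bool

cols: list[tuple[str, type, Any]] = [
    ("model", ColumnContent, ColumnContent("Model", "markdown", True)),
    ("dashboard_link", ColumnContent, ColumnContent("Dashboard", "markdown", False)),
]
AutoEvalColumn = make_dataclass("AutoEvalColumn", cols, frozen=True)

# Field defaults become class attributes, so column metadata reads as before:
print(AutoEvalColumn.dashboard_link.name)  # -> "Dashboard"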
src/envs.py CHANGED
@@ -1,6 +1,6 @@
 import os
 
-from huggingface_hub import HfApi
+from huggingface_hub import HfApi  # type: ignore
 
 # clone / pull the lmeh eval data
 TOKEN = os.environ.get("TOKEN", None)
@@ -11,9 +11,11 @@ REPO_ID = f"{OWNER}/open_cot_leaderboard"
 QUEUE_REPO = f"{DATA_OWNER}/cot-leaderboard-requests"
 RESULTS_REPO = f"{DATA_OWNER}/cot-leaderboard-results"
 
-
 CACHE_PATH=os.getenv("HF_HOME", ".")
 
+# Dashboard
+DASHBOARD_LINK = "https://huggingface.co/cot-leaderboard/open-cot-dashboard?model={model_id}"
+
 # Local caches
 EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "eval-queue")
 EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results")
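A quick sketch of how the new template is expected to expand (the model id below is made up); if ids ever contain characters that need escaping in a query string, quoting the value first would be an option:

from urllib.parse import quote

DASHBOARD_LINK = "https://huggingface.co/cot-leaderboard/open-cot-dashboard?model={model_id}"

model_id = "some-org/some-model"  # placeholder id
print(DASHBOARD_LINK.format(model_id=model_id))
# -> https://huggingface.co/cot-leaderboard/open-cot-dashboard?model=some-org/some-model
print(DASHBOARD_LINK.format(model_id=quote(model_id, safe="")))
# -> https://huggingface.co/cot-leaderboard/open-cot-dashboard?model=some-org%2Fsome-model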
src/leaderboard/read_evals.py CHANGED
@@ -1,13 +1,12 @@
 import glob
 import json
-import math
 import os
 from dataclasses import dataclass
 
-import dateutil
+import dateutil  # type: ignore
 import numpy as np
 
-from src.display.formatting import make_clickable_model
+from src.display.formatting import make_clickable_model, model_dashboard_hyperlink
 from src.display.utils import AutoEvalColumn, ModelType, Tasks, Precision, WeightType
 from src.submission.check_validity import is_model_on_hub
 from src.envs import TOKEN
@@ -117,6 +116,7 @@ class EvalResult:
             AutoEvalColumn.weight_type.name: self.weight_type.value.name,
             AutoEvalColumn.architecture.name: self.architecture,
             AutoEvalColumn.model.name: make_clickable_model(self.full_model),
+            AutoEvalColumn.dashboard_link.name: model_dashboard_hyperlink(self.full_model),
             AutoEvalColumn.dummy.name: self.full_model,
             AutoEvalColumn.revision.name: self.revision,
             AutoEvalColumn.average.name: average,
@@ -172,7 +172,7 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
         for file in files:
             model_result_filepaths.append(os.path.join(root, file))
 
-    eval_results = {}
+    eval_results: dict[str, EvalResult] = {}
     for model_result_filepath in model_result_filepaths:
         # Creation of result
         eval_result = EvalResult.init_from_json_file(model_result_filepath)
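Putting the pieces together, a hedged sketch of how a single leaderboard row picks up the new Dashboard cell (assuming the repo modules above are importable; the model id is a placeholder, and real rows also carry scores and metadata):

import pandas as pd

from src.display.formatting import make_clickable_model, model_dashboard_hyperlink
from src.display.utils import AutoEvalColumn

full_model = "some-org/some-model"  # placeholder id
row = {
    AutoEvalColumn.model.name: make_clickable_model(full_model),
    AutoEvalColumn.dashboard_link.name: model_dashboard_hyperlink(full_model),
    AutoEvalColumn.dummy.name: full_model,  # hidden column used by the search bar
}
df = pd.DataFrame([row])  # both markdown columns render as clickable links in the Gradio table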