Aaron Mueller committed
Commit 891a1ea
1 Parent(s): b026e8b

remove ModelType

Files changed (1):
1. src/leaderboard/read_evals.py (+21 -42)
src/leaderboard/read_evals.py CHANGED
@@ -8,7 +8,7 @@ import dateutil
 import numpy as np
 
 from src.display.formatting import make_clickable_model
-from src.display.utils import AutoEvalColumn, ModelType, Tasks, TasksMultimodal, Precision, WeightType
+from src.display.utils import AutoEvalColumn, Tasks, TasksMultimodal
 from src.submission.check_validity import is_model_on_hub
 
 
@@ -22,13 +22,6 @@ class EvalResult:
     model: str
     revision: str # commit hash, "" if main
     results: dict
-    precision: Precision = Precision.Unknown
-    model_type: ModelType = ModelType.Unknown # Pretrained, fine tuned, ...
-    weight_type: WeightType = WeightType.Original # Original or Adapter
-    architecture: str = "Unknown"
-    license: str = "?"
-    likes: int = 0
-    num_params: int = 0
     date: str = "" # submission date of request file
     still_on_hub: bool = False
 
@@ -41,9 +34,6 @@ class EvalResult:
         config = data.get("config")
         track = data.get("track")
 
-        # Precision
-        precision = Precision.from_str(config.get("model_dtype"))
-
         # Get model and org
         org_and_model = config.get("model_name", config.get("model_args", None))
         org_and_model = org_and_model.split("/", 1)
@@ -51,45 +41,47 @@
         if len(org_and_model) == 1:
             org = None
             model = org_and_model[0]
-            result_key = f"{model}_{precision.value.name}"
         else:
             org = org_and_model[0]
             model = org_and_model[1]
-            result_key = f"{org}_{model}_{precision.value.name}"
         full_model = "/".join(org_and_model)
 
         still_on_hub, _, model_config = is_model_on_hub(
             full_model, config.get("model_sha", "main"), trust_remote_code=True, test_tokenizer=False
         )
-        architecture = "?"
-        if model_config is not None:
-            architectures = getattr(model_config, "architectures", None)
-            if architectures:
-                architecture = ";".join(architectures)
-
-        # Extract results available in this file (some results are split in several files)
-        results = {}
-        for task in Tasks:
-            task = task.value
 
+        def _get_task_results(task):
             # We average all scores of a given metric (not all metrics are present in all files)
             accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if task.benchmark == k])
             if accs.size == 0 or any([acc is None for acc in accs]):
-                continue
+                return None
 
             mean_acc = np.mean(accs) * 100.0
-            results[task.benchmark] = mean_acc
+            return mean_acc
+
+        # Extract results available in this file (some results are split in several files)
+        results = {}
+        if track.lower() == "multimodal":
+            for task in TasksMultimodal:
+                task = task.value
+                task_result = _get_task_results(task)
+                if task_result is not None:
+                    results[task.benchmark] = task_result
+        else:
+            for task in Tasks:
+                task = task.value
+                task_result = _get_task_results(task)
+                if task_result is not None:
+                    results[task.benchmark] = task_result
+
 
         return self(
-            eval_name=result_key,
             full_model=full_model,
             org=org,
             model=model,
             results=results,
-            precision=precision,
-            revision= config.get("model_sha", ""),
+            revision=config.get("model_sha", ""),
            still_on_hub=still_on_hub,
-            architecture=architecture
         )
 
     def update_with_request_file(self, requests_path):
@@ -99,11 +91,6 @@ class EvalResult:
         try:
             with open(request_file, "r") as f:
                 request = json.load(f)
-                self.model_type = ModelType.from_str(request.get("model_type", ""))
-                self.weight_type = WeightType[request.get("weight_type", "Original")]
-                self.license = request.get("license", "?")
-                self.likes = request.get("likes", 0)
-                self.num_params = request.get("params", 0)
                 self.date = request.get("submitted_time", "")
         except Exception:
             print(f"Could not find request file for {self.org}/{self.model} with precision {self.precision.value.name}")
@@ -113,17 +100,9 @@ class EvalResult:
         average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
         data_dict = {
             "eval_name": self.eval_name, # not a column, just a save name,
-            AutoEvalColumn.precision.name: self.precision.value.name,
-            AutoEvalColumn.model_type.name: self.model_type.value.name,
-            AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
-            AutoEvalColumn.weight_type.name: self.weight_type.value.name,
-            AutoEvalColumn.architecture.name: self.architecture,
             AutoEvalColumn.model.name: make_clickable_model(self.full_model),
             AutoEvalColumn.revision.name: self.revision,
             AutoEvalColumn.average.name: average,
-            AutoEvalColumn.license.name: self.license,
-            AutoEvalColumn.likes.name: self.likes,
-            AutoEvalColumn.params.name: self.num_params,
             AutoEvalColumn.still_on_hub.name: self.still_on_hub,
         }
 
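
For reference, below is a minimal, self-contained sketch of the results-extraction flow after this change. The Task dataclass, the TEXT_TASKS / MULTIMODAL_TASKS lists, and the toy results payload are invented for illustration only; the real code iterates the Tasks and TasksMultimodal enums from src.display.utils and reads the payload from a results JSON file.

# Minimal sketch of the per-task extraction above. Task, TEXT_TASKS,
# MULTIMODAL_TASKS, and `data` are hypothetical stand-ins for the
# Tasks / TasksMultimodal enums and the parsed results JSON.
from dataclasses import dataclass

import numpy as np


@dataclass
class Task:
    benchmark: str  # key of the benchmark in the results file
    metric: str     # name of the metric to average


TEXT_TASKS = [Task("blimp", "acc"), Task("ewok", "acc")]  # hypothetical
MULTIMODAL_TASKS = TEXT_TASKS + [Task("vqa", "acc")]      # hypothetical

# Toy stand-in for a parsed results JSON file.
data = {
    "track": "multimodal",
    "results": {
        "blimp": {"acc": 0.75},
        "ewok": {"acc": 0.5},
        "vqa": {},  # metric missing -> this task is skipped
    },
}


def _get_task_results(task):
    # Average every score reported for this task's metric; return None when the
    # metric is absent so the caller can skip the task entirely.
    accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if task.benchmark == k])
    if accs.size == 0 or any(acc is None for acc in accs):
        return None
    return np.mean(accs) * 100.0


# Choose the task list from the track, mirroring the if/else in the new code.
tasks = MULTIMODAL_TASKS if data["track"].lower() == "multimodal" else TEXT_TASKS
results = {}
for task in tasks:
    task_result = _get_task_results(task)
    if task_result is not None:
        results[task.benchmark] = task_result

print(results)  # 'vqa' is dropped; 'blimp' -> 75.0, 'ewok' -> 50.0

Benchmarks whose metric is missing are simply left out of results; to_dict() still divides by len(Tasks) when computing the average, so a missing task effectively counts as zero.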