Clémentine committed on
Commit
6eaad72
·
1 Parent(s): d350941

added precision

Browse files
src/auto_leaderboard/load_results.py CHANGED
@@ -26,7 +26,7 @@ class EvalResult:
26
  model: str
27
  revision: str
28
  results: dict
29
- precision: str = "16bit"
30
  model_type: str = ""
31
  weight_type: str = ""
32
 
@@ -77,16 +77,18 @@ def parse_eval_result(json_filepath: str) -> Tuple[str, list[dict]]:
77
  eval_sha = config.get("lighteval_sha", "")
78
  model_split = model.split("/", 1)
79
 
 
 
80
  model = model_split[-1]
81
 
82
  if len(model_split) == 1:
83
  org = None
84
  model = model_split[0]
85
- result_key = f"{model}_{model_sha}_{eval_sha}"
86
  else:
87
  org = model_split[0]
88
  model = model_split[1]
89
- result_key = f"{org}_{model}_{model_sha}_{eval_sha}"
90
 
91
  eval_results = []
92
  for benchmark, metric in zip(BENCHMARKS, METRICS):
@@ -95,7 +97,7 @@ def parse_eval_result(json_filepath: str) -> Tuple[str, list[dict]]:
95
  continue
96
  mean_acc = np.mean(accs) * 100.0
97
  eval_results.append(EvalResult(
98
- eval_name=result_key, org=org, model=model, revision=model_sha, results={benchmark: mean_acc}, #todo model_type=, weight_type=
99
  ))
100
 
101
  return result_key, eval_results
@@ -110,14 +112,15 @@ def get_eval_results(is_public) -> List[EvalResult]:
110
  continue
111
 
112
  # Sort the files by date
 
113
  try:
114
  files.sort(key=lambda x: dateutil.parser.parse(x.split("_", 1)[-1][:-5]))
115
  except dateutil.parser._parser.ParserError:
116
- up_to_date = files[-1]
117
-
118
- up_to_date = files[-1]
119
 
120
- json_filepaths.append(os.path.join(root, up_to_date))
 
 
121
 
122
  eval_results = {}
123
  for json_filepath in json_filepaths:
 
26
  model: str
27
  revision: str
28
  results: dict
29
+ precision: str = ""
30
  model_type: str = ""
31
  weight_type: str = ""
32
 
 
77
  eval_sha = config.get("lighteval_sha", "")
78
  model_split = model.split("/", 1)
79
 
80
+ precision = config.get("model_dtype")
81
+
82
  model = model_split[-1]
83
 
84
  if len(model_split) == 1:
85
  org = None
86
  model = model_split[0]
87
+ result_key = f"{model}_{model_sha}_{eval_sha}_{precision}"
88
  else:
89
  org = model_split[0]
90
  model = model_split[1]
91
+ result_key = f"{org}_{model}_{model_sha}_{eval_sha}_{precision}"
92
 
93
  eval_results = []
94
  for benchmark, metric in zip(BENCHMARKS, METRICS):
 
97
  continue
98
  mean_acc = np.mean(accs) * 100.0
99
  eval_results.append(EvalResult(
100
+ eval_name=result_key, org=org, model=model, revision=model_sha, results={benchmark: mean_acc}, precision=precision, #todo model_type=, weight_type=
101
  ))
102
 
103
  return result_key, eval_results
 
112
  continue
113
 
114
  # Sort the files by date
115
+ # store results by precision maybe?
116
  try:
117
  files.sort(key=lambda x: dateutil.parser.parse(x.split("_", 1)[-1][:-5]))
118
  except dateutil.parser._parser.ParserError:
119
+ files = [files[-1]]
 
 
120
 
121
+ #up_to_date = files[-1]
122
+ for file in files:
123
+ json_filepaths.append(os.path.join(root, file))
124
 
125
  eval_results = {}
126
  for json_filepath in json_filepaths:
src/utils_display.py CHANGED
@@ -22,7 +22,7 @@ class AutoEvalColumn: # Auto evals column
22
  mmlu = ColumnContent("MMLU", "number", True)
23
  truthfulqa = ColumnContent("TruthfulQA", "number", True)
24
  model_type = ColumnContent("Type", "str", False)
25
- precision = ColumnContent("Precision", "str", False, True)
26
  license = ColumnContent("Hub License", "str", False)
27
  params = ColumnContent("#Params (B)", "number", False)
28
  likes = ColumnContent("Hub ❤️", "number", False)
@@ -43,7 +43,7 @@ class EvalQueueColumn: # Queue column
43
  model = ColumnContent("model", "markdown", True)
44
  revision = ColumnContent("revision", "str", True)
45
  private = ColumnContent("private", "bool", True)
46
- precision = ColumnContent("precision", "bool", True)
47
  weight_type = ColumnContent("weight_type", "str", "Original")
48
  status = ColumnContent("status", "str", True)
49
 
 
22
  mmlu = ColumnContent("MMLU", "number", True)
23
  truthfulqa = ColumnContent("TruthfulQA", "number", True)
24
  model_type = ColumnContent("Type", "str", False)
25
+ precision = ColumnContent("Precision", "str", False) #, True)
26
  license = ColumnContent("Hub License", "str", False)
27
  params = ColumnContent("#Params (B)", "number", False)
28
  likes = ColumnContent("Hub ❤️", "number", False)
 
43
  model = ColumnContent("model", "markdown", True)
44
  revision = ColumnContent("revision", "str", True)
45
  private = ColumnContent("private", "bool", True)
46
+ precision = ColumnContent("precision", "str", True)
47
  weight_type = ColumnContent("weight_type", "str", "Original")
48
  status = ColumnContent("status", "str", True)
49