Adam Jirkovsky committed
Commit b2e7d0b · 1 Parent(s): 54b05ee

Rename headers during data loading

Files changed (2):
  1. src/display/utils.py +40 -18
  2. src/populate.py +2 -1
src/display/utils.py CHANGED
@@ -47,30 +47,52 @@ auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sh
 auto_eval_column_dict.append(["dummy", ColumnContent, ColumnContent("model_name_for_query", "str", False, dummy=True)])
 """
 
-auto_eval_column_dict.append(["eval_name", ColumnContent, ColumnContent("eval_name", "str", True, never_hidden=True)])
-auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("precision", "str", True)])
-auto_eval_column_dict.append(["hf_model_id", ColumnContent, ColumnContent("hf_model_id", "str", True)])
-auto_eval_column_dict.append(["agree_cs", ColumnContent, ColumnContent("agree_cs", "number", True)])
-auto_eval_column_dict.append(["anli_cs", ColumnContent, ColumnContent("anli_cs", "number", True)])
-auto_eval_column_dict.append(["arc_challenge_cs", ColumnContent, ColumnContent("arc_challenge_cs", "number", True)])
-auto_eval_column_dict.append(["arc_easy_cs", ColumnContent, ColumnContent("arc_easy_cs", "number", True)])
-auto_eval_column_dict.append(["belebele_cs", ColumnContent, ColumnContent("belebele_cs", "number", True)])
-auto_eval_column_dict.append(["ctkfacts_cs", ColumnContent, ColumnContent("ctkfacts_cs", "number", True)])
-auto_eval_column_dict.append(["czechnews_cs", ColumnContent, ColumnContent("czechnews_cs", "number", True)])
-auto_eval_column_dict.append(["fb_comments_cs", ColumnContent, ColumnContent("fb_comments_cs", "number", True)])
-auto_eval_column_dict.append(["gsm8k_cs", ColumnContent, ColumnContent("gsm8k_cs", "number", True)])
-auto_eval_column_dict.append(["klokanek_cs", ColumnContent, ColumnContent("klokanek_cs", "number", True)])
-auto_eval_column_dict.append(["mall_reviews_cs", ColumnContent, ColumnContent("mall_reviews_cs", "number", True)])
-auto_eval_column_dict.append(["mmlu_cs", ColumnContent, ColumnContent("mmlu_cs", "number", True)])
-auto_eval_column_dict.append(["sqad_cs", ColumnContent, ColumnContent("sqad_cs", "number", True)])
-auto_eval_column_dict.append(["subjectivity_cs", ColumnContent, ColumnContent("subjectivity_cs", "number", True)])
-auto_eval_column_dict.append(["truthfulqa_cs", ColumnContent, ColumnContent("truthfulqa_cs", "number", True)])
+auto_eval_column_dict.append(["eval_name", ColumnContent, ColumnContent("Model", "str", True, never_hidden=True)])
+auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", True)])
+auto_eval_column_dict.append(["hf_model_id", ColumnContent, ColumnContent("Model link (temporary)", "str", True)])
+auto_eval_column_dict.append(["agree_cs", ColumnContent, ColumnContent("AGREE", "number", True)])
+auto_eval_column_dict.append(["anli_cs", ColumnContent, ColumnContent("ANLI", "number", True)])
+auto_eval_column_dict.append(["arc_challenge_cs", ColumnContent, ColumnContent("ARC-Challenge", "number", True)])
+auto_eval_column_dict.append(["arc_easy_cs", ColumnContent, ColumnContent("ARC-Easy", "number", True)])
+auto_eval_column_dict.append(["belebele_cs", ColumnContent, ColumnContent("Belebele", "number", True)])
+auto_eval_column_dict.append(["ctkfacts_cs", ColumnContent, ColumnContent("CTKFacts", "number", True)])
+auto_eval_column_dict.append(["czechnews_cs", ColumnContent, ColumnContent("Czech News", "number", True)])
+auto_eval_column_dict.append(["fb_comments_cs", ColumnContent, ColumnContent("Facebook Comments", "number", True)])
+auto_eval_column_dict.append(["gsm8k_cs", ColumnContent, ColumnContent("GSM8K", "number", True)])
+auto_eval_column_dict.append(["klokanek_cs", ColumnContent, ColumnContent("Klokanek", "number", True)])
+auto_eval_column_dict.append(["mall_reviews_cs", ColumnContent, ColumnContent("Mall Reviews", "number", True)])
+auto_eval_column_dict.append(["mmlu_cs", ColumnContent, ColumnContent("MMLU", "number", True)])
+auto_eval_column_dict.append(["sqad_cs", ColumnContent, ColumnContent("SQAD", "number", True)])
+auto_eval_column_dict.append(["subjectivity_cs", ColumnContent, ColumnContent("Subjectivity", "number", True)])
+auto_eval_column_dict.append(["truthfulqa_cs", ColumnContent, ColumnContent("TruthfulQA", "number", True)])
 
 
 # We use make dataclass to dynamically fill the scores from Tasks
 AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
 
 
+HEADER_MAP = {
+    "eval_name": "Model",
+    "precision": "Precision",
+    "hf_model_id": "Model link (temporary)",
+    "agree_cs": "AGREE",
+    "anli_cs": "ANLI",
+    "arc_challenge_cs": "ARC-Challenge",
+    "arc_easy_cs": "ARC-Easy",
+    "belebele_cs": "Belebele",
+    "ctkfacts_cs": "CTKFacts",
+    "czechnews_cs": "Czech News",
+    "fb_comments_cs": "Facebook Comments",
+    "gsm8k_cs": "GSM8K",
+    "klokanek_cs": "Klokanek",
+    "mall_reviews_cs": "Mall Reviews",
+    "mmlu_cs": "MMLU",
+    "sqad_cs": "SQAD",
+    "subjectivity_cs": "Subjectivity",
+    "truthfulqa_cs": "TruthfulQA",
+}
+
+
 ## For the queue columns in the submission tab
 @dataclass(frozen=True)
 class EvalQueueColumn: # Queue column
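
For context, a minimal, self-contained sketch of how these entries feed make_dataclass. The ColumnContent definition below is an assumption for illustration (the real one is defined earlier in src/display/utils.py, outside this diff), and only two of the eighteen columns are shown:

from dataclasses import dataclass, make_dataclass

# Assumed stand-in for the real ColumnContent; the actual definition may differ.
@dataclass(frozen=True)
class ColumnContent:
    name: str                    # header string shown in the leaderboard table
    type: str                    # "str" or "number"
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False
    dummy: bool = False

# Two of the eighteen entries from the diff above.
auto_eval_column_dict = [
    ["eval_name", ColumnContent, ColumnContent("Model", "str", True, never_hidden=True)],
    ["mmlu_cs", ColumnContent, ColumnContent("MMLU", "number", True)],
]

# make_dataclass takes (name, type, default) triples, so each ColumnContent
# instance becomes the default value of its field on the generated class.
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)

print(AutoEvalColumn.eval_name.name)  # -> "Model" (the new display header)
print(AutoEvalColumn.mmlu_cs.name)    # -> "MMLU"
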
src/populate.py CHANGED
@@ -4,7 +4,7 @@ import numpy as np
 import pandas as pd
 
 from src.display.formatting import has_no_nan_values, make_clickable_model
-from src.display.utils import AutoEvalColumn, EvalQueueColumn
+from src.display.utils import AutoEvalColumn, EvalQueueColumn, HEADER_MAP
 from src.leaderboard.read_evals import get_raw_eval_results
 
 
@@ -13,6 +13,7 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
     #all_data_json = [v.to_dict() for v in raw_data]
     df = pd.DataFrame.from_records(raw_data)
     #df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
+    df = df.rename(columns=HEADER_MAP)
     df = df[cols].round(decimals=2)
     df.replace(r'\s+', np.nan, regex=True)
     # filter out if any of the benchmarks have not been produced
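
A quick sketch of what the new rename step does inside get_leaderboard_df; the toy record and the abbreviated HEADER_MAP here are illustrative only:

import pandas as pd

HEADER_MAP = {"eval_name": "Model", "mmlu_cs": "MMLU"}  # abbreviated from the full map

raw_data = [{"eval_name": "my-org/my-model", "mmlu_cs": 41.237}]  # toy record
df = pd.DataFrame.from_records(raw_data)

df = df.rename(columns=HEADER_MAP)  # raw result keys -> display headers
cols = ["Model", "MMLU"]            # column selection must now use display names
df = df[cols].round(decimals=2)

print(df)
# roughly:
#              Model   MMLU
# 0  my-org/my-model  41.24

Because the rename runs before df[cols], the cols list passed into get_leaderboard_df has to contain the display headers, which is why the ColumnContent entries in src/display/utils.py were switched to the same strings in this commit.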