Spaces:
AIR-Bench
/
Running on CPU Upgrade

nan committed on
Commit
a7c0332
·
1 Parent(s): 2d272e2

refactor: refactor the column settings

Browse files
src/display/column_names.py DELETED
@@ -1,9 +0,0 @@
1
- COL_NAME_AVG = "Average ⬆️"
2
- COL_NAME_RETRIEVAL_MODEL = "Retrieval Method"
3
- COL_NAME_RERANKING_MODEL = "Reranking Model"
4
- COL_NAME_RETRIEVAL_MODEL_LINK = "Retrieval Model LINK"
5
- COL_NAME_RERANKING_MODEL_LINK = "Reranking Model LINK"
6
- COL_NAME_RANK = "Rank 🏆"
7
- COL_NAME_REVISION = "Revision"
8
- COL_NAME_TIMESTAMP = "Submission Date"
9
- COL_NAME_IS_ANONYMOUS = "Anonymous Submission"
 
 
 
 
 
 
 
 
 
 
src/display/{utils.py → columns.py} RENAMED
@@ -1,9 +1,16 @@
1
  from dataclasses import dataclass, make_dataclass
2
 
3
  from src.benchmarks import BenchmarksQA, BenchmarksLongDoc
4
- from src.display.column_names import COL_NAME_AVG, COL_NAME_RETRIEVAL_MODEL, COL_NAME_RERANKING_MODEL, \
5
- COL_NAME_RETRIEVAL_MODEL_LINK, COL_NAME_RERANKING_MODEL_LINK, COL_NAME_RANK, COL_NAME_REVISION, COL_NAME_TIMESTAMP, \
6
- COL_NAME_IS_ANONYMOUS
 
 
 
 
 
 
 
7
 
8
 
9
  def fields(raw_class):
@@ -29,10 +36,12 @@ def get_default_auto_eval_column_dict():
29
  ["rank", ColumnContent, ColumnContent(COL_NAME_RANK, "number", True)]
30
  )
31
  auto_eval_column_dict.append(
32
- ["retrieval_model", ColumnContent, ColumnContent(COL_NAME_RETRIEVAL_MODEL, "markdown", True, hidden=False, never_hidden=True)]
 
33
  )
34
  auto_eval_column_dict.append(
35
- ["reranking_model", ColumnContent, ColumnContent(COL_NAME_RERANKING_MODEL, "markdown", True, hidden=False, never_hidden=True)]
 
36
  )
37
  auto_eval_column_dict.append(
38
  ["revision", ColumnContent, ColumnContent(COL_NAME_REVISION, "markdown", True, never_hidden=True)]
@@ -44,10 +53,12 @@ def get_default_auto_eval_column_dict():
44
  ["average", ColumnContent, ColumnContent(COL_NAME_AVG, "number", True)]
45
  )
46
  auto_eval_column_dict.append(
47
- ["retrieval_model_link", ColumnContent, ColumnContent(COL_NAME_RETRIEVAL_MODEL_LINK, "markdown", False, hidden=True, never_hidden=False)]
 
48
  )
49
  auto_eval_column_dict.append(
50
- ["reranking_model_link", ColumnContent, ColumnContent(COL_NAME_RERANKING_MODEL_LINK, "markdown", False, hidden=True, never_hidden=False)]
 
51
  )
52
  auto_eval_column_dict.append(
53
  ["is_anonymous", ColumnContent, ColumnContent(COL_NAME_IS_ANONYMOUS, "bool", False, hidden=True)]
@@ -72,6 +83,10 @@ AutoEvalColumnQA = make_autoevalcolumn(
72
  AutoEvalColumnLongDoc = make_autoevalcolumn(
73
  "AutoEvalColumnLongDoc", BenchmarksLongDoc)
74
 
 
 
 
 
75
 
76
  # Column selection
77
  COLS_QA = [c.name for c in fields(AutoEvalColumnQA) if not c.hidden]
@@ -79,5 +94,3 @@ COLS_LONG_DOC = [c.name for c in fields(AutoEvalColumnLongDoc) if not c.hidden]
79
  TYPES_QA = [c.type for c in fields(AutoEvalColumnQA) if not c.hidden]
80
  TYPES_LONG_DOC = [c.type for c in fields(AutoEvalColumnLongDoc) if not c.hidden]
81
  COLS_LITE = [c.name for c in fields(AutoEvalColumnQA) if c.displayed_by_default and not c.hidden]
82
-
83
-
 
1
  from dataclasses import dataclass, make_dataclass
2
 
3
  from src.benchmarks import BenchmarksQA, BenchmarksLongDoc
4
+
5
+ COL_NAME_AVG = "Average ⬆️"
6
+ COL_NAME_RETRIEVAL_MODEL = "Retrieval Method"
7
+ COL_NAME_RERANKING_MODEL = "Reranking Model"
8
+ COL_NAME_RETRIEVAL_MODEL_LINK = "Retrieval Model LINK"
9
+ COL_NAME_RERANKING_MODEL_LINK = "Reranking Model LINK"
10
+ COL_NAME_RANK = "Rank 🏆"
11
+ COL_NAME_REVISION = "Revision"
12
+ COL_NAME_TIMESTAMP = "Submission Date"
13
+ COL_NAME_IS_ANONYMOUS = "Anonymous Submission"
14
 
15
 
16
  def fields(raw_class):
 
36
  ["rank", ColumnContent, ColumnContent(COL_NAME_RANK, "number", True)]
37
  )
38
  auto_eval_column_dict.append(
39
+ ["retrieval_model", ColumnContent,
40
+ ColumnContent(COL_NAME_RETRIEVAL_MODEL, "markdown", True, hidden=False, never_hidden=True)]
41
  )
42
  auto_eval_column_dict.append(
43
+ ["reranking_model", ColumnContent,
44
+ ColumnContent(COL_NAME_RERANKING_MODEL, "markdown", True, hidden=False, never_hidden=True)]
45
  )
46
  auto_eval_column_dict.append(
47
  ["revision", ColumnContent, ColumnContent(COL_NAME_REVISION, "markdown", True, never_hidden=True)]
 
53
  ["average", ColumnContent, ColumnContent(COL_NAME_AVG, "number", True)]
54
  )
55
  auto_eval_column_dict.append(
56
+ ["retrieval_model_link", ColumnContent,
57
+ ColumnContent(COL_NAME_RETRIEVAL_MODEL_LINK, "markdown", False, hidden=True, never_hidden=False)]
58
  )
59
  auto_eval_column_dict.append(
60
+ ["reranking_model_link", ColumnContent,
61
+ ColumnContent(COL_NAME_RERANKING_MODEL_LINK, "markdown", False, hidden=True, never_hidden=False)]
62
  )
63
  auto_eval_column_dict.append(
64
  ["is_anonymous", ColumnContent, ColumnContent(COL_NAME_IS_ANONYMOUS, "bool", False, hidden=True)]
 
83
  AutoEvalColumnLongDoc = make_autoevalcolumn(
84
  "AutoEvalColumnLongDoc", BenchmarksLongDoc)
85
 
86
+ fixed_cols = get_default_auto_eval_column_dict()[:-3]
87
+
88
+ FIXED_COLS = [c.name for _, _, c in fixed_cols]
89
+ FIXED_COLS_TYPES = [c.type for _, _, c in fixed_cols]
90
 
91
  # Column selection
92
  COLS_QA = [c.name for c in fields(AutoEvalColumnQA) if not c.hidden]
 
94
  TYPES_QA = [c.type for c in fields(AutoEvalColumnQA) if not c.hidden]
95
  TYPES_LONG_DOC = [c.type for c in fields(AutoEvalColumnLongDoc) if not c.hidden]
96
  COLS_LITE = [c.name for c in fields(AutoEvalColumnQA) if c.displayed_by_default and not c.hidden]
 
 
src/loaders.py CHANGED
@@ -4,8 +4,7 @@ from typing import List
4
  import pandas as pd
5
 
6
  from src.benchmarks import DEFAULT_METRIC_QA, DEFAULT_METRIC_LONG_DOC
7
- from src.display.column_names import COL_NAME_REVISION, COL_NAME_IS_ANONYMOUS, \
8
- COL_NAME_TIMESTAMP
9
 
10
  from src.models import FullEvalResult, LeaderboardDataStore
11
  from src.utils import get_default_cols, get_leaderboard_df
@@ -13,7 +12,6 @@ from src.utils import get_default_cols, get_leaderboard_df
13
  pd.options.mode.copy_on_write = True
14
 
15
 
16
-
17
  def load_raw_eval_results(results_path: str) -> List[FullEvalResult]:
18
  """
19
  Load the evaluation results from a json file
 
4
  import pandas as pd
5
 
6
  from src.benchmarks import DEFAULT_METRIC_QA, DEFAULT_METRIC_LONG_DOC
7
+ from src.display.columns import COL_NAME_REVISION, COL_NAME_TIMESTAMP, COL_NAME_IS_ANONYMOUS
 
8
 
9
  from src.models import FullEvalResult, LeaderboardDataStore
10
  from src.utils import get_default_cols, get_leaderboard_df
 
12
  pd.options.mode.copy_on_write = True
13
 
14
 
 
15
  def load_raw_eval_results(results_path: str) -> List[FullEvalResult]:
16
  """
17
  Load the evaluation results from a json file
src/models.py CHANGED
@@ -6,7 +6,7 @@ from typing import List, Optional
6
  import pandas as pd
7
 
8
  from src.benchmarks import get_safe_name
9
- from src.display.column_names import COL_NAME_RETRIEVAL_MODEL, COL_NAME_RERANKING_MODEL, COL_NAME_RETRIEVAL_MODEL_LINK, \
10
  COL_NAME_RERANKING_MODEL_LINK, COL_NAME_REVISION, COL_NAME_TIMESTAMP, COL_NAME_IS_ANONYMOUS
11
  from src.display.formatting import make_clickable_model
12
 
 
6
  import pandas as pd
7
 
8
  from src.benchmarks import get_safe_name
9
+ from src.display.columns import COL_NAME_RETRIEVAL_MODEL, COL_NAME_RERANKING_MODEL, COL_NAME_RETRIEVAL_MODEL_LINK, \
10
  COL_NAME_RERANKING_MODEL_LINK, COL_NAME_REVISION, COL_NAME_TIMESTAMP, COL_NAME_IS_ANONYMOUS
11
  from src.display.formatting import make_clickable_model
12
 
src/utils.py CHANGED
@@ -8,9 +8,9 @@ import pandas as pd
8
 
9
  from src.benchmarks import qa_benchmark_dict, long_doc_benchmark_dict, BenchmarksQA, BenchmarksLongDoc
10
  from src.display.formatting import styled_message, styled_error
11
- from src.display.utils import COLS_QA, TYPES_QA, COLS_LONG_DOC, TYPES_LONG_DOC, get_default_auto_eval_column_dict
12
- from src.display.column_names import COL_NAME_AVG, COL_NAME_RETRIEVAL_MODEL, COL_NAME_RERANKING_MODEL, COL_NAME_RANK, \
13
- COL_NAME_REVISION, COL_NAME_TIMESTAMP, COL_NAME_IS_ANONYMOUS
14
  from src.envs import API, SEARCH_RESULTS_REPO, LATEST_BENCHMARK_VERSION
15
  from src.models import FullEvalResult
16
 
@@ -96,10 +96,7 @@ def get_default_cols(task: str, columns: list=[], add_fix_cols: bool=True) -> li
96
  return cols, types
97
 
98
 
99
- fixed_cols = get_default_auto_eval_column_dict()[:-3]
100
 
101
- FIXED_COLS = [c.name for _, _, c in fixed_cols]
102
- FIXED_COLS_TYPES = [c.type for _, _, c in fixed_cols]
103
 
104
 
105
  def select_columns(
 
8
 
9
  from src.benchmarks import qa_benchmark_dict, long_doc_benchmark_dict, BenchmarksQA, BenchmarksLongDoc
10
  from src.display.formatting import styled_message, styled_error
11
+ from src.display.columns import COL_NAME_AVG, COL_NAME_RETRIEVAL_MODEL, COL_NAME_RERANKING_MODEL, COL_NAME_RANK, \
12
+ COL_NAME_REVISION, COL_NAME_TIMESTAMP, COL_NAME_IS_ANONYMOUS, COLS_QA, TYPES_QA, COLS_LONG_DOC, TYPES_LONG_DOC, \
13
+ FIXED_COLS, FIXED_COLS_TYPES
14
  from src.envs import API, SEARCH_RESULTS_REPO, LATEST_BENCHMARK_VERSION
15
  from src.models import FullEvalResult
16
 
 
96
  return cols, types
97
 
98
 
 
99
 
 
 
100
 
101
 
102
  def select_columns(
tests/test_utils.py CHANGED
@@ -2,7 +2,7 @@ import pandas as pd
2
  import pytest
3
 
4
  from src.utils import filter_models, search_table, filter_queries, select_columns, update_table_long_doc, get_iso_format_timestamp, get_default_cols, update_table
5
- from src.display.column_names import COL_NAME_AVG, COL_NAME_RETRIEVAL_MODEL, COL_NAME_RERANKING_MODEL, COL_NAME_RANK, \
6
  COL_NAME_REVISION, COL_NAME_TIMESTAMP, COL_NAME_IS_ANONYMOUS
7
 
8
 
 
2
  import pytest
3
 
4
  from src.utils import filter_models, search_table, filter_queries, select_columns, update_table_long_doc, get_iso_format_timestamp, get_default_cols, update_table
5
+ from src.display.columns import COL_NAME_AVG, COL_NAME_RETRIEVAL_MODEL, COL_NAME_RERANKING_MODEL, COL_NAME_RANK, \
6
  COL_NAME_REVISION, COL_NAME_TIMESTAMP, COL_NAME_IS_ANONYMOUS
7
 
8