Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
File size: 3,906 Bytes
f766ce9 270c122 7845083 f766ce9 8b7a945 f766ce9 8b7a945 df659d0 9134169 8b7a945 df659d0 a7c0332 8b7a945 9134169 a7c0332 9134169 1a2dba5 df659d0 1a2dba5 df659d0 1a2dba5 9134169 3b83af7 9134169 2edd122 a7c0332 df659d0 a7c0332 2edd122 9400714 df659d0 9400714 32ee53f df659d0 649e0fb 7845083 9134169 f766ce9 9134169 7845083 f766ce9 a7c0332 7845083 8b7a945 f766ce9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 |
from dataclasses import dataclass, make_dataclass
from src.benchmarks import QABenchmarks, LongDocBenchmarks
from src.envs import COL_NAME_AVG, COL_NAME_RETRIEVAL_MODEL, COL_NAME_RERANKING_MODEL, COL_NAME_RETRIEVAL_MODEL_LINK, \
COL_NAME_RERANKING_MODEL_LINK, COL_NAME_RANK, COL_NAME_REVISION, COL_NAME_TIMESTAMP, COL_NAME_IS_ANONYMOUS
def fields(raw_class):
    """Collect the values of the non-dunder attributes defined on ``raw_class``.

    Only the class's own ``__dict__`` is inspected (inherited attributes are
    not visited). An attribute is skipped when its name starts with a double
    underscore or ends with one; everything else is returned in definition
    order.
    """
    collected = []
    for attr_name, attr_value in raw_class.__dict__.items():
        # Skip names with a leading or trailing double underscore.
        if attr_name.startswith("__") or attr_name.endswith("__"):
            continue
        collected.append(attr_value)
    return collected
# These classes define the user-facing column names in one place,
# so that a rename does not have to be repeated throughout the code.
@dataclass(frozen=True)
class ColumnContent:
    """Display metadata for a single leaderboard column.

    Instances are used as *default values* of the dynamically generated column
    dataclass. Since Python 3.11, ``dataclasses`` rejects any unhashable
    default with ``ValueError: mutable default ... is not allowed``. A plain
    ``@dataclass`` (``eq=True``, not frozen) sets ``__hash__`` to ``None``, so
    the class is declared frozen: instances become hashable and immutable.
    """

    # User-facing column header.
    name: str
    # Column type label, e.g. "number", "markdown", "date" or "bool".
    type: str
    # NOTE(review): presumably whether the column is pre-selected in the UI;
    # not read anywhere in this module - confirm against the display code.
    displayed_by_default: bool
    # Hidden columns are left out of the default column name/type lists.
    hidden: bool = False
    # NOTE(review): presumably marks columns the user cannot deselect;
    # not read anywhere in this module - confirm against the display code.
    never_hidden: bool = False
def get_default_auto_eval_column_dict():
    """Build the field specifications shared by every leaderboard table.

    Returns:
        A new list of ``[attribute_name, ColumnContent, default]`` entries in
        the shape accepted by ``dataclasses.make_dataclass``. Despite the
        function name the result is a list, not a dict. Visible columns come
        first; the hidden columns are kept at the end of the list.
    """
    # (attribute, column label, type, displayed_by_default, hidden, never_hidden)
    column_specs = (
        ("rank", COL_NAME_RANK, "number", True, False, False),
        ("retrieval_model", COL_NAME_RETRIEVAL_MODEL, "markdown", True, False, True),
        ("reranking_model", COL_NAME_RERANKING_MODEL, "markdown", True, False, True),
        ("revision", COL_NAME_REVISION, "markdown", True, False, True),
        ("timestamp", COL_NAME_TIMESTAMP, "date", True, False, True),
        ("average", COL_NAME_AVG, "number", True, False, False),
        ("retrieval_model_link", COL_NAME_RETRIEVAL_MODEL_LINK, "markdown", False, True, False),
        ("reranking_model_link", COL_NAME_RERANKING_MODEL_LINK, "markdown", False, True, False),
        ("is_anonymous", COL_NAME_IS_ANONYMOUS, "bool", False, True, False),
    )
    return [
        [
            attribute,
            ColumnContent,
            ColumnContent(label, kind, shown, hidden=is_hidden, never_hidden=is_pinned),
        ]
        for attribute, label, kind, shown, is_hidden, is_pinned in column_specs
    ]
def make_autoevalcolumn(cls_name, benchmarks):
    """Create a frozen dataclass describing all columns of one leaderboard.

    Args:
        cls_name: Name given to the generated dataclass.
        benchmarks: Object whose ``.value`` is iterable and yields items
            exposing ``.name`` and ``.value.col_name`` (presumably an enum
            member wrapping an enum of benchmarks - confirm in
            ``src.benchmarks``).

    Returns:
        A dataclass type whose fields are the default columns followed by one
        "number" column per benchmark. Every field defaults to its
        ``ColumnContent`` descriptor.
    """
    column_specs = get_default_auto_eval_column_dict()
    # One score column per benchmark, appended after the default columns.
    column_specs.extend(
        [task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)]
        for task in benchmarks.value
    )
    # make_dataclass lets the set of score fields be decided at runtime.
    return make_dataclass(cls_name, column_specs, frozen=True)
def get_default_col_names_and_types(benchmarks):
    """Return the names and types of every non-hidden column for ``benchmarks``.

    Args:
        benchmarks: Benchmark collection forwarded to ``make_autoevalcolumn``.

    Returns:
        A ``(col_names, col_types)`` pair of parallel lists, in column order.
    """
    column_cls = make_autoevalcolumn("AutoEvalColumn", benchmarks)
    visible_names, visible_types = [], []
    for column in fields(column_cls):
        if column.hidden:
            continue
        visible_names.append(column.name)
        visible_types.append(column.type)
    return visible_names, visible_types
# AutoEvalColumnQA = make_autoevalcolumn("AutoEvalColumnQA", QABenchmarks)
# COLS_QA = [c.name for c in fields(AutoEvalColumnQA) if not c.hidden]
# TYPES_QA = [c.type for c in fields(AutoEvalColumnQA) if not c.hidden]
def get_fixed_col_names_and_types():
    """Return the names and types of the visible, benchmark-independent columns.

    The columns are selected by their ``hidden`` flag rather than by list
    position, so the result stays correct if hidden columns are added,
    removed, or reordered in ``get_default_auto_eval_column_dict``.

    Returns:
        A ``(names, types)`` pair of parallel lists, in column order.
    """
    fixed_cols = [
        column
        for _, _, column in get_default_auto_eval_column_dict()
        if not column.hidden
    ]
    return [c.name for c in fixed_cols], [c.type for c in fixed_cols]
# fixed_cols = get_default_auto_eval_column_dict()[:-3]
# FIXED_COLS = [c.name for _, _, c in fixed_cols]
# FIXED_COLS_TYPES = [c.type for _, _, c in fixed_cols]
# AutoEvalColumnLongDoc = make_autoevalcolumn("AutoEvalColumnLongDoc", LongDocBenchmarks)
# COLS_LONG_DOC = [c.name for c in fields(AutoEvalColumnLongDoc) if not c.hidden]
# TYPES_LONG_DOC = [c.type for c in fields(AutoEvalColumnLongDoc) if not c.hidden]
# Column selection
|