future-xy
committed on
Commit
·
82a6ed1
1
Parent(s):
a4a186c
fix result display bug
Browse files
- src/backend/envs.py +1 -1
- src/display/utils.py +16 -18
src/backend/envs.py
CHANGED
@@ -43,7 +43,7 @@ class Tasks(Enum):
|
|
43 |
|
44 |
# task13 = Task("ifeval", "prompt_level_strict_acc", "IFEval", 0)
|
45 |
|
46 |
-
task14 = Task("selfcheckgpt", "max-selfcheckgpt", "SelfCheckGPT", 0)
|
47 |
|
48 |
# task15 = Task("fever10", "acc", "FEVER", 16)
|
49 |
# task15_1 = Task("fever11", "acc", "FEVER", 8)
|
|
|
43 |
|
44 |
# task13 = Task("ifeval", "prompt_level_strict_acc", "IFEval", 0)
|
45 |
|
46 |
+
# task14 = Task("selfcheckgpt", "max-selfcheckgpt", "SelfCheckGPT", 0)
|
47 |
|
48 |
# task15 = Task("fever10", "acc", "FEVER", 16)
|
49 |
# task15_1 = Task("fever11", "acc", "FEVER", 8)
|
src/display/utils.py
CHANGED
@@ -45,8 +45,8 @@ class Tasks(Enum):
|
|
45 |
# halueval_dial = Task("halueval_dialogue", "acc", "HaluDial/Acc")
|
46 |
|
47 |
# # XXX include me back at some point
|
48 |
-
selfcheck = Task("selfcheckgpt", "max-selfcheckgpt", "SelfCheckGPT")
|
49 |
-
mmlu = Task("
|
50 |
|
51 |
|
52 |
# These classes are for user facing column names,
|
@@ -63,11 +63,9 @@ class ColumnContent:
|
|
63 |
|
64 |
|
65 |
auto_eval_column_dict = []
|
66 |
-
auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "str", True, never_hidden=True)])
|
67 |
-
auto_eval_column_dict.append(["hardware", ColumnContent, ColumnContent("Hardware", "str", True, never_hidden=True)])
|
68 |
# Init
|
69 |
-
|
70 |
-
|
71 |
|
72 |
# #Scores
|
73 |
# # auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Avg", "number", True)])
|
@@ -75,18 +73,18 @@ auto_eval_column_dict.append(["hardware", ColumnContent, ColumnContent("Hardware
|
|
75 |
for task in Tasks:
|
76 |
auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
|
77 |
|
78 |
-
#
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
#
|
89 |
-
|
90 |
|
91 |
# We use make dataclass to dynamically fill the scores from Tasks
|
92 |
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
|
|
|
45 |
# halueval_dial = Task("halueval_dialogue", "acc", "HaluDial/Acc")
|
46 |
|
47 |
# # XXX include me back at some point
|
48 |
+
# selfcheck = Task("selfcheckgpt", "max-selfcheckgpt", "SelfCheckGPT")
|
49 |
+
mmlu = Task("mmlu", "acc", "MMLU/Acc")
|
50 |
|
51 |
|
52 |
# These classes are for user facing column names,
|
|
|
63 |
|
64 |
|
65 |
auto_eval_column_dict = []
|
|
|
|
|
66 |
# Init
|
67 |
+
auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
|
68 |
+
auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
|
69 |
|
70 |
# #Scores
|
71 |
# # auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Avg", "number", True)])
|
|
|
73 |
for task in Tasks:
|
74 |
auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
|
75 |
|
76 |
+
# Model information
|
77 |
+
auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
|
78 |
+
auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
|
79 |
+
auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
|
80 |
+
auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
|
81 |
+
auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
|
82 |
+
auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
|
83 |
+
auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)])
|
84 |
+
auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
|
85 |
+
auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
|
86 |
+
# Dummy column for the search bar (hidden by the custom CSS)
|
87 |
+
auto_eval_column_dict.append(["dummy", ColumnContent, ColumnContent("model_name_for_query", "str", False, dummy=True)])
|
88 |
|
89 |
# We use make dataclass to dynamically fill the scores from Tasks
|
90 |
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
|