Spaces:
Running
Running
from dataclasses import dataclass, make_dataclass | |
from enum import Enum | |
import pandas as pd | |
from src.about import Tasks, TasksMultimodal | |
def fields(raw_class):
    """Return the attribute values stored directly on ``raw_class``.

    Walks ``raw_class.__dict__`` in definition order and keeps the value of
    every attribute whose name neither starts nor ends with a double
    underscore. Inherited attributes are not included because only the
    class's own ``__dict__`` is inspected.
    """
    kept = []
    for attr_name, attr_value in raw_class.__dict__.items():
        # Skip anything that looks like a dunder on either side.
        if attr_name.startswith("__"):
            continue
        if attr_name.endswith("__"):
            continue
        kept.append(attr_value)
    return kept
# These classes hold the user-facing column names, so that a rename only has
# to be made here instead of all around the code.
@dataclass(frozen=True)
class ColumnContent:
    """Describe a single user-facing table column.

    Instances are created positionally throughout this module, e.g.
    ``ColumnContent("Model", "markdown", True, never_hidden=True)``, so the
    class must be a dataclass to get a generated ``__init__``.

    It is frozen so that instances are hashable: they are used as field
    defaults for ``make_dataclass``, and Python 3.11+ rejects unhashable
    default values there.
    """

    # Text shown to the user as the column header (collected into COLS etc.).
    name: str
    # Datatype label for the column; this module uses "markdown", "str",
    # "number" and "bool".
    type: str
    # NOTE(review): presumably whether the column is visible before the user
    # changes the selection -- the consumer is not in this file, confirm.
    displayed_by_default: bool
    # Columns with hidden=True are filtered out of COLS / COLS_MULTIMODAL.
    hidden: bool = False
    # NOTE(review): presumably marks columns the user cannot deselect -- not
    # read anywhere in this file, confirm against the UI code.
    never_hidden: bool = False
## Leaderboard columns
# Every entry is a [field_name, field_type, default_value] triple, which is
# the shape ``make_dataclass`` accepts for its field list.

# Text-only leaderboard: identity columns first ...
auto_eval_column_dict = [
    ["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)],
    ["hf_repo", ColumnContent, ColumnContent("HF Repo", "str", False)],
    ["track", ColumnContent, ColumnContent("Track", "markdown", False)],
]
# ... then one score column per text task ...
for task in Tasks:
    auto_eval_column_dict.append(
        [task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)]
    )
# ... and finally the aggregate score and model information.
auto_eval_column_dict += [
    ["text_average", ColumnContent, ColumnContent("Text Average", "number", True)],
    ["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)],
    ["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)],
]

# Multimodal leaderboard: same identity columns.
auto_eval_column_dict_multimodal = [
    ["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)],
    ["hf_repo", ColumnContent, ColumnContent("HF Repo", "str", False)],
    ["track", ColumnContent, ColumnContent("Track", "markdown", False)],
]
for task in TasksMultimodal:
    auto_eval_column_dict_multimodal.append(
        [task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)]
    )
    # The text average is inserted right after the EWoK column so that it
    # appears in the right order (before the vision task columns).
    if task.value.col_name in ("ewok", "EWoK"):
        auto_eval_column_dict_multimodal.append(
            ["text_average", ColumnContent, ColumnContent("Text Average", "number", True)]
        )
auto_eval_column_dict_multimodal += [
    ["vision_average", ColumnContent, ColumnContent("Vision Average", "number", True)],
    ["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)],
    ["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)],
]

# make_dataclass is used so the score fields can be filled in dynamically
# from the task enums.
AutoEvalColumn = make_dataclass(
    "AutoEvalColumn",
    fields=auto_eval_column_dict,
    frozen=True,
)
AutoEvalColumnMultimodal = make_dataclass(
    "AutoEvalColumnMultimodal",
    fields=auto_eval_column_dict_multimodal,
    frozen=True,
)
## For the queue columns in the submission tab
class EvalQueueColumn:
    """Columns of the evaluation queue table.

    Each class attribute is a ``ColumnContent``; the attribute order is the
    order in which ``EVAL_COLS`` and ``EVAL_TYPES`` list the columns.
    """

    model = ColumnContent(name="model", type="markdown", displayed_by_default=True)
    track = ColumnContent(name="track", type="str", displayed_by_default=True)
    revision = ColumnContent(name="revision", type="str", displayed_by_default=True)
    private = ColumnContent(name="private", type="bool", displayed_by_default=True)
    status = ColumnContent(name="status", type="str", displayed_by_default=True)
## All the model information that we might need
@dataclass
class ModelDetails:
    """Bundle the naming information kept for a model.

    Declared as a dataclass so that the annotated fields below produce a
    constructor; ``name`` is required, the other two fall back to an empty
    string.
    """

    name: str
    # NOTE(review): presumably a prettier label for ``name`` -- not read in
    # this file, confirm against callers.
    display_name: str = ""
    symbol: str = ""  # emoji
# Column selection
# Display names of the leaderboard columns that are not flagged as hidden.
COLS = [column.name for column in fields(AutoEvalColumn) if not column.hidden]
COLS_MULTIMODAL = [column.name for column in fields(AutoEvalColumnMultimodal) if not column.hidden]

# Names and datatype labels of the evaluation-queue columns, in matching order.
EVAL_COLS = [column.name for column in fields(EvalQueueColumn)]
EVAL_TYPES = [column.type for column in fields(EvalQueueColumn)]

# Display names of the per-task score columns.
BENCHMARK_COLS = [member.value.col_name for member in Tasks]
BENCHMARK_COLS_MULTIMODAL = [member.value.col_name for member in TasksMultimodal]
# Sub-task names of each text benchmark, keyed by benchmark name.
TEXT_TASKS = {
    "glue": [
        "cola", "sst2", "mrpc", "qqp", "mnli", "mnli-mm", "qnli", "rte",
        "boolq", "multirc", "wsc",
    ],
    # Lots of BLiMP tasks - the original note points to a verifier function
    # further down the file for checking that everything is included.
    "blimp": [
        "adjunct_island",
        "anaphor_gender_agreement",
        "anaphor_number_agreement",
        "animate_subject_passive",
        "animate_subject_trans",
        "causative",
        "complex_NP_island",
        "coordinate_structure_constraint_complex_left_branch",
        "coordinate_structure_constraint_object_extraction",
        "determiner_noun_agreement_1",
        "determiner_noun_agreement_2",
        "determiner_noun_agreement_irregular_1",
        "determiner_noun_agreement_irregular_2",
        "determiner_noun_agreement_with_adjective_1",
        "determiner_noun_agreement_with_adj_2",
        "determiner_noun_agreement_with_adj_irregular_1",
        "determiner_noun_agreement_with_adj_irregular_2",
        "distractor_agreement_relational_noun",
        "distractor_agreement_relative_clause",
        "drop_argument",
        "ellipsis_n_bar_1",
        "ellipsis_n_bar_2",
        "existential_there_object_raising",
        "existential_there_quantifiers_1",
        "existential_there_quantifiers_2",
        "existential_there_subject_raising",
        "expletive_it_object_raising",
        "inchoative",
        "intransitive",
        "irregular_past_participle_adjectives",
        "irregular_past_participle_verbs",
        "irregular_plural_subject_verb_agreement_1",
        "irregular_plural_subject_verb_agreement_2",
        "left_branch_island_echo_question",
        "left_branch_island_simple_question",
        "matrix_question_npi_licensor_present",
        "npi_present_1",
        "npi_present_2",
        "only_npi_licensor_present",
        "only_npi_scope",
        "passive_1",
        "passive_2",
        "principle_A_case_1",
        "principle_A_case_2",
        "principle_A_c_command",
        "principle_A_domain_1",
        "principle_A_domain_2",
        "principle_A_domain_3",
        "principle_A_reconstruction",
        "regular_plural_subject_verb_agreement_1",
        "regular_plural_subject_verb_agreement_2",
        "sentential_negation_npi_licensor_present",
        "sentential_negation_npi_scope",
        "sentential_subject_island",
        "superlative_quantifiers_1",
        "superlative_quantifiers_2",
        "tough_vs_raising_1",
        "tough_vs_raising_2",
        "transitive",
        "wh_island",
        "wh_questions_object_gap",
        "wh_questions_subject_gap",
        "wh_questions_subject_gap_long_distance",
        "wh_vs_that_no_gap",
        "wh_vs_that_no_gap_long_distance",
        "wh_vs_that_with_gap",
        "wh_vs_that_with_gap_long_distance",
    ],
    "blimp_supplement": [
        "hypernym",
        "qa_congruence_easy",
        "qa_congruence_tricky",
        "subject_aux_inversion",
        "turn_taking",
    ],
    "ewok": [
        "agent-properties",
        "material-dynamics",
        "material-properties",
        "physical-dynamics",
        "physical-interactions",
        "physical-relations",
        "quantitative-properties",
        "social-interactions",
        "social-properties",
        "social-relations",
        "spatial-relations",
    ],
}
# Sub-task names of each vision benchmark, keyed by benchmark name.
VISION_TASKS = {
    "vqa": [
        "vqa",
    ],
    "winoground": [
        "winoground",
    ],
    "devbench": [
        "lex-viz_vocab",
        "gram-trog",
        "sem-things",
    ],
}
# Expected number of examples per sub-task, keyed first by benchmark name and
# then by sub-task name.
NUM_EXPECTED_EXAMPLES = {
    # --- text benchmarks ---
    "glue": {
        "cola": 522,
        "sst2": 436,
        "mrpc": 204,
        "qqp": 20215,
        "mnli": 4908,
        "mnli-mm": 4916,
        "qnli": 2732,
        "rte": 139,
        "boolq": 1635,
        "multirc": 2424,
        "wsc": 52,
    },
    "blimp": {
        "adjunct_island": 928,
        "anaphor_gender_agreement": 971,
        "anaphor_number_agreement": 931,
        "animate_subject_passive": 895,
        "animate_subject_trans": 923,
        "causative": 818,
        "complex_NP_island": 846,
        "coordinate_structure_constraint_complex_left_branch": 906,
        "coordinate_structure_constraint_object_extraction": 949,
        "determiner_noun_agreement_1": 929,
        "determiner_noun_agreement_2": 931,
        "determiner_noun_agreement_irregular_1": 681,
        "determiner_noun_agreement_irregular_2": 820,
        "determiner_noun_agreement_with_adjective_1": 933,
        "determiner_noun_agreement_with_adj_2": 941,
        "determiner_noun_agreement_with_adj_irregular_1": 718,
        "determiner_noun_agreement_with_adj_irregular_2": 840,
        "distractor_agreement_relational_noun": 788,
        "distractor_agreement_relative_clause": 871,
        "drop_argument": 920,
        "ellipsis_n_bar_1": 802,
        "ellipsis_n_bar_2": 828,
        "existential_there_object_raising": 812,
        "existential_there_quantifiers_1": 930,
        "existential_there_quantifiers_2": 911,
        "existential_there_subject_raising": 924,
        "expletive_it_object_raising": 759,
        "inchoative": 855,
        "intransitive": 868,
        "irregular_past_participle_adjectives": 961,
        "irregular_past_participle_verbs": 942,
        "irregular_plural_subject_verb_agreement_1": 804,
        "irregular_plural_subject_verb_agreement_2": 892,
        "left_branch_island_echo_question": 947,
        "left_branch_island_simple_question": 951,
        "matrix_question_npi_licensor_present": 929,
        "npi_present_1": 909,
        "npi_present_2": 914,
        "only_npi_licensor_present": 882,
        "only_npi_scope": 837,
        "passive_1": 840,
        "passive_2": 903,
        "principle_A_case_1": 912,
        "principle_A_case_2": 915,
        "principle_A_c_command": 946,
        "principle_A_domain_1": 914,
        "principle_A_domain_2": 915,
        "principle_A_domain_3": 941,
        "principle_A_reconstruction": 967,
        "regular_plural_subject_verb_agreement_1": 890,
        "regular_plural_subject_verb_agreement_2": 945,
        "sentential_negation_npi_licensor_present": 919,
        "sentential_negation_npi_scope": 871,
        "sentential_subject_island": 961,
        "superlative_quantifiers_1": 979,
        "superlative_quantifiers_2": 986,
        "tough_vs_raising_1": 948,
        "tough_vs_raising_2": 920,
        "transitive": 868,
        "wh_island": 960,
        "wh_questions_object_gap": 859,
        "wh_questions_subject_gap": 898,
        "wh_questions_subject_gap_long_distance": 857,
        "wh_vs_that_no_gap": 861,
        "wh_vs_that_no_gap_long_distance": 875,
        "wh_vs_that_with_gap": 919,
        "wh_vs_that_with_gap_long_distance": 910,
    },
    "blimp_supplement": {
        "hypernym": 842,
        "qa_congruence_easy": 64,
        "qa_congruence_tricky": 165,
        "subject_aux_inversion": 3867,
        "turn_taking": 280,
    },
    "ewok": {
        "agent-properties": 2210,
        "material-dynamics": 770,
        "material-properties": 170,
        "physical-dynamics": 120,
        "physical-interactions": 556,
        "physical-relations": 818,
        "quantitative-properties": 314,
        "social-interactions": 294,
        "social-properties": 328,
        "social-relations": 1548,
        "spatial-relations": 490,
    },
    # --- vision benchmarks ---
    "vqa": {
        "vqa": 25230,
    },
    "winoground": {
        "winoground": 746,
    },
    "devbench": {
        "lex-viz_vocab": 119,
        "gram-trog": 76,
        "sem-things": 1854,
    },
}