distilbert-base-cased / config.json
ahmetayrnc's picture
Training in progress, epoch 1
e112842
{
"_name_or_path": "distilbert-base-cased",
"activation": "gelu",
"architectures": [
"DistilBertForSequenceClassification"
],
"attention_dropout": 0.1,
"dim": 768,
"dropout": 0.1,
"hidden_dim": 3072,
"id2label": {
"0": "sd",
"1": "b",
"2": "sv",
"3": "%",
"4": "aa",
"5": "ba",
"6": "fc",
"7": "qw",
"8": "nn",
"9": "bk",
"10": "h",
"11": "qy^d",
"12": "bh",
"13": "^q",
"14": "bf",
"15": "fo_o_fw_\"_by_bc",
"16": "fo_o_fw_by_bc_\"",
"17": "na",
"18": "ad",
"19": "^2",
"20": "b^m",
"21": "qo",
"22": "qh",
"23": "^h",
"24": "ar",
"25": "ng",
"26": "br",
"27": "no",
"28": "fp",
"29": "qrr",
"30": "arp_nd",
"31": "t3",
"32": "oo_co_cc",
"33": "aap_am",
"34": "t1",
"35": "bd",
"36": "^g",
"37": "qw^d",
"38": "fa",
"39": "ft",
"40": "+",
"41": "x",
"42": "ny",
"43": "sv_fx",
"44": "qy_qr",
"45": "ba_fe"
},
"initializer_range": 0.02,
"label2id": {
"%": 3,
"+": 40,
"^2": 19,
"^g": 36,
"^h": 23,
"^q": 13,
"aa": 4,
"aap_am": 33,
"ad": 18,
"ar": 24,
"arp_nd": 30,
"b": 1,
"b^m": 20,
"ba": 5,
"ba_fe": 45,
"bd": 35,
"bf": 14,
"bh": 12,
"bk": 9,
"br": 26,
"fa": 38,
"fc": 6,
"fo_o_fw_\"_by_bc": 15,
"fo_o_fw_by_bc_\"": 16,
"fp": 28,
"ft": 39,
"h": 10,
"na": 17,
"ng": 25,
"nn": 8,
"no": 27,
"ny": 42,
"oo_co_cc": 32,
"qh": 22,
"qo": 21,
"qrr": 29,
"qw": 7,
"qw^d": 37,
"qy^d": 11,
"qy_qr": 44,
"sd": 0,
"sv": 2,
"sv_fx": 43,
"t1": 34,
"t3": 31,
"x": 41
},
"max_position_embeddings": 512,
"model_type": "distilbert",
"n_heads": 12,
"n_layers": 6,
"output_past": true,
"pad_token_id": 0,
"problem_type": "single_label_classification",
"qa_dropout": 0.1,
"seq_classif_dropout": 0.2,
"sinusoidal_pos_embds": false,
"tie_weights_": true,
"torch_dtype": "float32",
"transformers_version": "4.26.0",
"vocab_size": 28996
}