{ "architectures": [ "ModernBertForTokenClassification" ], "attention_bias": false, "attention_dropout": 0.0, "auto_map": { "AutoConfig": "configuration_modernbert.ModernBertConfig", "AutoModel": "modeling_modernbert.ModernBertModel", "AutoModelForMaskedLM": "modeling_modernbert.ModernBertForMaskedLM", "AutoModelForSequenceClassification": "modeling_modernbert.ModernBertForSequenceClassification", "AutoModelForTokenClassification": "modeling_modernbert.ModernBertForTokenClassification" }, "bos_token_id": 0, "classifier_activation": "gelu", "classifier_bias": false, "classifier_dropout": 0.0, "classifier_pooling": "mean", "cls_token_id": 0, "custom_pipelines": { "universal-dependencies": { "impl": "ud.UniversalDependenciesPipeline", "pt": "AutoModelForTokenClassification" } }, "decoder_bias": true, "deterministic_flash_attn": false, "embedding_dropout": 0.0, "eos_token_id": 2, "global_attn_every_n_layers": 3, "global_rope_theta": 160000.0, "gradient_checkpointing": false, "hidden_activation": "gelu", "hidden_size": 768, "id2label": { "0": "ADJ|o", "1": "ADJ|o|l-acl", "2": "ADJ|o|l-advcl", "3": "ADJ|o|l-amod", "4": "ADJ|o|l-ccomp", "5": "ADJ|o|l-csubj", "6": "ADJ|o|l-csubj:outer", "7": "ADJ|o|l-nmod", "8": "ADJ|o|l-nsubj", "9": "ADJ|o|l-obj", "10": "ADJ|o|l-obl", "11": "ADJ|o|r-acl", "12": "ADJ|o|r-amod", "13": "ADJ|o|r-dep", "14": "ADJ|o|root", "15": "ADJ|x", "16": "ADJ|x|l-acl", "17": "ADJ|x|l-advcl", "18": "ADJ|x|l-amod", "19": "ADJ|x|l-ccomp", "20": "ADJ|x|l-csubj", "21": "ADJ|x|l-csubj:outer", "22": "ADJ|x|l-nmod", "23": "ADJ|x|l-nsubj", "24": "ADJ|x|l-obj", "25": "ADJ|x|l-obl", "26": "ADJ|x|r-acl", "27": "ADJ|x|r-amod", "28": "ADJ|x|r-dep", "29": "ADJ|x|root", "30": "ADP|o", "31": "ADP|o|l-case", "32": "ADP|o|r-case", "33": "ADP|o|r-fixed", "34": "ADP|x", "35": "ADP|x|l-case", "36": "ADP|x|r-case", "37": "ADP|x|r-fixed", "38": "ADV|o", "39": "ADV|o|l-advcl", "40": "ADV|o|l-advmod", "41": "ADV|o|l-obj", "42": "ADV|o|r-dep", "43": "ADV|o|root", "44": "ADV|x", "45": "ADV|x|l-advcl", "46": "ADV|x|l-advmod", "47": "ADV|x|l-obj", "48": "ADV|x|r-dep", "49": "ADV|x|root", "50": "AUX|o", "51": "AUX|o|Polarity=Neg", "52": "AUX|o|Polarity=Neg|r-aux", "53": "AUX|o|Polarity=Neg|r-fixed", "54": "AUX|o|r-aux", "55": "AUX|o|r-cop", "56": "AUX|o|r-fixed", "57": "AUX|o|root", "58": "AUX|x", "59": "AUX|x|Polarity=Neg", "60": "AUX|x|Polarity=Neg|r-aux", "61": "AUX|x|Polarity=Neg|r-fixed", "62": "AUX|x|r-aux", "63": "AUX|x|r-cop", "64": "AUX|x|r-fixed", "65": "AUX|x|root", "66": "CCONJ|o", "67": "CCONJ|o|l-cc", "68": "CCONJ|o|r-cc", "69": "CCONJ|x", "70": "CCONJ|x|l-cc", "71": "CCONJ|x|r-cc", "72": "DET|o", "73": "DET|o|l-det", "74": "DET|x", "75": "DET|x|l-det", "76": "INTJ|o", "77": "INTJ|o|l-discourse", "78": "INTJ|o|r-discourse", "79": "INTJ|o|root", "80": "INTJ|x", "81": "INTJ|x|l-discourse", "82": "INTJ|x|r-discourse", "83": "INTJ|x|root", "84": "NOUN|o", "85": "NOUN|o|Polarity=Neg", "86": "NOUN|o|Polarity=Neg|l-obl", "87": "NOUN|o|Polarity=Neg|root", "88": "NOUN|o|l-acl", "89": "NOUN|o|l-advcl", "90": "NOUN|o|l-ccomp", "91": "NOUN|o|l-compound", "92": "NOUN|o|l-csubj", "93": "NOUN|o|l-csubj:outer", "94": "NOUN|o|l-nmod", "95": "NOUN|o|l-nsubj", "96": "NOUN|o|l-nsubj:outer", "97": "NOUN|o|l-obj", "98": "NOUN|o|l-obl", "99": "NOUN|o|r-compound", "100": "NOUN|o|r-nmod", "101": "NOUN|o|r-nsubj", "102": "NOUN|o|root", "103": "NOUN|x", "104": "NOUN|x|Polarity=Neg", "105": "NOUN|x|Polarity=Neg|l-obl", "106": "NOUN|x|Polarity=Neg|root", "107": "NOUN|x|l-acl", "108": "NOUN|x|l-advcl", "109": "NOUN|x|l-ccomp", "110": "NOUN|x|l-compound", "111": "NOUN|x|l-csubj", "112": "NOUN|x|l-csubj:outer", "113": "NOUN|x|l-nmod", "114": "NOUN|x|l-nsubj", "115": "NOUN|x|l-nsubj:outer", "116": "NOUN|x|l-obj", "117": "NOUN|x|l-obl", "118": "NOUN|x|r-compound", "119": "NOUN|x|r-nmod", "120": "NOUN|x|r-nsubj", "121": "NOUN|x|root", "122": "NUM|o", "123": "NUM|o|l-advcl", "124": "NUM|o|l-compound", "125": "NUM|o|l-nmod", "126": "NUM|o|l-nsubj", "127": "NUM|o|l-nsubj:outer", "128": "NUM|o|l-nummod", "129": "NUM|o|l-obj", "130": "NUM|o|l-obl", "131": "NUM|o|r-compound", "132": "NUM|o|root", "133": "NUM|x", "134": "NUM|x|l-advcl", "135": "NUM|x|l-compound", "136": "NUM|x|l-nmod", "137": "NUM|x|l-nsubj", "138": "NUM|x|l-nsubj:outer", "139": "NUM|x|l-nummod", "140": "NUM|x|l-obj", "141": "NUM|x|l-obl", "142": "NUM|x|r-compound", "143": "NUM|x|root", "144": "PART|o", "145": "PART|o|l-mark", "146": "PART|o|r-mark", "147": "PART|x", "148": "PART|x|l-mark", "149": "PART|x|r-mark", "150": "PRON|o", "151": "PRON|o|l-acl", "152": "PRON|o|l-advcl", "153": "PRON|o|l-nmod", "154": "PRON|o|l-nsubj", "155": "PRON|o|l-nsubj:outer", "156": "PRON|o|l-obj", "157": "PRON|o|l-obl", "158": "PRON|o|root", "159": "PRON|x", "160": "PRON|x|l-acl", "161": "PRON|x|l-advcl", "162": "PRON|x|l-nmod", "163": "PRON|x|l-nsubj", "164": "PRON|x|l-nsubj:outer", "165": "PRON|x|l-obj", "166": "PRON|x|l-obl", "167": "PRON|x|root", "168": "PROPN|o", "169": "PROPN|o|l-acl", "170": "PROPN|o|l-advcl", "171": "PROPN|o|l-compound", "172": "PROPN|o|l-nmod", "173": "PROPN|o|l-nsubj", "174": "PROPN|o|l-nsubj:outer", "175": "PROPN|o|l-obj", "176": "PROPN|o|l-obl", "177": "PROPN|o|r-compound", "178": "PROPN|o|r-nmod", "179": "PROPN|o|root", "180": "PROPN|x", "181": "PROPN|x|l-acl", "182": "PROPN|x|l-advcl", "183": "PROPN|x|l-compound", "184": "PROPN|x|l-nmod", "185": "PROPN|x|l-nsubj", "186": "PROPN|x|l-nsubj:outer", "187": "PROPN|x|l-obj", "188": "PROPN|x|l-obl", "189": "PROPN|x|r-compound", "190": "PROPN|x|r-nmod", "191": "PROPN|x|root", "192": "PUNCT|o", "193": "PUNCT|o|l-punct", "194": "PUNCT|o|r-punct", "195": "PUNCT|x", "196": "PUNCT|x|l-punct", "197": "PUNCT|x|r-punct", "198": "SCONJ|o", "199": "SCONJ|o|l-dep", "200": "SCONJ|o|r-fixed", "201": "SCONJ|o|r-mark", "202": "SCONJ|x", "203": "SCONJ|x|l-dep", "204": "SCONJ|x|r-fixed", "205": "SCONJ|x|r-mark", "206": "SYM|o", "207": "SYM|o|l-compound", "208": "SYM|o|l-dep", "209": "SYM|o|l-nmod", "210": "SYM|o|l-obl", "211": "SYM|o|r-compound", "212": "SYM|o|r-dep", "213": "SYM|x", "214": "SYM|x|l-compound", "215": "SYM|x|l-dep", "216": "SYM|x|l-nmod", "217": "SYM|x|l-obl", "218": "SYM|x|r-compound", "219": "SYM|x|r-dep", "220": "VERB|o", "221": "VERB|o|l-acl", "222": "VERB|o|l-advcl", "223": "VERB|o|l-ccomp", "224": "VERB|o|l-compound", "225": "VERB|o|l-csubj", "226": "VERB|o|l-csubj:outer", "227": "VERB|o|l-nmod", "228": "VERB|o|l-obj", "229": "VERB|o|l-obl", "230": "VERB|o|r-acl", "231": "VERB|o|r-advcl", "232": "VERB|o|r-compound", "233": "VERB|o|root", "234": "VERB|x", "235": "VERB|x|l-acl", "236": "VERB|x|l-advcl", "237": "VERB|x|l-ccomp", "238": "VERB|x|l-compound", "239": "VERB|x|l-csubj", "240": "VERB|x|l-csubj:outer", "241": "VERB|x|l-nmod", "242": "VERB|x|l-obj", "243": "VERB|x|l-obl", "244": "VERB|x|r-acl", "245": "VERB|x|r-advcl", "246": "VERB|x|r-compound", "247": "VERB|x|root", "248": "X|o", "249": "X|o|l-nmod", "250": "X|o|r-dep", "251": "X|x", "252": "X|x|l-nmod", "253": "X|x|r-dep", "254": "X|x|r-goeswith" }, "initializer_cutoff_factor": 2.0, "initializer_range": 0.02, "intermediate_size": 1152, "label2id": { "ADJ|o": 0, "ADJ|o|l-acl": 1, "ADJ|o|l-advcl": 2, "ADJ|o|l-amod": 3, "ADJ|o|l-ccomp": 4, "ADJ|o|l-csubj": 5, "ADJ|o|l-csubj:outer": 6, "ADJ|o|l-nmod": 7, "ADJ|o|l-nsubj": 8, "ADJ|o|l-obj": 9, "ADJ|o|l-obl": 10, "ADJ|o|r-acl": 11, "ADJ|o|r-amod": 12, "ADJ|o|r-dep": 13, "ADJ|o|root": 14, "ADJ|x": 15, "ADJ|x|l-acl": 16, "ADJ|x|l-advcl": 17, "ADJ|x|l-amod": 18, "ADJ|x|l-ccomp": 19, "ADJ|x|l-csubj": 20, "ADJ|x|l-csubj:outer": 21, "ADJ|x|l-nmod": 22, "ADJ|x|l-nsubj": 23, "ADJ|x|l-obj": 24, "ADJ|x|l-obl": 25, "ADJ|x|r-acl": 26, "ADJ|x|r-amod": 27, "ADJ|x|r-dep": 28, "ADJ|x|root": 29, "ADP|o": 30, "ADP|o|l-case": 31, "ADP|o|r-case": 32, "ADP|o|r-fixed": 33, "ADP|x": 34, "ADP|x|l-case": 35, "ADP|x|r-case": 36, "ADP|x|r-fixed": 37, "ADV|o": 38, "ADV|o|l-advcl": 39, "ADV|o|l-advmod": 40, "ADV|o|l-obj": 41, "ADV|o|r-dep": 42, "ADV|o|root": 43, "ADV|x": 44, "ADV|x|l-advcl": 45, "ADV|x|l-advmod": 46, "ADV|x|l-obj": 47, "ADV|x|r-dep": 48, "ADV|x|root": 49, "AUX|o": 50, "AUX|o|Polarity=Neg": 51, "AUX|o|Polarity=Neg|r-aux": 52, "AUX|o|Polarity=Neg|r-fixed": 53, "AUX|o|r-aux": 54, "AUX|o|r-cop": 55, "AUX|o|r-fixed": 56, "AUX|o|root": 57, "AUX|x": 58, "AUX|x|Polarity=Neg": 59, "AUX|x|Polarity=Neg|r-aux": 60, "AUX|x|Polarity=Neg|r-fixed": 61, "AUX|x|r-aux": 62, "AUX|x|r-cop": 63, "AUX|x|r-fixed": 64, "AUX|x|root": 65, "CCONJ|o": 66, "CCONJ|o|l-cc": 67, "CCONJ|o|r-cc": 68, "CCONJ|x": 69, "CCONJ|x|l-cc": 70, "CCONJ|x|r-cc": 71, "DET|o": 72, "DET|o|l-det": 73, "DET|x": 74, "DET|x|l-det": 75, "INTJ|o": 76, "INTJ|o|l-discourse": 77, "INTJ|o|r-discourse": 78, "INTJ|o|root": 79, "INTJ|x": 80, "INTJ|x|l-discourse": 81, "INTJ|x|r-discourse": 82, "INTJ|x|root": 83, "NOUN|o": 84, "NOUN|o|Polarity=Neg": 85, "NOUN|o|Polarity=Neg|l-obl": 86, "NOUN|o|Polarity=Neg|root": 87, "NOUN|o|l-acl": 88, "NOUN|o|l-advcl": 89, "NOUN|o|l-ccomp": 90, "NOUN|o|l-compound": 91, "NOUN|o|l-csubj": 92, "NOUN|o|l-csubj:outer": 93, "NOUN|o|l-nmod": 94, "NOUN|o|l-nsubj": 95, "NOUN|o|l-nsubj:outer": 96, "NOUN|o|l-obj": 97, "NOUN|o|l-obl": 98, "NOUN|o|r-compound": 99, "NOUN|o|r-nmod": 100, "NOUN|o|r-nsubj": 101, "NOUN|o|root": 102, "NOUN|x": 103, "NOUN|x|Polarity=Neg": 104, "NOUN|x|Polarity=Neg|l-obl": 105, "NOUN|x|Polarity=Neg|root": 106, "NOUN|x|l-acl": 107, "NOUN|x|l-advcl": 108, "NOUN|x|l-ccomp": 109, "NOUN|x|l-compound": 110, "NOUN|x|l-csubj": 111, "NOUN|x|l-csubj:outer": 112, "NOUN|x|l-nmod": 113, "NOUN|x|l-nsubj": 114, "NOUN|x|l-nsubj:outer": 115, "NOUN|x|l-obj": 116, "NOUN|x|l-obl": 117, "NOUN|x|r-compound": 118, "NOUN|x|r-nmod": 119, "NOUN|x|r-nsubj": 120, "NOUN|x|root": 121, "NUM|o": 122, "NUM|o|l-advcl": 123, "NUM|o|l-compound": 124, "NUM|o|l-nmod": 125, "NUM|o|l-nsubj": 126, "NUM|o|l-nsubj:outer": 127, "NUM|o|l-nummod": 128, "NUM|o|l-obj": 129, "NUM|o|l-obl": 130, "NUM|o|r-compound": 131, "NUM|o|root": 132, "NUM|x": 133, "NUM|x|l-advcl": 134, "NUM|x|l-compound": 135, "NUM|x|l-nmod": 136, "NUM|x|l-nsubj": 137, "NUM|x|l-nsubj:outer": 138, "NUM|x|l-nummod": 139, "NUM|x|l-obj": 140, "NUM|x|l-obl": 141, "NUM|x|r-compound": 142, "NUM|x|root": 143, "PART|o": 144, "PART|o|l-mark": 145, "PART|o|r-mark": 146, "PART|x": 147, "PART|x|l-mark": 148, "PART|x|r-mark": 149, "PRON|o": 150, "PRON|o|l-acl": 151, "PRON|o|l-advcl": 152, "PRON|o|l-nmod": 153, "PRON|o|l-nsubj": 154, "PRON|o|l-nsubj:outer": 155, "PRON|o|l-obj": 156, "PRON|o|l-obl": 157, "PRON|o|root": 158, "PRON|x": 159, "PRON|x|l-acl": 160, "PRON|x|l-advcl": 161, "PRON|x|l-nmod": 162, "PRON|x|l-nsubj": 163, "PRON|x|l-nsubj:outer": 164, "PRON|x|l-obj": 165, "PRON|x|l-obl": 166, "PRON|x|root": 167, "PROPN|o": 168, "PROPN|o|l-acl": 169, "PROPN|o|l-advcl": 170, "PROPN|o|l-compound": 171, "PROPN|o|l-nmod": 172, "PROPN|o|l-nsubj": 173, "PROPN|o|l-nsubj:outer": 174, "PROPN|o|l-obj": 175, "PROPN|o|l-obl": 176, "PROPN|o|r-compound": 177, "PROPN|o|r-nmod": 178, "PROPN|o|root": 179, "PROPN|x": 180, "PROPN|x|l-acl": 181, "PROPN|x|l-advcl": 182, "PROPN|x|l-compound": 183, "PROPN|x|l-nmod": 184, "PROPN|x|l-nsubj": 185, "PROPN|x|l-nsubj:outer": 186, "PROPN|x|l-obj": 187, "PROPN|x|l-obl": 188, "PROPN|x|r-compound": 189, "PROPN|x|r-nmod": 190, "PROPN|x|root": 191, "PUNCT|o": 192, "PUNCT|o|l-punct": 193, "PUNCT|o|r-punct": 194, "PUNCT|x": 195, "PUNCT|x|l-punct": 196, "PUNCT|x|r-punct": 197, "SCONJ|o": 198, "SCONJ|o|l-dep": 199, "SCONJ|o|r-fixed": 200, "SCONJ|o|r-mark": 201, "SCONJ|x": 202, "SCONJ|x|l-dep": 203, "SCONJ|x|r-fixed": 204, "SCONJ|x|r-mark": 205, "SYM|o": 206, "SYM|o|l-compound": 207, "SYM|o|l-dep": 208, "SYM|o|l-nmod": 209, "SYM|o|l-obl": 210, "SYM|o|r-compound": 211, "SYM|o|r-dep": 212, "SYM|x": 213, "SYM|x|l-compound": 214, "SYM|x|l-dep": 215, "SYM|x|l-nmod": 216, "SYM|x|l-obl": 217, "SYM|x|r-compound": 218, "SYM|x|r-dep": 219, "VERB|o": 220, "VERB|o|l-acl": 221, "VERB|o|l-advcl": 222, "VERB|o|l-ccomp": 223, "VERB|o|l-compound": 224, "VERB|o|l-csubj": 225, "VERB|o|l-csubj:outer": 226, "VERB|o|l-nmod": 227, "VERB|o|l-obj": 228, "VERB|o|l-obl": 229, "VERB|o|r-acl": 230, "VERB|o|r-advcl": 231, "VERB|o|r-compound": 232, "VERB|o|root": 233, "VERB|x": 234, "VERB|x|l-acl": 235, "VERB|x|l-advcl": 236, "VERB|x|l-ccomp": 237, "VERB|x|l-compound": 238, "VERB|x|l-csubj": 239, "VERB|x|l-csubj:outer": 240, "VERB|x|l-nmod": 241, "VERB|x|l-obj": 242, "VERB|x|l-obl": 243, "VERB|x|r-acl": 244, "VERB|x|r-advcl": 245, "VERB|x|r-compound": 246, "VERB|x|root": 247, "X|o": 248, "X|o|l-nmod": 249, "X|o|r-dep": 250, "X|x": 251, "X|x|l-nmod": 252, "X|x|r-dep": 253, "X|x|r-goeswith": 254 }, "layer_norm_eps": 1e-05, "local_attention": 128, "local_rope_theta": 10000.0, "max_position_embeddings": 8192, "mlp_bias": false, "mlp_dropout": 0.0, "model_type": "modernbert", "norm_bias": false, "norm_eps": 1e-05, "num_attention_heads": 12, "num_hidden_layers": 22, "pad_token_id": 1, "position_embedding_type": "absolute", "reference_compile": true, "sep_token_id": 2, "sparse_pred_ignore_index": -100, "sparse_prediction": false, "tokenizer_class": "DebertaV2TokenizerFast", "torch_dtype": "float32", "transformers_version": "4.47.1", "vocab_size": 65000 }