{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "UNK", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "PAD", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "WORD_BOUNDARY", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "UTT_BOUNDARY", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": { "type": "Sequence", "normalizers": [ { "type": "Strip", "strip_left": true, "strip_right": true } ] }, "pre_tokenizer": { "type": "WhitespaceSplit" }, "post_processor": { "type": "TemplateProcessing", "single": [ { "SpecialToken": { "id": "UTT_BOUNDARY", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } } ], "pair": [ { "SpecialToken": { "id": "UTT_BOUNDARY", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "UTT_BOUNDARY", "type_id": 0 } }, { "Sequence": { "id": "B", "type_id": 1 } } ], "special_tokens": { "UTT_BOUNDARY": { "id": "UTT_BOUNDARY", "ids": [ 3 ], "tokens": [ "UTT_BOUNDARY" ] } } }, "decoder": null, "model": { "type": "WordLevel", "vocab": { "UNK": 0, "PAD": 1, "WORD_BOUNDARY": 2, "UTT_BOUNDARY": 3, "a˧˥": 4, "u˧˥": 5, "a˥": 6, "au": 7, "n": 8, "a˥˩": 9, "ʃ̺": 10, "ɻ̩˥˩": 11, "ə˧˥": 12, "m": 13, "ɤ": 14, "p": 15, "j": 16, "e˧˥": 17, "kʰ": 18, "k": 19, "ɤ˥˩": 20, "w": 21, "o˥": 22, "t̠ʃ̺ʰ": 23, "ə˥": 24, "ŋ": 25, "t": 26, "ʊ˥": 27, "ɕ": 28, "i": 29, "a": 30, "l": 31, "au˧˩˧": 32, "x": 33, "u˧˩˧": 34, "i˥": 35, "ei˧˩˧": 36, "pʰ": 37, "i˧˥": 38, "ai˧˥": 39, "ou˧˩˧": 40, "ɤ˧˥": 41, "o˧˩˧": 42, "tɕ": 43, "au˥˩": 44, "ts": 45, "ə˧˩˧": 46, "ɤ˥": 47, "ei˧˥": 48, "ʊ˧˥": 49, "i˧˩˧": 50, "t̠ʃ̺": 51, "ɻ̩˧˩˧": 52, "ei˥˩": 53, "s": 54, "u˥˩": 55, "ɹ̪̩": 56, "ai˥": 57, "u˥": 58, "tɕʰ": 59, "a˧˩˧": 60, "ai˥˩": 61, "ɛ˥˩": 62, "f": 63, "i˥˩": 64, "y˥˩": 65, "au˧˥": 66, "ɻ": 67, "ou˥˩": 68, "e˥": 69, "tʰ": 70, "ɹ̪̩˥˩": 71, "ɛ˧˥": 72, "au˥": 73, "ou˧˥": 74, "e˧˩˧": 75, "ɛ˥": 76, "ɻ̩˥": 77, "ɥ": 78, "ɹ̪̩˧˩˧": 79, "ai˧˩˧": 80, "ou˥": 81, "o˥˩": 82, "ɛ˧˩˧": 83, "ʊ˧˩˧": 84, "ɔ˥": 85, "tsʰ": 86, "ei": 87, "ə˥˩": 88, "o": 89, "ʊ˥˩": 90, "ou": 91, "ɤ˧˩˧": 92, "o˧˥": 93, "ei˥": 94, "e˥˩": 95, "ɚ˧˩˧": 96, "y˥": 97, "ɚ˥˩": 98, "y˧˥": 99, "ɻ̩": 100, "y˧˩˧": 101, "ɹ̪̩˥": 102, "ɻ̩˧˥": 103, "u": 104, "ə": 105, "ai": 106, "ʊ": 107, "e": 108, "ɚ˧˥": 109, "ɔ˥˩": 110, "ɹ̪̩˧˥": 111, "ɛ": 112, "y": 113, "m˧˥": 114 }, "unk_token": "UNK" } }