foxxy-hm committed on
Commit
e4638f2
·
1 Parent(s): 5ab442b

Upload processor

Browse files
preprocessor_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0.0,
7
+ "processor_class": "Wav2Vec2Processor",
8
+ "return_attention_mask": false,
9
+ "sampling_rate": 16000
10
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "eos_token": "</s>",
4
+ "pad_token": "[PAD]",
5
+ "unk_token": "[UNK]"
6
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "clean_up_tokenization_spaces": true,
4
+ "do_lower_case": false,
5
+ "eos_token": "</s>",
6
+ "model_max_length": 1000000000000000019884624838656,
7
+ "pad_token": "[PAD]",
8
+ "processor_class": "Wav2Vec2Processor",
9
+ "replace_word_delimiter_char": " ",
10
+ "target_lang": null,
11
+ "tokenizer_class": "Wav2Vec2CTCTokenizer",
12
+ "unk_token": "[UNK]",
13
+ "word_delimiter_token": "|"
14
+ }
vocab.json CHANGED
@@ -1 +1,112 @@
1
- {"\u00e2": 0, "\u00f4": 1, "\u1ef7": 2, "\u1eeb": 3, "\u1ec3": 4, "\u0111": 5, "\u1ef1": 6, "i": 7, "r": 8, "\u1ec1": 9, "d": 10, "\u1ecf": 11, "\u1eab": 12, "\u1eb7": 13, "l": 14, "\u00e8": 15, "\u00e0": 16, "\u00f9": 17, "v": 18, "\u1ec7": 19, "\u1ebd": 20, "8": 21, "\u0443": 22, "\u1ef3": 23, "7": 24, "5": 25, "z": 26, "\u01b0": 27, "\u01a1": 28, "4": 29, "f": 30, "\u1ec5": 31, "\u1ec9": 32, "\u0129": 33, "\u0169": 34, "2": 35, "%": 36, "0": 37, "\u1ee3": 38, "t": 39, "\u00fa": 40, "\u1ecb": 41, "\u1eb5": 42, "\u00e9": 43, "3": 44, "\u1eb3": 45, "k": 46, "h": 47, "\u1edf": 48, "\u00f3": 49, "\u00e1": 50, "1": 51, "\u1ed9": 52, "g": 53, "\u1edb": 54, "\u1eef": 55, "9": 56, "\u1ea7": 57, "\u0323": 58, "\u1edd": 59, "\u00ec": 60, "o": 61, "\u1eaf": 62, "c": 63, "p": 64, "\u00e3": 65, "q": 66, "\u00fd": 67, "\u1eb9": 68, "u": 69, "b": 70, "\u1ebf": 71, "\u1ef5": 72, "\u1eed": 73, "\u1ea5": 74, "\u1ed3": 75, "m": 76, "\u1ea3": 77, "\u1ed7": 78, "\u1ead": 79, "\u00ed": 80, "\u00f2": 81, "\u00ea": 82, "\u1ee9": 83, "\u1ed5": 84, "\u1ef9": 85, "\u00f5": 86, "\u1ea1": 87, "\u1ea9": 88, "\u0301": 89, "w": 90, "\u1ebb": 91, "s": 92, "\u1ecd": 93, "6": 94, "a": 95, "n": 97, "y": 98, "\u1ed1": 99, "\u0300": 100, "\u1ee1": 101, "e": 102, "\u1eb1": 103, "\u1ee5": 104, "\u1ee7": 105, "\u0103": 106, "x": 107, "|": 96, "[UNK]": 108, "[PAD]": 109}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "%": 36,
3
+ "0": 37,
4
+ "1": 51,
5
+ "2": 35,
6
+ "3": 44,
7
+ "4": 29,
8
+ "5": 25,
9
+ "6": 94,
10
+ "7": 24,
11
+ "8": 21,
12
+ "9": 56,
13
+ "[PAD]": 109,
14
+ "[UNK]": 108,
15
+ "a": 95,
16
+ "b": 70,
17
+ "c": 63,
18
+ "d": 10,
19
+ "e": 102,
20
+ "f": 30,
21
+ "g": 53,
22
+ "h": 47,
23
+ "i": 7,
24
+ "k": 46,
25
+ "l": 14,
26
+ "m": 76,
27
+ "n": 97,
28
+ "o": 61,
29
+ "p": 64,
30
+ "q": 66,
31
+ "r": 8,
32
+ "s": 92,
33
+ "t": 39,
34
+ "u": 69,
35
+ "v": 18,
36
+ "w": 90,
37
+ "x": 107,
38
+ "y": 98,
39
+ "z": 26,
40
+ "|": 96,
41
+ "à": 16,
42
+ "á": 50,
43
+ "â": 0,
44
+ "ã": 65,
45
+ "è": 15,
46
+ "é": 43,
47
+ "ê": 82,
48
+ "ì": 60,
49
+ "í": 80,
50
+ "ò": 81,
51
+ "ó": 49,
52
+ "ô": 1,
53
+ "õ": 86,
54
+ "ù": 17,
55
+ "ú": 40,
56
+ "ý": 67,
57
+ "ă": 106,
58
+ "đ": 5,
59
+ "ĩ": 33,
60
+ "ũ": 34,
61
+ "ơ": 28,
62
+ "ư": 27,
63
+ "̀": 100,
64
+ "́": 89,
65
+ "̣": 58,
66
+ "у": 22,
67
+ "ạ": 87,
68
+ "ả": 77,
69
+ "ấ": 74,
70
+ "ầ": 57,
71
+ "ẩ": 88,
72
+ "ẫ": 12,
73
+ "ậ": 79,
74
+ "ắ": 62,
75
+ "ằ": 103,
76
+ "ẳ": 45,
77
+ "ẵ": 42,
78
+ "ặ": 13,
79
+ "ẹ": 68,
80
+ "ẻ": 91,
81
+ "ẽ": 20,
82
+ "ế": 71,
83
+ "ề": 9,
84
+ "ể": 4,
85
+ "ễ": 31,
86
+ "ệ": 19,
87
+ "ỉ": 32,
88
+ "ị": 41,
89
+ "ọ": 93,
90
+ "ỏ": 11,
91
+ "ố": 99,
92
+ "ồ": 75,
93
+ "ổ": 84,
94
+ "ỗ": 78,
95
+ "ộ": 52,
96
+ "ớ": 54,
97
+ "ờ": 59,
98
+ "ở": 48,
99
+ "ỡ": 101,
100
+ "ợ": 38,
101
+ "ụ": 104,
102
+ "ủ": 105,
103
+ "ứ": 83,
104
+ "ừ": 3,
105
+ "ử": 73,
106
+ "ữ": 55,
107
+ "ự": 6,
108
+ "ỳ": 23,
109
+ "ỵ": 72,
110
+ "ỷ": 2,
111
+ "ỹ": 85
112
+ }