foxxy-hm committed on
Commit
4347067
·
1 Parent(s): 53c9672

Upload processor

Browse files
preprocessor_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0.0,
7
+ "processor_class": "Wav2Vec2Processor",
8
+ "return_attention_mask": false,
9
+ "sampling_rate": 16000
10
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "eos_token": "</s>",
4
+ "pad_token": "[PAD]",
5
+ "unk_token": "[UNK]"
6
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "clean_up_tokenization_spaces": true,
4
+ "do_lower_case": false,
5
+ "eos_token": "</s>",
6
+ "model_max_length": 1000000000000000019884624838656,
7
+ "pad_token": "[PAD]",
8
+ "processor_class": "Wav2Vec2Processor",
9
+ "replace_word_delimiter_char": " ",
10
+ "target_lang": null,
11
+ "tokenizer_class": "Wav2Vec2CTCTokenizer",
12
+ "unk_token": "[UNK]",
13
+ "word_delimiter_token": "|"
14
+ }
vocab.json CHANGED
@@ -1 +1,112 @@
1
- {"c": 0, "\u1eef": 1, "i": 2, "\u1eb7": 3, "\u1eed": 4, "\u00e8": 5, "\u1edb": 6, "\u1ef3": 7, "\u1ea3": 8, "o": 9, "\u00e2": 10, "\u00fd": 11, "\u00e3": 12, "\u0300": 13, "t": 14, "\u1ebf": 15, "\u1eb1": 16, "\u1ef5": 17, "\u1ecd": 18, "\u00f9": 19, "\u01a1": 20, "\u1ecf": 21, "r": 22, "d": 23, "q": 24, "\u00f5": 25, "\u1eaf": 26, "h": 27, "\u1eeb": 28, "\u00f2": 29, "\u1ed5": 30, "5": 31, "a": 32, "z": 33, "\u1ead": 34, "\u1ef1": 35, "s": 36, "w": 37, "g": 38, "\u1edd": 39, "\u1ea1": 40, "\u1eb9": 41, "e": 42, "4": 43, "\u0323": 44, "\u0443": 45, "\u1ed1": 46, "\u1ed7": 47, "\u1ef7": 48, "\u00ec": 49, "\u01b0": 50, "3": 51, "\u1ebd": 52, "p": 53, "\u1ed3": 54, "\u1ec9": 55, "\u1ecb": 56, "\u1ea5": 57, "y": 58, "\u1ec1": 59, "x": 60, "\u1ee3": 61, "\u1ea9": 62, "\u0129": 63, "n": 64, "1": 65, "\u1eab": 66, "6": 67, "\u1edf": 68, "\u1ee1": 69, "9": 70, "\u00e1": 71, "v": 72, "\u00f3": 73, "\u0111": 74, "l": 75, "\u1ea7": 76, "\u0169": 77, "f": 78, "\u1eb3": 79, "8": 80, "\u00e9": 81, "0": 82, "\u1ebb": 83, "\u00ea": 84, "\u00fa": 85, "\u1ec5": 86, "\u1ee7": 87, "\u1ef9": 88, "\u0301": 89, "\u00ed": 90, "\u00e0": 91, "\u1ee9": 92, "2": 93, "%": 94, "\u0103": 95, "k": 96, "\u1ee5": 97, "\u1ec7": 98, "b": 100, "u": 101, "\u00f4": 102, "\u1eb5": 103, "\u1ec3": 104, "m": 105, "\u1ed9": 106, "7": 107, "|": 99, "[UNK]": 108, "[PAD]": 109}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "%": 94,
3
+ "0": 82,
4
+ "1": 65,
5
+ "2": 93,
6
+ "3": 51,
7
+ "4": 43,
8
+ "5": 31,
9
+ "6": 67,
10
+ "7": 107,
11
+ "8": 80,
12
+ "9": 70,
13
+ "[PAD]": 109,
14
+ "[UNK]": 108,
15
+ "a": 32,
16
+ "b": 100,
17
+ "c": 0,
18
+ "d": 23,
19
+ "e": 42,
20
+ "f": 78,
21
+ "g": 38,
22
+ "h": 27,
23
+ "i": 2,
24
+ "k": 96,
25
+ "l": 75,
26
+ "m": 105,
27
+ "n": 64,
28
+ "o": 9,
29
+ "p": 53,
30
+ "q": 24,
31
+ "r": 22,
32
+ "s": 36,
33
+ "t": 14,
34
+ "u": 101,
35
+ "v": 72,
36
+ "w": 37,
37
+ "x": 60,
38
+ "y": 58,
39
+ "z": 33,
40
+ "|": 99,
41
+ "à": 91,
42
+ "á": 71,
43
+ "â": 10,
44
+ "ã": 12,
45
+ "è": 5,
46
+ "é": 81,
47
+ "ê": 84,
48
+ "ì": 49,
49
+ "í": 90,
50
+ "ò": 29,
51
+ "ó": 73,
52
+ "ô": 102,
53
+ "õ": 25,
54
+ "ù": 19,
55
+ "ú": 85,
56
+ "ý": 11,
57
+ "ă": 95,
58
+ "đ": 74,
59
+ "ĩ": 63,
60
+ "ũ": 77,
61
+ "ơ": 20,
62
+ "ư": 50,
63
+ "̀": 13,
64
+ "́": 89,
65
+ "̣": 44,
66
+ "у": 45,
67
+ "ạ": 40,
68
+ "ả": 8,
69
+ "ấ": 57,
70
+ "ầ": 76,
71
+ "ẩ": 62,
72
+ "ẫ": 66,
73
+ "ậ": 34,
74
+ "ắ": 26,
75
+ "ằ": 16,
76
+ "ẳ": 79,
77
+ "ẵ": 103,
78
+ "ặ": 3,
79
+ "ẹ": 41,
80
+ "ẻ": 83,
81
+ "ẽ": 52,
82
+ "ế": 15,
83
+ "ề": 59,
84
+ "ể": 104,
85
+ "ễ": 86,
86
+ "ệ": 98,
87
+ "ỉ": 55,
88
+ "ị": 56,
89
+ "ọ": 18,
90
+ "ỏ": 21,
91
+ "ố": 46,
92
+ "ồ": 54,
93
+ "ổ": 30,
94
+ "ỗ": 47,
95
+ "ộ": 106,
96
+ "ớ": 6,
97
+ "ờ": 39,
98
+ "ở": 68,
99
+ "ỡ": 69,
100
+ "ợ": 61,
101
+ "ụ": 97,
102
+ "ủ": 87,
103
+ "ứ": 92,
104
+ "ừ": 28,
105
+ "ử": 4,
106
+ "ữ": 1,
107
+ "ự": 35,
108
+ "ỳ": 7,
109
+ "ỵ": 17,
110
+ "ỷ": 48,
111
+ "ỹ": 88
112
+ }