foxxy-hm committed on
Commit
dcb83ae
·
1 Parent(s): 5a0ad96

Upload processor

Browse files
preprocessor_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0.0,
7
+ "processor_class": "Wav2Vec2Processor",
8
+ "return_attention_mask": false,
9
+ "sampling_rate": 16000
10
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "eos_token": "</s>",
4
+ "pad_token": "[PAD]",
5
+ "unk_token": "[UNK]"
6
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "clean_up_tokenization_spaces": true,
4
+ "do_lower_case": false,
5
+ "eos_token": "</s>",
6
+ "model_max_length": 1000000000000000019884624838656,
7
+ "pad_token": "[PAD]",
8
+ "processor_class": "Wav2Vec2Processor",
9
+ "replace_word_delimiter_char": " ",
10
+ "target_lang": null,
11
+ "tokenizer_class": "Wav2Vec2CTCTokenizer",
12
+ "unk_token": "[UNK]",
13
+ "word_delimiter_token": "|"
14
+ }
vocab.json CHANGED
@@ -1 +1,107 @@
1
- {"q": 0, "\u0169": 1, "\u00fa": 2, "o": 3, "\u1ec3": 4, "\u0111": 5, "w": 6, "\u1ebd": 7, "\u1eed": 8, "\u01a1": 9, "\u1edf": 10, "\u1ee7": 11, "z": 12, "\u1ec7": 13, "2": 14, "8": 15, "k": 16, "\u1ed5": 17, "7": 18, "\u1ee5": 19, "\u1eeb": 20, "\u1ebf": 21, "p": 22, "\u1ec1": 23, "\u1eb3": 24, "\u0129": 25, "\u1ecd": 26, "n": 27, "r": 28, "\u00e3": 29, "\u1eaf": 30, "\u1ef7": 31, "\u1ec9": 32, "\u1ead": 33, "\u00e1": 34, "4": 35, "\u1ed3": 36, "g": 37, "\u1ed9": 38, "\u0103": 39, "h": 40, "/": 41, "\u00f3": 42, "e": 43, "\u00e9": 44, "c": 45, "\u00e2": 46, "6": 47, "\u1ef1": 48, "\u00ed": 49, "\u1eb7": 50, "9": 51, "\u1ebb": 52, "\u00e0": 53, "%": 54, "\u1ed1": 55, "l": 56, "b": 57, "\u1ecb": 58, "v": 59, "\u1eb1": 60, "d": 61, "\u1ea5": 62, "\u1ea1": 63, "f": 64, "\u1eef": 65, "u": 66, "\u00f2": 67, "\u00ea": 68, "\u1eab": 69, "\u1eb5": 70, "i": 71, "\u00f4": 72, "\u1ea9": 73, "t": 74, "x": 75, "3": 76, "\u1edd": 77, "\u1ef3": 78, "\u1eb9": 79, "\u1edb": 80, "\u1ecf": 81, "y": 82, "a": 83, "\u1ec5": 84, "\u1ed7": 86, "1": 87, "\u00e8": 88, "\u1ee1": 89, "\u00ec": 90, "\u00f9": 91, "\u1ee3": 92, "0": 93, "\u00fd": 94, "5": 95, "\u00f5": 96, "\u1ee9": 97, "\u1ea7": 98, "m": 99, "\u1ea3": 100, "s": 101, "\u01b0": 102, "|": 85, "[UNK]": 103, "[PAD]": 104}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "%": 54,
3
+ "/": 41,
4
+ "0": 93,
5
+ "1": 87,
6
+ "2": 14,
7
+ "3": 76,
8
+ "4": 35,
9
+ "5": 95,
10
+ "6": 47,
11
+ "7": 18,
12
+ "8": 15,
13
+ "9": 51,
14
+ "[PAD]": 104,
15
+ "[UNK]": 103,
16
+ "a": 83,
17
+ "b": 57,
18
+ "c": 45,
19
+ "d": 61,
20
+ "e": 43,
21
+ "f": 64,
22
+ "g": 37,
23
+ "h": 40,
24
+ "i": 71,
25
+ "k": 16,
26
+ "l": 56,
27
+ "m": 99,
28
+ "n": 27,
29
+ "o": 3,
30
+ "p": 22,
31
+ "q": 0,
32
+ "r": 28,
33
+ "s": 101,
34
+ "t": 74,
35
+ "u": 66,
36
+ "v": 59,
37
+ "w": 6,
38
+ "x": 75,
39
+ "y": 82,
40
+ "z": 12,
41
+ "|": 85,
42
+ "à": 53,
43
+ "á": 34,
44
+ "â": 46,
45
+ "ã": 29,
46
+ "è": 88,
47
+ "é": 44,
48
+ "ê": 68,
49
+ "ì": 90,
50
+ "í": 49,
51
+ "ò": 67,
52
+ "ó": 42,
53
+ "ô": 72,
54
+ "õ": 96,
55
+ "ù": 91,
56
+ "ú": 2,
57
+ "ý": 94,
58
+ "ă": 39,
59
+ "đ": 5,
60
+ "ĩ": 25,
61
+ "ũ": 1,
62
+ "ơ": 9,
63
+ "ư": 102,
64
+ "ạ": 63,
65
+ "ả": 100,
66
+ "ấ": 62,
67
+ "ầ": 98,
68
+ "ẩ": 73,
69
+ "ẫ": 69,
70
+ "ậ": 33,
71
+ "ắ": 30,
72
+ "ằ": 60,
73
+ "ẳ": 24,
74
+ "ẵ": 70,
75
+ "ặ": 50,
76
+ "ẹ": 79,
77
+ "ẻ": 52,
78
+ "ẽ": 7,
79
+ "ế": 21,
80
+ "ề": 23,
81
+ "ể": 4,
82
+ "ễ": 84,
83
+ "ệ": 13,
84
+ "ỉ": 32,
85
+ "ị": 58,
86
+ "ọ": 26,
87
+ "ỏ": 81,
88
+ "ố": 55,
89
+ "ồ": 36,
90
+ "ổ": 17,
91
+ "ỗ": 86,
92
+ "ộ": 38,
93
+ "ớ": 80,
94
+ "ờ": 77,
95
+ "ở": 10,
96
+ "ỡ": 89,
97
+ "ợ": 92,
98
+ "ụ": 19,
99
+ "ủ": 11,
100
+ "ứ": 97,
101
+ "ừ": 20,
102
+ "ử": 8,
103
+ "ữ": 65,
104
+ "ự": 48,
105
+ "ỳ": 78,
106
+ "ỷ": 31
107
+ }