karline committed on
Commit
1073961
·
1 Parent(s): 5eabd70

Upload tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +6 -0
  2. tokenizer_config.json +10 -0
  3. vocab.json +77 -0
special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "eos_token": "</s>",
4
+ "pad_token": "[PAD]",
5
+ "unk_token": "[UNK]"
6
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "do_lower_case": false,
4
+ "eos_token": "</s>",
5
+ "pad_token": "[PAD]",
6
+ "replace_word_delimiter_char": " ",
7
+ "tokenizer_class": "Wav2Vec2CTCTokenizer",
8
+ "unk_token": "[UNK]",
9
+ "word_delimiter_token": "|"
10
+ }
vocab.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "'": 71,
3
+ "[PAD]": 74,
4
+ "[UNK]": 73,
5
+ "a": 35,
6
+ "b": 14,
7
+ "c": 25,
8
+ "d": 12,
9
+ "e": 32,
10
+ "f": 54,
11
+ "g": 40,
12
+ "h": 21,
13
+ "i": 39,
14
+ "j": 38,
15
+ "k": 27,
16
+ "l": 30,
17
+ "m": 53,
18
+ "n": 23,
19
+ "o": 34,
20
+ "p": 72,
21
+ "q": 15,
22
+ "r": 43,
23
+ "s": 9,
24
+ "t": 5,
25
+ "u": 64,
26
+ "v": 18,
27
+ "w": 69,
28
+ "x": 47,
29
+ "y": 36,
30
+ "z": 48,
31
+ "|": 62,
32
+ "ꯀ": 51,
33
+ "ꯁ": 11,
34
+ "ꯂ": 45,
35
+ "ꯃ": 50,
36
+ "ꯄ": 70,
37
+ "ꯅ": 13,
38
+ "ꯆ": 22,
39
+ "ꯇ": 59,
40
+ "ꯈ": 63,
41
+ "ꯉ": 2,
42
+ "ꯊ": 16,
43
+ "ꯋ": 41,
44
+ "ꯌ": 24,
45
+ "ꯍ": 66,
46
+ "ꯎ": 42,
47
+ "ꯏ": 46,
48
+ "ꯐ": 6,
49
+ "ꯑ": 3,
50
+ "ꯒ": 0,
51
+ "ꯓ": 49,
52
+ "ꯔ": 52,
53
+ "ꯕ": 8,
54
+ "ꯖ": 20,
55
+ "ꯗ": 44,
56
+ "ꯘ": 58,
57
+ "ꯙ": 33,
58
+ "ꯚ": 17,
59
+ "ꯛ": 26,
60
+ "ꯜ": 1,
61
+ "ꯝ": 60,
62
+ "ꯞ": 68,
63
+ "ꯟ": 4,
64
+ "ꯠ": 55,
65
+ "ꯡ": 56,
66
+ "ꯢ": 19,
67
+ "ꯣ": 65,
68
+ "ꯤ": 7,
69
+ "ꯥ": 10,
70
+ "ꯦ": 29,
71
+ "ꯧ": 67,
72
+ "ꯨ": 57,
73
+ "ꯩ": 37,
74
+ "ꯪ": 28,
75
+ "꯬": 31,
76
+ "꯭": 61
77
+ }