nrshoudi committed on
Commit
82a736e
1 Parent(s): 3b19454

Upload tokenizer

Browse files
Files changed (4) hide show
  1. README.md +1 -1
  2. added_tokens.json +2 -2
  3. tokenizer_config.json +4 -4
  4. vocab.json +117 -35
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
  license: apache-2.0
3
- base_model: facebook/hubert-large-ll60k
4
  tags:
5
  - generated_from_trainer
 
6
  metrics:
7
  - wer
8
  model-index:
 
1
  ---
2
  license: apache-2.0
 
3
  tags:
4
  - generated_from_trainer
5
+ base_model: facebook/hubert-large-ll60k
6
  metrics:
7
  - wer
8
  model-index:
added_tokens.json CHANGED
@@ -1,4 +1,4 @@
1
  {
2
- "</s>": 37,
3
- "<s>": 36
4
  }
 
1
  {
2
+ "</s>": 119,
3
+ "<s>": 118
4
  }
tokenizer_config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "added_tokens_decoder": {
3
- "34": {
4
  "content": "[UNK]",
5
  "lstrip": false,
6
  "normalized": true,
@@ -8,7 +8,7 @@
8
  "single_word": false,
9
  "special": true
10
  },
11
- "35": {
12
  "content": "[PAD]",
13
  "lstrip": false,
14
  "normalized": true,
@@ -16,7 +16,7 @@
16
  "single_word": false,
17
  "special": true
18
  },
19
- "36": {
20
  "content": "<s>",
21
  "lstrip": false,
22
  "normalized": true,
@@ -24,7 +24,7 @@
24
  "single_word": false,
25
  "special": true
26
  },
27
- "37": {
28
  "content": "</s>",
29
  "lstrip": false,
30
  "normalized": true,
 
1
  {
2
  "added_tokens_decoder": {
3
+ "116": {
4
  "content": "[UNK]",
5
  "lstrip": false,
6
  "normalized": true,
 
8
  "single_word": false,
9
  "special": true
10
  },
11
+ "117": {
12
  "content": "[PAD]",
13
  "lstrip": false,
14
  "normalized": true,
 
16
  "single_word": false,
17
  "special": true
18
  },
19
+ "118": {
20
  "content": "<s>",
21
  "lstrip": false,
22
  "normalized": true,
 
24
  "single_word": false,
25
  "special": true
26
  },
27
+ "119": {
28
  "content": "</s>",
29
  "lstrip": false,
30
  "normalized": true,
vocab.json CHANGED
@@ -1,38 +1,120 @@
1
  {
2
  " ": 0,
3
- "!": 1,
4
- "\"": 2,
5
- "'": 3,
6
- ",": 4,
7
- ".": 5,
8
- ":": 6,
9
- "?": 7,
10
- "A": 8,
11
- "B": 9,
12
- "C": 10,
13
- "D": 11,
14
- "E": 12,
15
- "F": 13,
16
- "G": 14,
17
- "H": 15,
18
- "I": 16,
19
- "J": 17,
20
- "K": 18,
21
- "L": 19,
22
- "M": 20,
23
- "N": 21,
24
- "O": 22,
25
- "P": 23,
26
- "Q": 24,
27
- "R": 25,
28
- "S": 26,
29
- "T": 27,
30
- "U": 28,
31
- "V": 29,
32
- "W": 30,
33
- "X": 31,
34
- "Y": 32,
35
- "Z": 33,
36
- "[PAD]": 35,
37
- "[UNK]": 34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  }
 
1
  {
2
  " ": 0,
3
+ "<DEL>": 1,
4
+ "<unk>": 2,
5
+ "AA": 3,
6
+ "AA0": 4,
7
+ "AA1": 5,
8
+ "AA2": 6,
9
+ "AE": 7,
10
+ "AE*": 8,
11
+ "AE0": 9,
12
+ "AE1": 10,
13
+ "AE2": 11,
14
+ "AH": 12,
15
+ "AH*": 13,
16
+ "AH0": 14,
17
+ "AH1": 15,
18
+ "AH2": 16,
19
+ "AO": 17,
20
+ "AO*": 18,
21
+ "AO0": 19,
22
+ "AO1": 20,
23
+ "AO2": 21,
24
+ "AR": 22,
25
+ "AW": 23,
26
+ "AW0": 24,
27
+ "AW1": 25,
28
+ "AW2": 26,
29
+ "AY": 27,
30
+ "AY*": 28,
31
+ "AY0": 29,
32
+ "AY1": 30,
33
+ "AY2": 31,
34
+ "B": 32,
35
+ "B*": 33,
36
+ "CH": 34,
37
+ "CH*": 35,
38
+ "D": 36,
39
+ "D*": 37,
40
+ "DH": 38,
41
+ "DR": 39,
42
+ "DZ": 40,
43
+ "EH": 41,
44
+ "EH*": 42,
45
+ "EH0": 43,
46
+ "EH1": 44,
47
+ "EH2": 45,
48
+ "ER": 46,
49
+ "ER*": 47,
50
+ "ER0": 48,
51
+ "ER1": 49,
52
+ "EY": 50,
53
+ "EY*": 51,
54
+ "EY0": 52,
55
+ "EY1": 53,
56
+ "EY2": 54,
57
+ "F": 55,
58
+ "F*": 56,
59
+ "G": 57,
60
+ "G*": 58,
61
+ "HH": 59,
62
+ "IH": 60,
63
+ "IH*": 61,
64
+ "IH0": 62,
65
+ "IH1": 63,
66
+ "IH2": 64,
67
+ "IR": 65,
68
+ "IY": 66,
69
+ "IY*": 67,
70
+ "IY0": 68,
71
+ "IY1": 69,
72
+ "JH": 70,
73
+ "JH*": 71,
74
+ "K": 72,
75
+ "K*": 73,
76
+ "L": 74,
77
+ "L*": 75,
78
+ "M": 76,
79
+ "M*": 77,
80
+ "N": 78,
81
+ "N*": 79,
82
+ "NG": 80,
83
+ "NG*": 81,
84
+ "OW": 82,
85
+ "OW*": 83,
86
+ "OW0": 84,
87
+ "OW1": 85,
88
+ "OY0": 86,
89
+ "OY1": 87,
90
+ "P": 88,
91
+ "P*": 89,
92
+ "R": 90,
93
+ "R*": 91,
94
+ "S": 92,
95
+ "S*": 93,
96
+ "SH": 94,
97
+ "T": 95,
98
+ "T*": 96,
99
+ "TH": 97,
100
+ "TH*": 98,
101
+ "TR": 99,
102
+ "TS": 100,
103
+ "UH": 101,
104
+ "UH*": 102,
105
+ "UH0": 103,
106
+ "UH1": 104,
107
+ "UW": 105,
108
+ "UW0": 106,
109
+ "UW1": 107,
110
+ "UW2": 108,
111
+ "V": 109,
112
+ "V*": 110,
113
+ "W": 111,
114
+ "W*": 112,
115
+ "Y": 113,
116
+ "Z": 114,
117
+ "ZH": 115,
118
+ "[PAD]": 117,
119
+ "[UNK]": 116
120
  }