Upload tokenizer
Browse files- tokenizer.json +149 -1
- vocab.json +1 -1
tokenizer.json
CHANGED
@@ -105,7 +105,155 @@
|
|
105 |
"UNK": 0,
|
106 |
"PAD": 1,
|
107 |
"WORD_BOUNDARY": 2,
|
108 |
-
"UTT_BOUNDARY": 3
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
109 |
},
|
110 |
"unk_token": "UNK"
|
111 |
}
|
|
|
105 |
"UNK": 0,
|
106 |
"PAD": 1,
|
107 |
"WORD_BOUNDARY": 2,
|
108 |
+
"UTT_BOUNDARY": 3,
|
109 |
+
"aː˧": 4,
|
110 |
+
"t": 5,
|
111 |
+
"ɐ˥": 6,
|
112 |
+
"k": 7,
|
113 |
+
"l": 8,
|
114 |
+
"j": 9,
|
115 |
+
"ʊ˥": 10,
|
116 |
+
"aː˧˩̰": 11,
|
117 |
+
"ɛː˥": 12,
|
118 |
+
"n": 13,
|
119 |
+
"ei˩˧": 14,
|
120 |
+
"w": 15,
|
121 |
+
"aː˨": 16,
|
122 |
+
"ɐi˧˥": 17,
|
123 |
+
"m̩˧˥": 18,
|
124 |
+
"m": 19,
|
125 |
+
"ou˥": 20,
|
126 |
+
"aː˧˥": 21,
|
127 |
+
"ei˥": 22,
|
128 |
+
"iː˧": 23,
|
129 |
+
"ts": 24,
|
130 |
+
"ɔː˧˥": 25,
|
131 |
+
"tʰ": 26,
|
132 |
+
"iː˥": 27,
|
133 |
+
"f": 28,
|
134 |
+
"aːĭ˧": 29,
|
135 |
+
"ɐ˨": 30,
|
136 |
+
"p": 31,
|
137 |
+
"h": 32,
|
138 |
+
"ɵy˧": 33,
|
139 |
+
"aː˥": 34,
|
140 |
+
"ou˨": 35,
|
141 |
+
"ɔː˧": 36,
|
142 |
+
"ɐi˧˩̰": 37,
|
143 |
+
"uː˧": 38,
|
144 |
+
"ŋ": 39,
|
145 |
+
"s": 40,
|
146 |
+
"ɔːĭ˥": 41,
|
147 |
+
"ɐu˨": 42,
|
148 |
+
"iː˨": 43,
|
149 |
+
"ei˧˥": 44,
|
150 |
+
"ɐi˨": 45,
|
151 |
+
"ʊ˧˩̰": 46,
|
152 |
+
"ʊ˨": 47,
|
153 |
+
"aː˩˧": 48,
|
154 |
+
"aːĭ˧˥": 49,
|
155 |
+
"ɔː˨": 50,
|
156 |
+
"ɛː˩˧": 51,
|
157 |
+
"ɪ˨": 52,
|
158 |
+
"iːŭ˧": 53,
|
159 |
+
"ɛː˧˩̰": 54,
|
160 |
+
"ɪ˧˥": 55,
|
161 |
+
"̩˧˩̰": 56,
|
162 |
+
"ɵ˧˥": 57,
|
163 |
+
"ei˧": 58,
|
164 |
+
"ɐu˧˩̰": 59,
|
165 |
+
"m̩˧": 60,
|
166 |
+
"ɐu˧˥": 61,
|
167 |
+
"ɐu˩˧": 62,
|
168 |
+
"ɐi˥": 63,
|
169 |
+
"ɔː˥": 64,
|
170 |
+
"ɔːĭ˧": 65,
|
171 |
+
"ou˧˥": 66,
|
172 |
+
"ou˩˧": 67,
|
173 |
+
"ɐ˧": 68,
|
174 |
+
"tsʰ": 69,
|
175 |
+
"ɛː˧˥": 70,
|
176 |
+
"iː˧˥": 71,
|
177 |
+
"ɔː˩˧": 72,
|
178 |
+
"kʰ": 73,
|
179 |
+
"ɐ˧˩̰": 74,
|
180 |
+
"aːŭ˧˥": 75,
|
181 |
+
"pʰ": 76,
|
182 |
+
"aːĭ˧˩̰": 77,
|
183 |
+
"ɵy˩˧": 78,
|
184 |
+
"ɵ˧": 79,
|
185 |
+
"ɛː˧": 80,
|
186 |
+
"ei˧˩̰": 81,
|
187 |
+
"uː˧˥": 82,
|
188 |
+
"ɔː˧˩̰": 83,
|
189 |
+
"ɛː˨": 84,
|
190 |
+
"uː˥": 85,
|
191 |
+
"ʊ˧": 86,
|
192 |
+
"iː˧˩̰": 87,
|
193 |
+
"yː˨": 88,
|
194 |
+
"aːŭ˧": 89,
|
195 |
+
"œː˩˧": 90,
|
196 |
+
"ɐ˧˥": 91,
|
197 |
+
"iː˩˧": 92,
|
198 |
+
"ɪ˧˩̰": 93,
|
199 |
+
"iːŭ˧˩̰": 94,
|
200 |
+
"œː˧˥": 95,
|
201 |
+
"yː˧": 96,
|
202 |
+
"uːĭ˩˧": 97,
|
203 |
+
"ɵy˧˥": 98,
|
204 |
+
"yː˧˩̰": 99,
|
205 |
+
"ɔːĭ˧˥": 100,
|
206 |
+
"ɛː": 101,
|
207 |
+
"u˨": 102,
|
208 |
+
"ou˧": 103,
|
209 |
+
"ei˨": 104,
|
210 |
+
"ɐu˥": 105,
|
211 |
+
"ɵ˥": 106,
|
212 |
+
"uː˧˩̰": 107,
|
213 |
+
"yː˥": 108,
|
214 |
+
"ɪ˥": 109,
|
215 |
+
"œː˥": 110,
|
216 |
+
"œː˧˩̰": 111,
|
217 |
+
"aːĭ˨": 112,
|
218 |
+
"ɐ˩˧": 113,
|
219 |
+
"œː˧": 114,
|
220 |
+
"uːĭ˧˥": 115,
|
221 |
+
"ɐu˧": 116,
|
222 |
+
"ɐi˧": 117,
|
223 |
+
"ou˧˩̰": 118,
|
224 |
+
"aːĭ˥": 119,
|
225 |
+
"aːŭ˥": 120,
|
226 |
+
"yː˧˥": 121,
|
227 |
+
"iːŭ˥": 122,
|
228 |
+
"ɔːĭ˨": 123,
|
229 |
+
"ʊ˧˥": 124,
|
230 |
+
"m̩˥": 125,
|
231 |
+
"iːŭ˧˥": 126,
|
232 |
+
"ɐi˩˧": 127,
|
233 |
+
"ɵy˥": 128,
|
234 |
+
"uːĭ˧": 129,
|
235 |
+
"ɵy˧˩̰": 130,
|
236 |
+
"uːĭ˥": 131,
|
237 |
+
"aːŭ˧˩̰": 132,
|
238 |
+
"yː˩˧": 133,
|
239 |
+
"ɔːĭ˧˩̰": 134,
|
240 |
+
"aːŭ˩˧": 135,
|
241 |
+
"aːĭ˩˧": 136,
|
242 |
+
"uːĭ˨": 137,
|
243 |
+
"œː˨": 138,
|
244 |
+
"uː˨": 139,
|
245 |
+
"ɵy˨": 140,
|
246 |
+
"aːŭ˨": 141,
|
247 |
+
"m̩˩˧": 142,
|
248 |
+
"ŋ˩˧": 143,
|
249 |
+
"ɪ˧": 144,
|
250 |
+
"m̩˨": 145,
|
251 |
+
"iːŭ˩˧": 146,
|
252 |
+
"iːŭ˨": 147,
|
253 |
+
"ɵ˨": 148,
|
254 |
+
"uːĭ˧˩̰": 149,
|
255 |
+
"uː˩˧": 150,
|
256 |
+
"ɵ˧˩̰": 151
|
257 |
},
|
258 |
"unk_token": "UNK"
|
259 |
}
|
vocab.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"UNK":0,"PAD":1,"WORD_BOUNDARY":2,"UTT_BOUNDARY":3}
|
|
|
1 |
+
{"UNK":0,"PAD":1,"WORD_BOUNDARY":2,"UTT_BOUNDARY":3,"aː˧":4,"t":5,"ɐ˥":6,"k":7,"l":8,"j":9,"ʊ˥":10,"aː˧˩̰":11,"ɛː˥":12,"n":13,"ei˩˧":14,"w":15,"aː˨":16,"ɐi˧˥":17,"m̩˧˥":18,"m":19,"ou˥":20,"aː˧˥":21,"ei˥":22,"iː˧":23,"ts":24,"ɔː˧˥":25,"tʰ":26,"iː˥":27,"f":28,"aːĭ˧":29,"ɐ˨":30,"p":31,"h":32,"ɵy˧":33,"aː˥":34,"ou˨":35,"ɔː˧":36,"ɐi˧˩̰":37,"uː˧":38,"ŋ":39,"s":40,"ɔːĭ˥":41,"ɐu˨":42,"iː˨":43,"ei˧˥":44,"ɐi˨":45,"ʊ˧˩̰":46,"ʊ˨":47,"aː˩˧":48,"aːĭ˧˥":49,"ɔː˨":50,"ɛː˩˧":51,"ɪ˨":52,"iːŭ˧":53,"ɛː˧˩̰":54,"ɪ˧˥":55,"̩˧˩̰":56,"ɵ˧˥":57,"ei˧":58,"ɐu˧˩̰":59,"m̩˧":60,"ɐu˧˥":61,"ɐu˩˧":62,"ɐi˥":63,"ɔː˥":64,"ɔːĭ˧":65,"ou˧˥":66,"ou˩˧":67,"ɐ˧":68,"tsʰ":69,"ɛː˧˥":70,"iː˧˥":71,"ɔː˩˧":72,"kʰ":73,"ɐ˧˩̰":74,"aːŭ˧˥":75,"pʰ":76,"aːĭ˧˩̰":77,"ɵy˩˧":78,"ɵ˧":79,"ɛː˧":80,"ei˧˩̰":81,"uː˧˥":82,"ɔː˧˩̰":83,"ɛː˨":84,"uː˥":85,"ʊ˧":86,"iː˧˩̰":87,"yː˨":88,"aːŭ˧":89,"œː˩˧":90,"ɐ˧˥":91,"iː˩˧":92,"ɪ˧˩̰":93,"iːŭ˧˩̰":94,"œː˧˥":95,"yː˧":96,"uːĭ˩˧":97,"ɵy˧˥":98,"yː˧˩̰":99,"ɔːĭ˧˥":100,"ɛː":101,"u˨":102,"ou˧":103,"ei˨":104,"ɐu˥":105,"ɵ˥":106,"uː˧˩̰":107,"yː˥":108,"ɪ˥":109,"œː˥":110,"œː˧˩̰":111,"aːĭ˨":112,"ɐ˩˧":113,"œː˧":114,"uːĭ˧˥":115,"ɐu˧":116,"ɐi˧":117,"ou˧˩̰":118,"aːĭ˥":119,"aːŭ˥":120,"yː˧˥":121,"iːŭ˥":122,"ɔːĭ˨":123,"ʊ˧˥":124,"m̩˥":125,"iːŭ˧˥":126,"ɐi˩˧":127,"ɵy˥":128,"uːĭ˧":129,"ɵy˧˩̰":130,"uːĭ˥":131,"aːŭ˧˩̰":132,"yː˩˧":133,"ɔːĭ˧˩̰":134,"aːŭ˩˧":135,"aːĭ˩˧":136,"uːĭ˨":137,"œː˨":138,"uː˨":139,"ɵy˨":140,"aːŭ˨":141,"m̩˩˧":142,"ŋ˩˧":143,"ɪ˧":144,"m̩˨":145,"iːŭ˩˧":146,"iːŭ˨":147,"ɵ˨":148,"uːĭ˧˩̰":149,"uː˩˧":150,"ɵ˧˩̰":151}
|