codebyzeb commited on
Commit
09eaaab
·
verified ·
1 Parent(s): 32264ad

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +149 -1
  2. vocab.json +1 -1
tokenizer.json CHANGED
@@ -105,7 +105,155 @@
105
  "UNK": 0,
106
  "PAD": 1,
107
  "WORD_BOUNDARY": 2,
108
- "UTT_BOUNDARY": 3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  },
110
  "unk_token": "UNK"
111
  }
 
105
  "UNK": 0,
106
  "PAD": 1,
107
  "WORD_BOUNDARY": 2,
108
+ "UTT_BOUNDARY": 3,
109
+ "aː˧": 4,
110
+ "t": 5,
111
+ "ɐ˥": 6,
112
+ "k": 7,
113
+ "l": 8,
114
+ "j": 9,
115
+ "ʊ˥": 10,
116
+ "aː˧˩̰": 11,
117
+ "ɛː˥": 12,
118
+ "n": 13,
119
+ "ei˩˧": 14,
120
+ "w": 15,
121
+ "aː˨": 16,
122
+ "ɐi˧˥": 17,
123
+ "m̩˧˥": 18,
124
+ "m": 19,
125
+ "ou˥": 20,
126
+ "aː˧˥": 21,
127
+ "ei˥": 22,
128
+ "iː˧": 23,
129
+ "ts": 24,
130
+ "ɔː˧˥": 25,
131
+ "tʰ": 26,
132
+ "iː˥": 27,
133
+ "f": 28,
134
+ "aːĭ˧": 29,
135
+ "ɐ˨": 30,
136
+ "p": 31,
137
+ "h": 32,
138
+ "ɵy˧": 33,
139
+ "aː˥": 34,
140
+ "ou˨": 35,
141
+ "ɔː˧": 36,
142
+ "ɐi˧˩̰": 37,
143
+ "uː˧": 38,
144
+ "ŋ": 39,
145
+ "s": 40,
146
+ "ɔːĭ˥": 41,
147
+ "ɐu˨": 42,
148
+ "iː˨": 43,
149
+ "ei˧˥": 44,
150
+ "ɐi˨": 45,
151
+ "ʊ˧˩̰": 46,
152
+ "ʊ˨": 47,
153
+ "aː˩˧": 48,
154
+ "aːĭ˧˥": 49,
155
+ "ɔː˨": 50,
156
+ "ɛː˩˧": 51,
157
+ "ɪ˨": 52,
158
+ "iːŭ˧": 53,
159
+ "ɛː˧˩̰": 54,
160
+ "ɪ˧˥": 55,
161
+ "̩˧˩̰": 56,
162
+ "ɵ˧˥": 57,
163
+ "ei˧": 58,
164
+ "ɐu˧˩̰": 59,
165
+ "m̩˧": 60,
166
+ "ɐu˧˥": 61,
167
+ "ɐu˩˧": 62,
168
+ "ɐi˥": 63,
169
+ "ɔː˥": 64,
170
+ "ɔːĭ˧": 65,
171
+ "ou˧˥": 66,
172
+ "ou˩˧": 67,
173
+ "ɐ˧": 68,
174
+ "tsʰ": 69,
175
+ "ɛː˧˥": 70,
176
+ "iː˧˥": 71,
177
+ "ɔː˩˧": 72,
178
+ "kʰ": 73,
179
+ "ɐ˧˩̰": 74,
180
+ "aːŭ˧˥": 75,
181
+ "pʰ": 76,
182
+ "aːĭ˧˩̰": 77,
183
+ "ɵy˩˧": 78,
184
+ "ɵ˧": 79,
185
+ "ɛː˧": 80,
186
+ "ei˧˩̰": 81,
187
+ "uː˧˥": 82,
188
+ "ɔː˧˩̰": 83,
189
+ "ɛː˨": 84,
190
+ "uː˥": 85,
191
+ "ʊ˧": 86,
192
+ "iː˧˩̰": 87,
193
+ "yː˨": 88,
194
+ "aːŭ˧": 89,
195
+ "œː˩˧": 90,
196
+ "ɐ˧˥": 91,
197
+ "iː˩˧": 92,
198
+ "ɪ˧˩̰": 93,
199
+ "iːŭ˧˩̰": 94,
200
+ "œː˧˥": 95,
201
+ "yː˧": 96,
202
+ "uːĭ˩˧": 97,
203
+ "ɵy˧˥": 98,
204
+ "yː˧˩̰": 99,
205
+ "ɔːĭ˧˥": 100,
206
+ "ɛː": 101,
207
+ "u˨": 102,
208
+ "ou˧": 103,
209
+ "ei˨": 104,
210
+ "ɐu˥": 105,
211
+ "ɵ˥": 106,
212
+ "uː˧˩̰": 107,
213
+ "yː˥": 108,
214
+ "ɪ˥": 109,
215
+ "œː˥": 110,
216
+ "œː˧˩̰": 111,
217
+ "aːĭ˨": 112,
218
+ "ɐ˩˧": 113,
219
+ "œː˧": 114,
220
+ "uːĭ˧˥": 115,
221
+ "ɐu˧": 116,
222
+ "ɐi˧": 117,
223
+ "ou˧˩̰": 118,
224
+ "aːĭ˥": 119,
225
+ "aːŭ˥": 120,
226
+ "yː˧˥": 121,
227
+ "iːŭ˥": 122,
228
+ "ɔːĭ˨": 123,
229
+ "ʊ˧˥": 124,
230
+ "m̩˥": 125,
231
+ "iːŭ˧˥": 126,
232
+ "ɐi˩˧": 127,
233
+ "ɵy˥": 128,
234
+ "uːĭ˧": 129,
235
+ "ɵy˧˩̰": 130,
236
+ "uːĭ˥": 131,
237
+ "aːŭ˧˩̰": 132,
238
+ "yː˩˧": 133,
239
+ "ɔːĭ˧˩̰": 134,
240
+ "aːŭ˩˧": 135,
241
+ "aːĭ˩˧": 136,
242
+ "uːĭ˨": 137,
243
+ "œː˨": 138,
244
+ "uː˨": 139,
245
+ "ɵy˨": 140,
246
+ "aːŭ˨": 141,
247
+ "m̩˩˧": 142,
248
+ "ŋ˩˧": 143,
249
+ "ɪ˧": 144,
250
+ "m̩˨": 145,
251
+ "iːŭ˩˧": 146,
252
+ "iːŭ˨": 147,
253
+ "ɵ˨": 148,
254
+ "uːĭ˧˩̰": 149,
255
+ "uː˩˧": 150,
256
+ "ɵ˧˩̰": 151
257
  },
258
  "unk_token": "UNK"
259
  }
vocab.json CHANGED
@@ -1 +1 @@
1
- {"UNK":0,"PAD":1,"WORD_BOUNDARY":2,"UTT_BOUNDARY":3}
 
1
+ {"UNK":0,"PAD":1,"WORD_BOUNDARY":2,"UTT_BOUNDARY":3,"aː˧":4,"t":5,"ɐ˥":6,"k":7,"l":8,"j":9,"ʊ˥":10,"aː˧˩̰":11,"ɛː˥":12,"n":13,"ei˩˧":14,"w":15,"aː˨":16,"ɐi˧˥":17,"m̩˧˥":18,"m":19,"ou˥":20,"aː˧˥":21,"ei˥":22,"iː˧":23,"ts":24,"ɔː˧˥":25,"tʰ":26,"iː˥":27,"f":28,"aːĭ˧":29,"ɐ˨":30,"p":31,"h":32,"ɵy˧":33,"aː˥":34,"ou˨":35,"ɔː˧":36,"ɐi˧˩̰":37,"uː˧":38,"ŋ":39,"s":40,"ɔːĭ˥":41,"ɐu˨":42,"iː˨":43,"ei˧˥":44,"ɐi˨":45,"ʊ˧˩̰":46,"ʊ˨":47,"aː˩˧":48,"aːĭ˧˥":49,"ɔː˨":50,"ɛː˩˧":51,"ɪ˨":52,"iːŭ˧":53,"ɛː˧˩̰":54,"ɪ˧˥":55,"̩˧˩̰":56,"ɵ˧˥":57,"ei˧":58,"ɐu˧˩̰":59,"m̩˧":60,"ɐu˧˥":61,"ɐu˩˧":62,"ɐi˥":63,"ɔː˥":64,"ɔːĭ˧":65,"ou˧˥":66,"ou˩˧":67,"ɐ˧":68,"tsʰ":69,"ɛː˧˥":70,"iː˧˥":71,"ɔː˩˧":72,"kʰ":73,"ɐ˧˩̰":74,"aːŭ˧˥":75,"pʰ":76,"aːĭ˧˩̰":77,"ɵy˩˧":78,"ɵ˧":79,"ɛː˧":80,"ei˧˩̰":81,"uː˧˥":82,"ɔː˧˩̰":83,"ɛː˨":84,"uː˥":85,"ʊ˧":86,"iː˧˩̰":87,"yː˨":88,"aːŭ˧":89,"œː˩˧":90,"ɐ˧˥":91,"iː˩˧":92,"ɪ˧˩̰":93,"iːŭ˧˩̰":94,"œː˧˥":95,"yː˧":96,"uːĭ˩˧":97,"ɵy˧˥":98,"yː˧˩̰":99,"ɔːĭ˧˥":100,"ɛː":101,"u˨":102,"ou˧":103,"ei˨":104,"ɐu˥":105,"ɵ˥":106,"uː˧˩̰":107,"yː˥":108,"ɪ˥":109,"œː˥":110,"œː˧˩̰":111,"aːĭ˨":112,"ɐ˩˧":113,"œː˧":114,"uːĭ˧˥":115,"ɐu˧":116,"ɐi˧":117,"ou˧˩̰":118,"aːĭ˥":119,"aːŭ˥":120,"yː˧˥":121,"iːŭ˥":122,"ɔːĭ˨":123,"ʊ˧˥":124,"m̩˥":125,"iːŭ˧˥":126,"ɐi˩˧":127,"ɵy˥":128,"uːĭ˧":129,"ɵy˧˩̰":130,"uːĭ˥":131,"aːŭ˧˩̰":132,"yː˩˧":133,"ɔːĭ˧˩̰":134,"aːŭ˩˧":135,"aːĭ˩˧":136,"uːĭ˨":137,"œː˨":138,"uː˨":139,"ɵy˨":140,"aːŭ˨":141,"m̩˩˧":142,"ŋ˩˧":143,"ɪ˧":144,"m̩˨":145,"iːŭ˩˧":146,"iːŭ˨":147,"ɵ˨":148,"uːĭ˧˩̰":149,"uː˩˧":150,"ɵ˧˩̰":151}