Spaces:
Runtime error
Runtime error
Upload 2 files
Browse files- text/__init__.py +447 -0
- text/symbols.py +71 -0
text/__init__.py
ADDED
@@ -0,0 +1,447 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from text.symbols import symbols
|
2 |
+
|
3 |
+
|
4 |
+
# Lookup tables between symbols and their integer IDs.
# An ID is the symbol's position in the imported ``symbols`` sequence.
_symbol_to_id = {symbol: index for index, symbol in enumerate(symbols)}
_id_to_symbol = dict(enumerate(symbols))
|
7 |
+
|
8 |
+
|
9 |
+
def cleaned_text_to_sequence(cleaned_text):
    """Convert a whitespace-separated symbol string into a list of symbol IDs.

    Args:
        cleaned_text: string whose whitespace-separated tokens are each a key
            of ``_symbol_to_id``. Leading, trailing and repeated whitespace is
            ignored because ``str.split()`` is called without a separator.

    Returns:
        List with one ID per token, in the order the tokens appear. An empty
        or whitespace-only string yields an empty list.

    Raises:
        KeyError: if a token is not present in ``_symbol_to_id``.
    """
    return [_symbol_to_id[symbol] for symbol in cleaned_text.split()]
|
18 |
+
|
19 |
+
|
20 |
+
def sequence_to_text(sequence):
    """Convert a sequence of symbol IDs back into a string.

    Args:
        sequence: iterable of IDs, each a key of ``_id_to_symbol``.

    Returns:
        The symbols for the given IDs concatenated in order, with no separator
        between them. An empty sequence yields an empty string.

    Raises:
        KeyError: if an ID is not present in ``_id_to_symbol``.
    """
    # join builds the result in a single pass instead of repeated ``+=``.
    return "".join(_id_to_symbol[symbol_id] for symbol_id in sequence)
|
27 |
+
|
28 |
+
|
29 |
+
# Maps a toneless pinyin syllable to an ``(initial, final)`` pair.
# Syllables such as "a", "wa" or "yi" carry "^" in the initial slot.
# Some finals are spelled differently from the syllable's surface form,
# e.g. "chi" -> "iii", "ci" -> "ii", "dui" -> "uei", "ju" -> "v".
pinyin_dict = {
    "a": ("^", "a"),
    "ai": ("^", "ai"),
    "an": ("^", "an"),
    "ang": ("^", "ang"),
    "ao": ("^", "ao"),
    "ba": ("b", "a"),
    "bai": ("b", "ai"),
    "ban": ("b", "an"),
    "bang": ("b", "ang"),
    "bao": ("b", "ao"),
    "be": ("b", "e"),
    "bei": ("b", "ei"),
    "ben": ("b", "en"),
    "beng": ("b", "eng"),
    "bi": ("b", "i"),
    "bian": ("b", "ian"),
    "biao": ("b", "iao"),
    "bie": ("b", "ie"),
    "bin": ("b", "in"),
    "bing": ("b", "ing"),
    "bo": ("b", "o"),
    "bu": ("b", "u"),
    "ca": ("c", "a"),
    "cai": ("c", "ai"),
    "can": ("c", "an"),
    "cang": ("c", "ang"),
    "cao": ("c", "ao"),
    "ce": ("c", "e"),
    "cen": ("c", "en"),
    "ceng": ("c", "eng"),
    "cha": ("ch", "a"),
    "chai": ("ch", "ai"),
    "chan": ("ch", "an"),
    "chang": ("ch", "ang"),
    "chao": ("ch", "ao"),
    "che": ("ch", "e"),
    "chen": ("ch", "en"),
    "cheng": ("ch", "eng"),
    "chi": ("ch", "iii"),
    "chong": ("ch", "ong"),
    "chou": ("ch", "ou"),
    "chu": ("ch", "u"),
    "chua": ("ch", "ua"),
    "chuai": ("ch", "uai"),
    "chuan": ("ch", "uan"),
    "chuang": ("ch", "uang"),
    "chui": ("ch", "uei"),
    "chun": ("ch", "uen"),
    "chuo": ("ch", "uo"),
    "ci": ("c", "ii"),
    "cong": ("c", "ong"),
    "cou": ("c", "ou"),
    "cu": ("c", "u"),
    "cuan": ("c", "uan"),
    "cui": ("c", "uei"),
    "cun": ("c", "uen"),
    "cuo": ("c", "uo"),
    "da": ("d", "a"),
    "dai": ("d", "ai"),
    "dan": ("d", "an"),
    "dang": ("d", "ang"),
    "dao": ("d", "ao"),
    "de": ("d", "e"),
    "dei": ("d", "ei"),
    "den": ("d", "en"),
    "deng": ("d", "eng"),
    "di": ("d", "i"),
    "dia": ("d", "ia"),
    "dian": ("d", "ian"),
    "diao": ("d", "iao"),
    "die": ("d", "ie"),
    "ding": ("d", "ing"),
    "diu": ("d", "iou"),
    "dong": ("d", "ong"),
    "dou": ("d", "ou"),
    "du": ("d", "u"),
    "duan": ("d", "uan"),
    "dui": ("d", "uei"),
    "dun": ("d", "uen"),
    "duo": ("d", "uo"),
    "e": ("^", "e"),
    "ei": ("^", "ei"),
    "en": ("^", "en"),
    "ng": ("^", "en"),
    "eng": ("^", "eng"),
    "er": ("^", "er"),
    "fa": ("f", "a"),
    "fan": ("f", "an"),
    "fang": ("f", "ang"),
    "fei": ("f", "ei"),
    "fen": ("f", "en"),
    "feng": ("f", "eng"),
    "fo": ("f", "o"),
    "fou": ("f", "ou"),
    "fu": ("f", "u"),
    "ga": ("g", "a"),
    "gai": ("g", "ai"),
    "gan": ("g", "an"),
    "gang": ("g", "ang"),
    "gao": ("g", "ao"),
    "ge": ("g", "e"),
    "gei": ("g", "ei"),
    "gen": ("g", "en"),
    "geng": ("g", "eng"),
    "gong": ("g", "ong"),
    "gou": ("g", "ou"),
    "gu": ("g", "u"),
    "gua": ("g", "ua"),
    "guai": ("g", "uai"),
    "guan": ("g", "uan"),
    "guang": ("g", "uang"),
    "gui": ("g", "uei"),
    "gun": ("g", "uen"),
    "guo": ("g", "uo"),
    "ha": ("h", "a"),
    "hai": ("h", "ai"),
    "han": ("h", "an"),
    "hang": ("h", "ang"),
    "hao": ("h", "ao"),
    "he": ("h", "e"),
    "hei": ("h", "ei"),
    "hen": ("h", "en"),
    "heng": ("h", "eng"),
    "hong": ("h", "ong"),
    "hou": ("h", "ou"),
    "hu": ("h", "u"),
    "hua": ("h", "ua"),
    "huai": ("h", "uai"),
    "huan": ("h", "uan"),
    "huang": ("h", "uang"),
    "hui": ("h", "uei"),
    "hun": ("h", "uen"),
    "huo": ("h", "uo"),
    "ji": ("j", "i"),
    "jia": ("j", "ia"),
    "jian": ("j", "ian"),
    "jiang": ("j", "iang"),
    "jiao": ("j", "iao"),
    "jie": ("j", "ie"),
    "jin": ("j", "in"),
    "jing": ("j", "ing"),
    "jiong": ("j", "iong"),
    "jiu": ("j", "iou"),
    "ju": ("j", "v"),
    "juan": ("j", "van"),
    "jue": ("j", "ve"),
    "jun": ("j", "vn"),
    "ka": ("k", "a"),
    "kai": ("k", "ai"),
    "kan": ("k", "an"),
    "kang": ("k", "ang"),
    "kao": ("k", "ao"),
    "ke": ("k", "e"),
    "kei": ("k", "ei"),
    "ken": ("k", "en"),
    "keng": ("k", "eng"),
    "kong": ("k", "ong"),
    "kou": ("k", "ou"),
    "ku": ("k", "u"),
    "kua": ("k", "ua"),
    "kuai": ("k", "uai"),
    "kuan": ("k", "uan"),
    "kuang": ("k", "uang"),
    "kui": ("k", "uei"),
    "kun": ("k", "uen"),
    "kuo": ("k", "uo"),
    "la": ("l", "a"),
    "lai": ("l", "ai"),
    "lan": ("l", "an"),
    "lang": ("l", "ang"),
    "lao": ("l", "ao"),
    "le": ("l", "e"),
    "lei": ("l", "ei"),
    "leng": ("l", "eng"),
    "li": ("l", "i"),
    "lia": ("l", "ia"),
    "lian": ("l", "ian"),
    "liang": ("l", "iang"),
    "liao": ("l", "iao"),
    "lie": ("l", "ie"),
    "lin": ("l", "in"),
    "ling": ("l", "ing"),
    "liu": ("l", "iou"),
    "lo": ("l", "o"),
    "long": ("l", "ong"),
    "lou": ("l", "ou"),
    "lu": ("l", "u"),
    "lv": ("l", "v"),
    "luan": ("l", "uan"),
    "lve": ("l", "ve"),
    "lue": ("l", "ve"),
    "lun": ("l", "uen"),
    "luo": ("l", "uo"),
    "ma": ("m", "a"),
    "mai": ("m", "ai"),
    "man": ("m", "an"),
    "mang": ("m", "ang"),
    "mao": ("m", "ao"),
    "me": ("m", "e"),
    "mei": ("m", "ei"),
    "men": ("m", "en"),
    "meng": ("m", "eng"),
    "mi": ("m", "i"),
    "mian": ("m", "ian"),
    "miao": ("m", "iao"),
    "mie": ("m", "ie"),
    "min": ("m", "in"),
    "ming": ("m", "ing"),
    "miu": ("m", "iou"),
    "mo": ("m", "o"),
    "mou": ("m", "ou"),
    "mu": ("m", "u"),
    "na": ("n", "a"),
    "nai": ("n", "ai"),
    "nan": ("n", "an"),
    "nang": ("n", "ang"),
    "nao": ("n", "ao"),
    "ne": ("n", "e"),
    "nei": ("n", "ei"),
    "nen": ("n", "en"),
    "neng": ("n", "eng"),
    "ni": ("n", "i"),
    "nia": ("n", "ia"),
    "nian": ("n", "ian"),
    "niang": ("n", "iang"),
    "niao": ("n", "iao"),
    "nie": ("n", "ie"),
    "nin": ("n", "in"),
    "ning": ("n", "ing"),
    "niu": ("n", "iou"),
    "nong": ("n", "ong"),
    "nou": ("n", "ou"),
    "nu": ("n", "u"),
    "nv": ("n", "v"),
    "nuan": ("n", "uan"),
    "nve": ("n", "ve"),
    "nue": ("n", "ve"),
    "nuo": ("n", "uo"),
    "o": ("^", "o"),
    "ou": ("^", "ou"),
    "pa": ("p", "a"),
    "pai": ("p", "ai"),
    "pan": ("p", "an"),
    "pang": ("p", "ang"),
    "pao": ("p", "ao"),
    "pe": ("p", "e"),
    "pei": ("p", "ei"),
    "pen": ("p", "en"),
    "peng": ("p", "eng"),
    "pi": ("p", "i"),
    "pian": ("p", "ian"),
    "piao": ("p", "iao"),
    "pie": ("p", "ie"),
    "pin": ("p", "in"),
    "ping": ("p", "ing"),
    "po": ("p", "o"),
    "pou": ("p", "ou"),
    "pu": ("p", "u"),
    "qi": ("q", "i"),
    "qia": ("q", "ia"),
    "qian": ("q", "ian"),
    "qiang": ("q", "iang"),
    "qiao": ("q", "iao"),
    "qie": ("q", "ie"),
    "qin": ("q", "in"),
    "qing": ("q", "ing"),
    "qiong": ("q", "iong"),
    "qiu": ("q", "iou"),
    "qu": ("q", "v"),
    "quan": ("q", "van"),
    "que": ("q", "ve"),
    "qun": ("q", "vn"),
    "ran": ("r", "an"),
    "rang": ("r", "ang"),
    "rao": ("r", "ao"),
    "re": ("r", "e"),
    "ren": ("r", "en"),
    "reng": ("r", "eng"),
    "ri": ("r", "iii"),
    "rong": ("r", "ong"),
    "rou": ("r", "ou"),
    "ru": ("r", "u"),
    "rua": ("r", "ua"),
    "ruan": ("r", "uan"),
    "rui": ("r", "uei"),
    "run": ("r", "uen"),
    "ruo": ("r", "uo"),
    "sa": ("s", "a"),
    "sai": ("s", "ai"),
    "san": ("s", "an"),
    "sang": ("s", "ang"),
    "sao": ("s", "ao"),
    "se": ("s", "e"),
    "sen": ("s", "en"),
    "seng": ("s", "eng"),
    "sha": ("sh", "a"),
    "shai": ("sh", "ai"),
    "shan": ("sh", "an"),
    "shang": ("sh", "ang"),
    "shao": ("sh", "ao"),
    "she": ("sh", "e"),
    "shei": ("sh", "ei"),
    "shen": ("sh", "en"),
    "sheng": ("sh", "eng"),
    "shi": ("sh", "iii"),
    "shou": ("sh", "ou"),
    "shu": ("sh", "u"),
    "shua": ("sh", "ua"),
    "shuai": ("sh", "uai"),
    "shuan": ("sh", "uan"),
    "shuang": ("sh", "uang"),
    "shui": ("sh", "uei"),
    "shun": ("sh", "uen"),
    "shuo": ("sh", "uo"),
    "si": ("s", "ii"),
    "song": ("s", "ong"),
    "sou": ("s", "ou"),
    "su": ("s", "u"),
    "suan": ("s", "uan"),
    "sui": ("s", "uei"),
    "sun": ("s", "uen"),
    "suo": ("s", "uo"),
    "ta": ("t", "a"),
    "tai": ("t", "ai"),
    "tan": ("t", "an"),
    "tang": ("t", "ang"),
    "tao": ("t", "ao"),
    "te": ("t", "e"),
    "tei": ("t", "ei"),
    "teng": ("t", "eng"),
    "ti": ("t", "i"),
    "tian": ("t", "ian"),
    "tiao": ("t", "iao"),
    "tie": ("t", "ie"),
    "ting": ("t", "ing"),
    "tong": ("t", "ong"),
    "tou": ("t", "ou"),
    "tu": ("t", "u"),
    "tuan": ("t", "uan"),
    "tui": ("t", "uei"),
    "tun": ("t", "uen"),
    "tuo": ("t", "uo"),
    "wa": ("^", "ua"),
    "wai": ("^", "uai"),
    "wan": ("^", "uan"),
    "wang": ("^", "uang"),
    "wei": ("^", "uei"),
    "wen": ("^", "uen"),
    "weng": ("^", "ueng"),
    "wo": ("^", "uo"),
    "wu": ("^", "u"),
    "xi": ("x", "i"),
    "xia": ("x", "ia"),
    "xian": ("x", "ian"),
    "xiang": ("x", "iang"),
    "xiao": ("x", "iao"),
    "xie": ("x", "ie"),
    "xin": ("x", "in"),
    "xing": ("x", "ing"),
    "xiong": ("x", "iong"),
    "xiu": ("x", "iou"),
    "xu": ("x", "v"),
    "xuan": ("x", "van"),
    "xue": ("x", "ve"),
    "xun": ("x", "vn"),
    "ya": ("^", "ia"),
    "yan": ("^", "ian"),
    "yang": ("^", "iang"),
    "yao": ("^", "iao"),
    "ye": ("^", "ie"),
    "yi": ("^", "i"),
    "yin": ("^", "in"),
    "ying": ("^", "ing"),
    "yo": ("^", "iou"),
    "yong": ("^", "iong"),
    "you": ("^", "iou"),
    "yu": ("^", "v"),
    "yuan": ("^", "van"),
    "yue": ("^", "ve"),
    "yun": ("^", "vn"),
    "za": ("z", "a"),
    "zai": ("z", "ai"),
    "zan": ("z", "an"),
    "zang": ("z", "ang"),
    "zao": ("z", "ao"),
    "ze": ("z", "e"),
    "zei": ("z", "ei"),
    "zen": ("z", "en"),
    "zeng": ("z", "eng"),
    "zha": ("zh", "a"),
    "zhai": ("zh", "ai"),
    "zhan": ("zh", "an"),
    "zhang": ("zh", "ang"),
    "zhao": ("zh", "ao"),
    "zhe": ("zh", "e"),
    "zhei": ("zh", "ei"),
    "zhen": ("zh", "en"),
    "zheng": ("zh", "eng"),
    "zhi": ("zh", "iii"),
    "zhong": ("zh", "ong"),
    "zhou": ("zh", "ou"),
    "zhu": ("zh", "u"),
    "zhua": ("zh", "ua"),
    "zhuai": ("zh", "uai"),
    "zhuan": ("zh", "uan"),
    "zhuang": ("zh", "uang"),
    "zhui": ("zh", "uei"),
    "zhun": ("zh", "uen"),
    "zhuo": ("zh", "uo"),
    "zi": ("z", "ii"),
    "zong": ("z", "ong"),
    "zou": ("z", "ou"),
    "zu": ("z", "u"),
    "zuan": ("z", "uan"),
    "zui": ("z", "uei"),
    "zun": ("z", "uen"),
    "zuo": ("z", "uo"),
}
|
text/symbols.py
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Non-phonetic markers. The names suggest silence / end-of-sequence / short
# pause and "#N" boundary levels -- TODO confirm against the front-end that
# emits them.
_pause = ["sil", "eos", "sp", "#0", "#1", "#2", "#3"]

# Initial slot of a syllable; "^" is the value used when pinyin_dict-style
# tables have no consonant to put there.
_initials = [
    "^",
    "b",
    "c",
    "ch",
    "d",
    "f",
    "g",
    "h",
    "j",
    "k",
    "l",
    "m",
    "n",
    "p",
    "q",
    "r",
    "s",
    "sh",
    "t",
    "x",
    "z",
    "zh",
]

# Tone suffixes appended to every final when building ``symbols``.
_tones = ["1", "2", "3", "4", "5"]

_finals = [
    "a",
    "ai",
    "an",
    "ang",
    "ao",
    "e",
    "ei",
    "en",
    "eng",
    "er",
    "i",
    "ia",
    "ian",
    "iang",
    "iao",
    "ie",
    "ii",
    "iii",
    "in",
    "ing",
    "iong",
    "iou",
    "o",
    "ong",
    "ou",
    "u",
    "ua",
    "uai",
    "uan",
    "uang",
    "uei",
    "uen",
    "ueng",
    "uo",
    "v",
    "van",
    "ve",
    "vn",
]

# Full symbol inventory, in ID order: pause markers, then initials, then every
# final combined with every tone (final-major, e.g. "a1".."a5", "ai1", ...).
# The order is significant: a symbol's index here is what callers use as its ID.
symbols = _pause + _initials + [final + tone for final in _finals for tone in _tones]
|