Yehor's picture
Init
ea6a7ed
raw
history blame contribute delete
3.06 kB
"""adapted from https://github.com/keithito/tacotron"""
"""
Defines the set of symbols used in text input to the model.
The default is a set of ASCII characters that works well for English or text
that has been run through Unidecode. For other data, you can modify
_characters."""
# Phoneme symbols (ARPAbet, going by the variable name), kept in sorted order.
# Entries ending in 0/1/2 are numbered variants of the two-letter symbol that
# precedes them (presumably stress levels -- confirm against the phoneme
# dictionary in use). The order is part of the data: do not reorder.
# fmt: off
arpabet = [
    "AA", "AA0", "AA1", "AA2",
    "AE", "AE0", "AE1", "AE2",
    "AH", "AH0", "AH1", "AH2",
    "AO", "AO0", "AO1", "AO2",
    "AW", "AW0", "AW1", "AW2",
    "AY", "AY0", "AY1", "AY2",
    "B", "CH", "D", "DH",
    "EH", "EH0", "EH1", "EH2",
    "ER", "ER0", "ER1", "ER2",
    "EY", "EY0", "EY1", "EY2",
    "F", "G", "HH",
    "IH", "IH0", "IH1", "IH2",
    "IY", "IY0", "IY1", "IY2",
    "JH", "K", "L", "M", "N", "NG",
    "OW", "OW0", "OW1", "OW2",
    "OY", "OY0", "OY1", "OY2",
    "P", "R", "S", "SH", "T", "TH",
    "UH", "UH0", "UH1", "UH2",
    "UW", "UW0", "UW1", "UW2",
    "V", "W", "Y", "Z", "ZH",
]
# fmt: on
def get_symbols(symbol_set):
    """Return the list of input symbols for the named symbol set.

    Args:
        symbol_set: One of "english_basic", "english_basic_lowercase",
            "english_expanded", "ukrainian" or "radtts".

    Returns:
        A list of strings. The single-character symbols come first, in a
        fixed order. Every set except "ukrainian" then appends each entry of
        the module-level ``arpabet`` list prefixed with "@".

    Raises:
        ValueError: If ``symbol_set`` is not one of the names listed above.
    """
    # NOTE(review): the position of each symbol in the returned list is
    # presumably used as its id by callers, so the concatenation order below
    # must stay exactly as it is.
    upper = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    lower = "abcdefghijklmnopqrstuvwxyz"

    if symbol_set == "ukrainian":
        punctuation = "'.,?! "
        special = "-+"
        letters = "абвгґдежзийклмнопрстуфхцчшщьюяєії"
        # This set carries no "@"-prefixed phoneme tokens.
        return list(punctuation + special + letters)

    if symbol_set in ("english_basic", "english_basic_lowercase"):
        pad = "_"
        special = "-"
        punctuation = "!'\"(),.:;? "
        if symbol_set == "english_basic_lowercase":
            letters = lower
        else:
            letters = upper + lower
        characters = pad + special + punctuation + letters
    elif symbol_set in ("english_expanded", "radtts"):
        punctuation = "!'\",.:;? "
        math_symbols = "#%&*+-/[]()"
        special = "_@©°½—₩€$"
        accented = "áçéêëñöøćž"
        # "radtts" is the expanded set with digits inserted before the letters.
        numbers = "0123456789" if symbol_set == "radtts" else ""
        characters = (
            punctuation + math_symbols + special + accented + numbers + upper + lower
        )
    else:
        # ValueError subclasses Exception, so existing broad handlers still
        # catch it; the message text is unchanged.
        raise ValueError("{} symbol set does not exist".format(symbol_set))

    return list(characters) + ["@" + s for s in arpabet]