"""adapted from https://github.com/keithito/tacotron""" """ Defines the set of symbols used in text input to the model. The default is a set of ASCII characters that works well for English or text that has been run through Unidecode. For other data, you can modify _characters.""" arpabet = [ "AA", "AA0", "AA1", "AA2", "AE", "AE0", "AE1", "AE2", "AH", "AH0", "AH1", "AH2", "AO", "AO0", "AO1", "AO2", "AW", "AW0", "AW1", "AW2", "AY", "AY0", "AY1", "AY2", "B", "CH", "D", "DH", "EH", "EH0", "EH1", "EH2", "ER", "ER0", "ER1", "ER2", "EY", "EY0", "EY1", "EY2", "F", "G", "HH", "IH", "IH0", "IH1", "IH2", "IY", "IY0", "IY1", "IY2", "JH", "K", "L", "M", "N", "NG", "OW", "OW0", "OW1", "OW2", "OY", "OY0", "OY1", "OY2", "P", "R", "S", "SH", "T", "TH", "UH", "UH0", "UH1", "UH2", "UW", "UW0", "UW1", "UW2", "V", "W", "Y", "Z", "ZH", ] def get_symbols(symbol_set): if symbol_set == "english_basic": _pad = "_" _punctuation = "!'\"(),.:;? " _special = "-" _letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" _arpabet = ["@" + s for s in arpabet] symbols = list(_pad + _special + _punctuation + _letters) + _arpabet elif symbol_set == "english_basic_lowercase": _pad = "_" _punctuation = "!'\"(),.:;? " _special = "-" _letters = "abcdefghijklmnopqrstuvwxyz" _arpabet = ["@" + s for s in arpabet] symbols = list(_pad + _special + _punctuation + _letters) + _arpabet elif symbol_set == "english_expanded": _punctuation = "!'\",.:;? " _math = "#%&*+-/[]()" _special = "_@©°½—₩€$" _accented = "áçéêëñöøćž" _letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" _arpabet = ["@" + s for s in arpabet] symbols = ( list(_punctuation + _math + _special + _accented + _letters) + _arpabet ) elif symbol_set == "ukrainian": _punctuation = "'.,?! " _special = "-+" _letters = "абвгґдежзийклмнопрстуфхцчшщьюяєії" symbols = list(_punctuation + _special + _letters) elif symbol_set == "radtts": _punctuation = "!'\",.:;? " _math = "#%&*+-/[]()" _special = "_@©°½—₩€$" _accented = "áçéêëñöøćž" _numbers = "0123456789" _letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" _arpabet = ["@" + s for s in arpabet] symbols = ( list(_punctuation + _math + _special + _accented + _numbers + _letters) + _arpabet ) else: raise Exception("{} symbol set does not exist".format(symbol_set)) return symbols