hexgrad committed on
Commit
0662719
·
verified ·
1 Parent(s): 24f33ad

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -0
app.py CHANGED
@@ -81,6 +81,8 @@ def normalize(text):
81
  text = re.sub(r'(?<=\d),(?=\d)', '', text)
82
  text = re.sub(r'(?<=\d)-(?=\d)', ' to ', text) # TODO: could be minus
83
  text = re.sub(r'(?<=\d):(?=\d)', ' ', text)
 
 
84
  return parens_to_angles(text).strip()
85
 
86
  phonemizers = dict(
@@ -101,6 +103,8 @@ def phonemize(text, voice, norm=True):
101
  if lang in 'ab':
102
  ps = ps.replace('kəkˈoːɹoʊ', 'kˈoʊkəɹoʊ').replace('kəkˈɔːɹəʊ', 'kˈəʊkəɹəʊ')
103
  ps = ps.replace('ʲ', 'j').replace('r', 'ɹ').replace('x', 'k')
 
 
104
  ps = ''.join(filter(lambda p: p in VOCAB, ps))
105
  if lang == 'j' and any(p in 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' for p in ps):
106
  gr.Warning('Japanese tokenizer does not handle English letters.')
 
81
  text = re.sub(r'(?<=\d),(?=\d)', '', text)
82
  text = re.sub(r'(?<=\d)-(?=\d)', ' to ', text) # TODO: could be minus
83
  text = re.sub(r'(?<=\d):(?=\d)', ' ', text)
84
+ text = re.sub(r'(?<=\d)S', ' S', text)
85
+ text = re.sub(r"(?<=[A-Z])'?s", lambda m: m.group().upper(), text)
86
  return parens_to_angles(text).strip()
87
 
88
  phonemizers = dict(
 
103
  if lang in 'ab':
104
  ps = ps.replace('kəkˈoːɹoʊ', 'kˈoʊkəɹoʊ').replace('kəkˈɔːɹəʊ', 'kˈəʊkəɹəʊ')
105
  ps = ps.replace('ʲ', 'j').replace('r', 'ɹ').replace('x', 'k')
106
+ ps = ps.replace(' z', 'z')
107
+ ps = re.sub(r'(wˈʌn|tˈuː|θɹˈiː|fˈoːɹ|fˈaɪv|sˈɪks|sˈɛvən|ˈeɪt|nˈaɪn)(hˈʌndɹɪd)', r'\1 \2', ps)
108
  ps = ''.join(filter(lambda p: p in VOCAB, ps))
109
  if lang == 'j' and any(p in 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' for p in ps):
110
  gr.Warning('Japanese tokenizer does not handle English letters.')