Pendrokar committed on
Commit
b2e60d3
·
1 Parent(s): 5622bbe

IPA brace support

Browse files
Files changed (1) hide show
  1. styletts2importable.py +18 -0
styletts2importable.py CHANGED
@@ -38,6 +38,7 @@ from nltk.tokenize import word_tokenize
38
  from models import *
39
  from utils import *
40
  from text_utils import TextCleaner
 
41
  textclenaer = TextCleaner()
42
 
43
 
@@ -136,9 +137,26 @@ sampler = DiffusionSampler(
136
 
137
  def inference(text, ref_s, alpha = 0.3, beta = 0.7, diffusion_steps=5, embedding_scale=1, use_gruut=False):
138
  text = text.strip()
 
 
 
 
 
 
 
 
 
 
 
139
  ps = global_phonemizer.phonemize([text])
140
  ps = word_tokenize(ps[0])
141
  ps = ' '.join(ps)
 
 
 
 
 
 
142
  tokens = textclenaer(ps)
143
  tokens.insert(0, 0)
144
  tokens = torch.LongTensor(tokens).to(device).unsqueeze(0)
 
38
  from models import *
39
  from utils import *
40
  from text_utils import TextCleaner
41
+ import re
42
  textclenaer = TextCleaner()
43
 
44
 
 
137
 
138
  def inference(text, ref_s, alpha = 0.3, beta = 0.7, diffusion_steps=5, embedding_scale=1, use_gruut=False):
139
  text = text.strip()
140
+
141
+ # search for IPA within []
142
+ regex = r"\[[^\]]*\]"
143
+ # remove all non-IPA sections
144
+ text = text.replace('[]', '')
145
+ ipa_sections = re.findall(regex, text)
146
+
147
+ # replace IPA sections with []
148
+ if (ipa_sections is not None):
149
+ text = re.sub(regex, '[]', text, 0, re.MULTILINE)
150
+
151
  ps = global_phonemizer.phonemize([text])
152
  ps = word_tokenize(ps[0])
153
  ps = ' '.join(ps)
154
+
155
+ # add the IPA back
156
+ if (ipa_sections is not None):
157
+ for ipa in ipa_sections:
158
+ ps = ps.replace('[ ]', ipa, 1)
159
+
160
  tokens = textclenaer(ps)
161
  tokens.insert(0, 0)
162
  tokens = torch.LongTensor(tokens).to(device).unsqueeze(0)