bendangelo
committed on
generate method can optionally accept a phonetics argument
Browse files
Allows forcing the model to speak a certain way, for example: "a bow" or "take a bow".
kokoro.py
CHANGED
@@ -131,15 +131,20 @@ def forward(model, tokens, ref_s, speed):
|
|
131 |
asr = t_en @ pred_aln_trg.unsqueeze(0).to(device)
|
132 |
return model.decoder(asr, F0_pred, N_pred, ref_s[:, :128]).squeeze().cpu().numpy()
|
133 |
|
134 |
-
def generate(model, text, voicepack, speed=1):
|
135 |
-
|
|
|
|
|
|
|
|
|
136 |
tokens = tokenize(ps)
|
137 |
if not tokens:
|
138 |
return None
|
139 |
elif len(tokens) > 510:
|
140 |
tokens = tokens[:510]
|
141 |
print('Truncated to 510 tokens')
|
|
|
142 |
ref_s = voicepack[len(tokens)]
|
143 |
out = forward(model, tokens, ref_s, speed)
|
144 |
ps = ''.join(next(k for k, v in VOCAB.items() if i == v) for i in tokens)
|
145 |
-
return out, ps
|
|
|
131 |
asr = t_en @ pred_aln_trg.unsqueeze(0).to(device)
|
132 |
return model.decoder(asr, F0_pred, N_pred, ref_s[:, :128]).squeeze().cpu().numpy()
|
133 |
|
134 |
+
def generate(model, text, voicepack, speed=1, phonetics=None):
    """Synthesize audio for *text*, optionally from caller-supplied phonemes.

    Args:
        model: Passed through unchanged to ``forward``.
        text: Input text; only phonemized when ``phonetics`` is None.
        voicepack: Indexed by the final token count to pick the reference
            style handed to ``forward``.
        speed: Passed through unchanged to ``forward``.
        phonetics: Optional phoneme string. When it is not None it is
            tokenized as-is and ``phonemize(text)`` is skipped, which lets
            the caller force a particular pronunciation.

    Returns:
        ``None`` when tokenization yields no tokens; otherwise a tuple
        ``(out, ps)`` where ``out`` is the result of ``forward`` and ``ps``
        is the phoneme string rebuilt from the tokens actually used (so it
        reflects any truncation).
    """
    # An explicit phoneme string wins over phonemizing the text.
    phonemes = phonemize(text) if phonetics is None else phonetics

    token_ids = tokenize(phonemes)
    if not token_ids:
        # NOTE: callers that unpack the result must handle this None case.
        return None
    if len(token_ids) > 510:
        token_ids = token_ids[:510]
        print('Truncated to 510 tokens')

    style_ref = voicepack[len(token_ids)]
    audio = forward(model, token_ids, style_ref, speed)

    # Map each token id back to the first VOCAB symbol carrying that id.
    # next() has no default, so an id missing from VOCAB raises instead of
    # being silently dropped.
    used_phonemes = ''.join(
        next(symbol for symbol, token_id in VOCAB.items() if tok == token_id)
        for tok in token_ids
    )
    return audio, used_phonemes
|