Text-to-Speech
ONNX
English
bendangelo committed on
Commit
04802cb
·
verified ·
1 Parent(s): 9b10985

generate method can optionally accept a phonetics argument

Browse files

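Allows forcing the model to speak a certain way by passing the phonemes directly instead of having them derived from the text, for example to choose between the two pronunciations of "bow" in "a bow" and "take a bow".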

Files changed (1) hide show
  1. kokoro.py +8 -3
kokoro.py CHANGED
@@ -131,15 +131,20 @@ def forward(model, tokens, ref_s, speed):
131
  asr = t_en @ pred_aln_trg.unsqueeze(0).to(device)
132
  return model.decoder(asr, F0_pred, N_pred, ref_s[:, :128]).squeeze().cpu().numpy()
133
 
134
- def generate(model, text, voicepack, speed=1):
135
- ps = phonemize(text)
 
 
 
 
136
  tokens = tokenize(ps)
137
  if not tokens:
138
  return None
139
  elif len(tokens) > 510:
140
  tokens = tokens[:510]
141
  print('Truncated to 510 tokens')
 
142
  ref_s = voicepack[len(tokens)]
143
  out = forward(model, tokens, ref_s, speed)
144
  ps = ''.join(next(k for k, v in VOCAB.items() if i == v) for i in tokens)
145
- return out, ps
 
131
  asr = t_en @ pred_aln_trg.unsqueeze(0).to(device)
132
  return model.decoder(asr, F0_pred, N_pred, ref_s[:, :128]).squeeze().cpu().numpy()
133
 
134
+ def generate(model, text, voicepack, speed=1, phonetics=None):
135
+ if phonetics is not None:
136
+ ps = phonetics
137
+ else:
138
+ ps = phonemize(text)
139
+
140
  tokens = tokenize(ps)
141
  if not tokens:
142
  return None
143
  elif len(tokens) > 510:
144
  tokens = tokens[:510]
145
  print('Truncated to 510 tokens')
146
+
147
  ref_s = voicepack[len(tokens)]
148
  out = forward(model, tokens, ref_s, speed)
149
  ps = ''.join(next(k for k, v in VOCAB.items() if i == v) for i in tokens)
150
+ return out, ps