Serhiy Stetskovych committed
Commit 81cbc69
1 Parent(s): 95f2409

Better accents, and refactor

.gitattributes CHANGED
@@ -37,3 +37,7 @@ Utils/JDC/bst.t7 filter=lfs diff=lfs merge=lfs -text
 Utils/PLBERT/step_410000.t7 filter=lfs diff=lfs merge=lfs -text
 epoch_2nd_00027_filatov_whisp_cont.pth filter=lfs diff=lfs merge=lfs -text
 Utils/ASR/epoch_00100.pth filter=lfs diff=lfs merge=lfs -text
+weights/plbert.bin filter=lfs diff=lfs merge=lfs -text
+weights/asr.bin filter=lfs diff=lfs merge=lfs -text
+weights/filatov.bin filter=lfs diff=lfs merge=lfs -text
+weights/jdc.bin filter=lfs diff=lfs merge=lfs -text

Utils/PLBERT/util.py CHANGED
@@ -12,31 +12,13 @@ class CustomAlbert(AlbertModel):
         return outputs.last_hidden_state
 
 
-def load_plbert(log_dir):
-    config_path = os.path.join(log_dir, "config.yml")
+def load_plbert(wights_path, config_path):
     plbert_config = yaml.safe_load(open(config_path))
 
     albert_base_configuration = AlbertConfig(**plbert_config['model_params'])
     bert = CustomAlbert(albert_base_configuration)
 
-    files = os.listdir(log_dir)
-    ckpts = []
-    for f in os.listdir(log_dir):
-        if f.startswith("step_"): ckpts.append(f)
-
-    iters = [int(f.split('_')[-1].split('.')[0]) for f in ckpts if os.path.isfile(os.path.join(log_dir, f))]
-    iters = sorted(iters)[-1]
-
-    checkpoint = torch.load(log_dir + "/step_" + str(iters) + ".t7", map_location='cpu')
-    state_dict = checkpoint['net']
-    from collections import OrderedDict
-    new_state_dict = OrderedDict()
-    for k, v in state_dict.items():
-        name = k[7:] # remove `module.`
-        if name.startswith('encoder.'):
-            name = name[8:] # remove `encoder.`
-        new_state_dict[name] = v
-    #del new_state_dict["embeddings.position_ids"]
-    bert.load_state_dict(new_state_dict, strict=False)
+    state_dict = torch.load(wights_path, map_location='cpu')
+    bert.load_state_dict(state_dict, strict=False)
 
     return bert
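
With this refactor, load_plbert no longer scans a log directory for the latest step_*.t7 checkpoint and renames keys at load time; it expects a flat state dict on disk. A hypothetical one-off conversion that could produce such a file from the old checkpoint, reusing the key-stripping logic removed above (the file names and the 'net' key are taken from the removed code; this script is not part of the commit):

# Hypothetical conversion script (not part of this commit).
from collections import OrderedDict
import torch

checkpoint = torch.load("Utils/PLBERT/step_410000.t7", map_location="cpu")
state_dict = checkpoint["net"]

new_state_dict = OrderedDict()
for k, v in state_dict.items():
    name = k[7:]                      # drop the DataParallel "module." prefix
    if name.startswith("encoder."):
        name = name[8:]               # drop the "encoder." prefix
    new_state_dict[name] = v

torch.save(new_state_dict, "weights/plbert.bin")
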
infer.py CHANGED
@@ -20,7 +20,7 @@ import torchaudio
 from ipa_uk import ipa
 from unicodedata import normalize
 from ukrainian_word_stress import Stressifier, StressSymbol
-stressify = Stressifier(stress_symbol=StressSymbol.CombiningAcuteAccent)
+stressify = Stressifier()
 
 
 
@@ -56,15 +56,15 @@ pitch_extractor = load_F0_models(F0_path)
 
 # load BERT model
 from Utils.PLBERT.util import load_plbert
-BERT_path = config.get('PLBERT_dir', False)
-plbert = load_plbert(BERT_path)
+
+plbert = load_plbert('weights/plbert.bin', 'Utils/PLBERT/config.yml')
 
 model = build_model(recursive_munch(config['model_params']), text_aligner, pitch_extractor, plbert)
 _ = [model[key].eval() for key in model]
 _ = [model[key].to(device) for key in model]
 
-params_whole = torch.load('epoch_2nd_00027_filatov_whisp_cont.pth', map_location='cpu')
-params = params_whole['net']
+params = torch.load('weights/filatov.bin', map_location='cpu')
+
 
 for key in model:
     if key in params:
@@ -110,14 +110,17 @@ def split_to_parts(text):
 def _inf(text, speed, s_prev, noise, alpha, diffusion_steps, embedding_scale):
     text = text.strip()
     text = text.replace('"', '')
-    text = text.replace('+', 'ˈ')
+    text = text.replace('+', '\u0301')
     text = normalize('NFKC', text)
 
     text = re.sub(r'[᠆‐‑‒–—―⁻₋−⸺⸻]', '-', text)
     text = re.sub(r' - ', ': ', text)
-    ps = ipa(stressify(text))
-    #ps = text
-    print(ps)
+    stressed = stressify(text)
+
+
+    ps = ipa(stressed)
+
+    print(stressed)
 
     tokens = textclenaer(ps)
     tokens.insert(0, 0)
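
The accent handling changes in two places: the Stressifier now uses its default stress symbol, and a manual '+' marker in the input text is converted to U+0301 (COMBINING ACUTE ACCENT), which attaches to the character immediately before it. A minimal standard-library sketch of that substitution (the example word is illustrative only):

# Illustrative only; mirrors the replace/normalize lines added above.
from unicodedata import normalize

text = "за+мок"                      # stress marked with '+' right after the vowel
text = text.replace("+", "\u0301")   # the combining accent attaches to the preceding vowel
text = normalize("NFKC", text)
print(text)                          # за́мок
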
models.py CHANGED
@@ -585,7 +585,7 @@ def load_F0_models(path):
     # load F0 model
 
     F0_model = JDCNet(num_class=1, seq_len=192)
-    params = torch.load(path, map_location='cpu')['net']
+    params = torch.load(path, map_location='cpu')
     F0_model.load_state_dict(params)
     _ = F0_model.train()
 
@@ -601,7 +601,7 @@ def load_ASR_models(ASR_MODEL_PATH, ASR_MODEL_CONFIG):
 
     def _load_model(model_config, model_path):
        model = ASRCNN(**model_config)
-        params = torch.load(model_path, map_location='cpu')['model']
+        params = torch.load(model_path, map_location='cpu')
        model.load_state_dict(params)
        return model
 
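
load_F0_models and load_ASR_models now expect the checkpoint file to contain the bare state dict instead of a dict wrapped under a 'net' or 'model' key. A hypothetical one-off conversion of the old checkpoints into the new weight files (the key names come from the removed lines above; the target paths match styletts_config.yml below):

# Hypothetical conversion script (not part of this commit).
import torch

# The old JDC (F0) checkpoint stored its weights under 'net'.
jdc = torch.load("Utils/JDC/bst.t7", map_location="cpu")
torch.save(jdc["net"], "weights/jdc.bin")

# The old ASR checkpoint stored its weights under 'model'.
asr = torch.load("Utils/ASR/epoch_00100.pth", map_location="cpu")
torch.save(asr["model"], "weights/asr.bin")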
 
requirements.txt CHANGED
@@ -13,7 +13,7 @@ einops
 einops_exts
 txtsplit
 transformers
-ukrainian-word-stress
+git+https://github.com/patriotyk/ukrainian-word-stress.git
 git+https://github.com/patriotyk/ipa-uk.git
 spaces
 numpy<2
styletts_config.yml CHANGED
@@ -1,7 +1,7 @@
 
-F0_path: "Utils/JDC/bst.t7"
+F0_path: "weights/jdc.bin"
 ASR_config: "Utils/ASR/config.yml"
-ASR_path: "Utils/ASR/epoch_00100.pth"
+ASR_path: "weights/asr.bin"
 PLBERT_dir: 'Utils/PLBERT/'
 
 
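
With the updated paths, the loaders patched in models.py can be driven straight from this config; a sketch of the call pattern (the function names come from the hunk headers above, the import line is an assumption):

# Sketch only: wiring the new config values into the loaders.
import yaml
from models import load_F0_models, load_ASR_models  # assumed import

config = yaml.safe_load(open("styletts_config.yml"))

pitch_extractor = load_F0_models(config["F0_path"])                        # weights/jdc.bin
text_aligner = load_ASR_models(config["ASR_path"], config["ASR_config"])  # weights/asr.bin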
 
Utils/ASR/epoch_00100.pth → weights/asr.bin RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6b29394cfa9560a368c9dc67cfbc1f748b0b9c8565e40a3d3f19b859cf7f14e3
-size 94605236
+oid sha256:ee69a5f32c76aff88bafb09f31379dc625e51e3f71d842a4cf30ecd69cb92b56
+size 31529032

Utils/PLBERT/step_410000.t7 → weights/filatov.bin RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:02a4b690d254645b11dcc698ced7e6e0e24db1696ffff76d16f4712f8f486f60
-size 3098773241
+oid sha256:25e78d882ec4ee5a8a361749004edf6914137760f2be33a71ea24ce22da1a24a
+size 748848243

Utils/JDC/bst.t7 → weights/jdc.bin RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:54dc94364b97e18ac1dfa6287714ed121248cfaac4cfd39d061c6e0a089ef169
-size 21029926
+oid sha256:87aabb4a35814b581d13f4cc6ee352e99e80ede7fb6e7e963ea85b1feee940ac
+size 21023821

weights/plbert.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:74284b3d86962b0c4b52f2d3fe8507fc1ad87ae1defd1e7ddf28677380a290ef
+size 25188799