Serhiy Stetskovych committed
Commit 81cbc69
1 Parent(s): 95f2409

Better accents, and refactor

.gitattributes CHANGED
@@ -37,3 +37,7 @@ Utils/JDC/bst.t7 filter=lfs diff=lfs merge=lfs -text
 Utils/PLBERT/step_410000.t7 filter=lfs diff=lfs merge=lfs -text
 epoch_2nd_00027_filatov_whisp_cont.pth filter=lfs diff=lfs merge=lfs -text
 Utils/ASR/epoch_00100.pth filter=lfs diff=lfs merge=lfs -text
+weights/plbert.bin filter=lfs diff=lfs merge=lfs -text
+weights/asr.bin filter=lfs diff=lfs merge=lfs -text
+weights/filatov.bin filter=lfs diff=lfs merge=lfs -text
+weights/jdc.bin filter=lfs diff=lfs merge=lfs -text

Utils/PLBERT/util.py CHANGED
@@ -12,31 +12,13 @@ class CustomAlbert(AlbertModel):
         return outputs.last_hidden_state
 
 
-def load_plbert(log_dir):
-    config_path = os.path.join(log_dir, "config.yml")
+def load_plbert(wights_path, config_path):
     plbert_config = yaml.safe_load(open(config_path))
 
     albert_base_configuration = AlbertConfig(**plbert_config['model_params'])
     bert = CustomAlbert(albert_base_configuration)
 
-    files = os.listdir(log_dir)
-    ckpts = []
-    for f in os.listdir(log_dir):
-        if f.startswith("step_"): ckpts.append(f)
-
-    iters = [int(f.split('_')[-1].split('.')[0]) for f in ckpts if os.path.isfile(os.path.join(log_dir, f))]
-    iters = sorted(iters)[-1]
-
-    checkpoint = torch.load(log_dir + "/step_" + str(iters) + ".t7", map_location='cpu')
-    state_dict = checkpoint['net']
-    from collections import OrderedDict
-    new_state_dict = OrderedDict()
-    for k, v in state_dict.items():
-        name = k[7:] # remove `module.`
-        if name.startswith('encoder.'):
-            name = name[8:] # remove `encoder.`
-        new_state_dict[name] = v
-    #del new_state_dict["embeddings.position_ids"]
-    bert.load_state_dict(new_state_dict, strict=False)
+    state_dict = torch.load(wights_path, map_location='cpu')
+    bert.load_state_dict(state_dict, strict=False)
 
     return bert
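
With this refactor, load_plbert no longer scans a log directory for the latest step_*.t7 checkpoint and renames keys at load time; it expects a flat state dict on disk. A hypothetical one-off conversion that could produce such a file from the old checkpoint, reusing the key-stripping logic removed above (the file names and the 'net' key are taken from the removed code; this script is not part of the commit):

# Hypothetical conversion script (not part of this commit).
from collections import OrderedDict
import torch

checkpoint = torch.load("Utils/PLBERT/step_410000.t7", map_location="cpu")
state_dict = checkpoint["net"]

new_state_dict = OrderedDict()
for k, v in state_dict.items():
    name = k[7:]                      # drop the DataParallel "module." prefix
    if name.startswith("encoder."):
        name = name[8:]               # drop the "encoder." prefix
    new_state_dict[name] = v

torch.save(new_state_dict, "weights/plbert.bin")
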
infer.py CHANGED
@@ -20,7 +20,7 @@ import torchaudio
 from ipa_uk import ipa
 from unicodedata import normalize
 from ukrainian_word_stress import Stressifier, StressSymbol
-stressify = Stressifier(stress_symbol=StressSymbol.CombiningAcuteAccent)
+stressify = Stressifier()
 
 
 
@@ -56,15 +56,15 @@ pitch_extractor = load_F0_models(F0_path)
 
 # load BERT model
 from Utils.PLBERT.util import load_plbert
-BERT_path = config.get('PLBERT_dir', False)
-plbert = load_plbert(BERT_path)
+
+plbert = load_plbert('weights/plbert.bin', 'Utils/PLBERT/config.yml')
 
 model = build_model(recursive_munch(config['model_params']), text_aligner, pitch_extractor, plbert)
 _ = [model[key].eval() for key in model]
 _ = [model[key].to(device) for key in model]
 
-params_whole = torch.load('epoch_2nd_00027_filatov_whisp_cont.pth', map_location='cpu')
-params = params_whole['net']
+params = torch.load('weights/filatov.bin', map_location='cpu')
+
 
 for key in model:
     if key in params:
@@ -110,14 +110,17 @@ def split_to_parts(text):
 def _inf(text, speed, s_prev, noise, alpha, diffusion_steps, embedding_scale):
     text = text.strip()
     text = text.replace('"', '')
-    text = text.replace('+', 'ˈ')
+    text = text.replace('+', '\u0301')
     text = normalize('NFKC', text)
 
     text = re.sub(r'[᠆‐‑‒–—―⁻₋−⸺⸻]', '-', text)
     text = re.sub(r' - ', ': ', text)
-    ps = ipa(stressify(text))
-    #ps = text
-    print(ps)
+    stressed = stressify(text)
+
+
+    ps = ipa(stressed)
+
+    print(stressed)
 
     tokens = textclenaer(ps)
     tokens.insert(0, 0)
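
The accent handling changes in two places: the Stressifier now uses its default stress symbol, and a manual '+' marker in the input text is converted to U+0301 (COMBINING ACUTE ACCENT), which attaches to the character immediately before it. A minimal standard-library sketch of that substitution (the example word is illustrative only):

# Illustrative only; mirrors the replace/normalize lines added above.
from unicodedata import normalize

text = "за+мок"                      # stress marked with '+' right after the vowel
text = text.replace("+", "\u0301")   # the combining accent attaches to the preceding vowel
text = normalize("NFKC", text)
print(text)                          # за́мок
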
models.py CHANGED
@@ -585,7 +585,7 @@ def load_F0_models(path):
     # load F0 model
 
     F0_model = JDCNet(num_class=1, seq_len=192)
-    params = torch.load(path, map_location='cpu')['net']
+    params = torch.load(path, map_location='cpu')
     F0_model.load_state_dict(params)
     _ = F0_model.train()
 
@@ -601,7 +601,7 @@ def load_ASR_models(ASR_MODEL_PATH, ASR_MODEL_CONFIG):
 
     def _load_model(model_config, model_path):
        model = ASRCNN(**model_config)
-        params = torch.load(model_path, map_location='cpu')['model']
+        params = torch.load(model_path, map_location='cpu')
        model.load_state_dict(params)
        return model
 
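
load_F0_models and load_ASR_models now expect the checkpoint file to contain the bare state dict instead of a dict wrapped under a 'net' or 'model' key. A hypothetical one-off conversion of the old checkpoints into the new weight files (the key names come from the removed lines above; the target paths match styletts_config.yml below):

# Hypothetical conversion script (not part of this commit).
import torch

# The old JDC (F0) checkpoint stored its weights under 'net'.
jdc = torch.load("Utils/JDC/bst.t7", map_location="cpu")
torch.save(jdc["net"], "weights/jdc.bin")

# The old ASR checkpoint stored its weights under 'model'.
asr = torch.load("Utils/ASR/epoch_00100.pth", map_location="cpu")
torch.save(asr["model"], "weights/asr.bin")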
 
requirements.txt CHANGED
@@ -13,7 +13,7 @@ einops
 einops_exts
 txtsplit
 transformers
-ukrainian-word-stress
+git+https://github.com/patriotyk/ukrainian-word-stress.git
 git+https://github.com/patriotyk/ipa-uk.git
 spaces
 numpy<2
styletts_config.yml CHANGED
@@ -1,7 +1,7 @@
 
-F0_path: "Utils/JDC/bst.t7"
+F0_path: "weights/jdc.bin"
 ASR_config: "Utils/ASR/config.yml"
-ASR_path: "Utils/ASR/epoch_00100.pth"
+ASR_path: "weights/asr.bin"
 PLBERT_dir: 'Utils/PLBERT/'
 
 
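
With the updated paths, the loaders patched in models.py can be driven straight from this config; a sketch of the call pattern (the function names come from the hunk headers above, the import line is an assumption):

# Sketch only: wiring the new config values into the loaders.
import yaml
from models import load_F0_models, load_ASR_models  # assumed import

config = yaml.safe_load(open("styletts_config.yml"))

pitch_extractor = load_F0_models(config["F0_path"])                        # weights/jdc.bin
text_aligner = load_ASR_models(config["ASR_path"], config["ASR_config"])  # weights/asr.bin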
 
Utils/ASR/epoch_00100.pth → weights/asr.bin RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6b29394cfa9560a368c9dc67cfbc1f748b0b9c8565e40a3d3f19b859cf7f14e3
-size 94605236
+oid sha256:ee69a5f32c76aff88bafb09f31379dc625e51e3f71d842a4cf30ecd69cb92b56
+size 31529032

Utils/PLBERT/step_410000.t7 → weights/filatov.bin RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:02a4b690d254645b11dcc698ced7e6e0e24db1696ffff76d16f4712f8f486f60
-size 3098773241
+oid sha256:25e78d882ec4ee5a8a361749004edf6914137760f2be33a71ea24ce22da1a24a
+size 748848243

Utils/JDC/bst.t7 → weights/jdc.bin RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:54dc94364b97e18ac1dfa6287714ed121248cfaac4cfd39d061c6e0a089ef169
-size 21029926
+oid sha256:87aabb4a35814b581d13f4cc6ee352e99e80ede7fb6e7e963ea85b1feee940ac
+size 21023821

weights/plbert.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:74284b3d86962b0c4b52f2d3fe8507fc1ad87ae1defd1e7ddf28677380a290ef
+size 25188799