Kamtera committed on
Commit
6f44090
1 Parent(s): b59a09d

Update train_vits-2.py

Browse files
Files changed (1) hide show
  1. train_vits-2.py +45 -4
train_vits-2.py CHANGED
@@ -10,13 +10,54 @@ from TTS.tts.models.vits import CharactersConfig, Vits, VitsArgs, VitsAudioConf
10
  from TTS.tts.utils.text.tokenizer import TTSTokenizer
11
  from TTS.utils.audio import AudioProcessor
12
  from TTS.tts.utils.speakers import SpeakerManager
13
- from TTS.tts.datasets.formatters import mozilla_with_speaker
14
 
15
  output_path = os.path.dirname(os.path.abspath(__file__))
16
 
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  dataset_config = BaseDatasetConfig(
19
- formatter="mozilla_with_speaker",
 
20
  dataset_name="multi_persian",
21
  meta_file_train="metadata.csv",
22
  language="fa",
@@ -99,7 +140,7 @@ config = VitsConfig(
99
  ["مردی نزد بقالی آمد و گفت پیاز هم ده تا دهان بدان خو شبوی سازم.","farid",None,"fa"],
100
  ["از مال خود پاره ای گوشت بستان و زیره بایی معطّر بساز","dilara",None,"fa"],
101
  ["یک بار هم از جهنم بگویید.","changiz",None,"fa"],
102
- ["یکی اسبی به عاریت خواست","changiz",None,"fa"]
103
  ],
104
  output_path=output_path,
105
  datasets=[audio_config],
@@ -134,7 +175,7 @@ tokenizer, config = TTSTokenizer.init_from_config(config)
134
  # Load all the dataset samples and split training and evaluation sets
135
  train_samples, eval_samples = load_tts_samples(
136
  config.datasets,
137
- # formatter=mozilla_with_speaker,
138
  eval_split=True,
139
  eval_split_max_size=config.eval_split_max_size,
140
  eval_split_size=config.eval_split_size,
 
10
  from TTS.tts.utils.text.tokenizer import TTSTokenizer
11
  from TTS.utils.audio import AudioProcessor
12
  from TTS.tts.utils.speakers import SpeakerManager
13
+ # from TTS.tts.datasets.formatters import mozilla_with_speaker
14
 
15
  output_path = os.path.dirname(os.path.abspath(__file__))
16
 
17
 
18
+
19
+
20
def mozilla_with_speaker(root_path, meta_file, **kwargs):
    """Load three Kaggle datasets in Mozilla format as one multi-speaker dataset.

    The Kaggle datasets are:
        magnoliasis/persian-tts-dataset-famale
        magnoliasis/persian-tts-dataset
        magnoliasis/persian-tts-dataset-male

    This function is very useful when working inside Kaggle notebooks, where
    all input datasets are mounted side by side under one root directory.

    Args:
        root_path (str): root folder where all three datasets are downloaded,
            e.g. on Kaggle notebooks: /kaggle/input
        meta_file (str): name of the metadata file inside each dataset folder
            (e.g. "metadata.csv", pipe-separated "text|wav" lines).

    Returns:
        list: sample dicts with keys "text", "audio_file", "speaker_name",
        and "root_path", as expected by TTS dataset loading.
    """
    # Map dataset folder name -> speaker name used during training.
    # NOTE: "famale" is a typo in the upstream dataset name itself — keep as-is.
    dataset_names = {
        "persian-tts-dataset-famale": "dilara",
        "persian-tts-dataset": "changiz",
        "persian-tts-dataset-male": "farid",
    }
    items = []
    for data_root_path, speaker_name in dataset_names.items():
        new_root_path = os.path.join(root_path, data_root_path)
        txt_file = os.path.join(new_root_path, meta_file)
        with open(txt_file, "r", encoding="utf-8") as ttf:
            for line in ttf:
                cols = line.split("|")
                if len(cols) < 2:
                    # Skip blank/malformed lines instead of raising IndexError.
                    continue
                text = cols[0].strip()
                wav_file = os.path.join(new_root_path, "wavs", cols[1].strip())
                items.append(
                    {
                        "text": text,
                        "audio_file": wav_file,
                        "speaker_name": speaker_name,
                        "root_path": new_root_path,
                    }
                )
    return items
52
+
53
+
54
+
55
+
56
+
57
+
58
  dataset_config = BaseDatasetConfig(
59
+ # formatter="mozilla_with_speaker",
60
+ formatter="mozilla",
61
  dataset_name="multi_persian",
62
  meta_file_train="metadata.csv",
63
  language="fa",
 
140
  ["مردی نزد بقالی آمد و گفت پیاز هم ده تا دهان بدان خو شبوی سازم.","farid",None,"fa"],
141
  ["از مال خود پاره ای گوشت بستان و زیره بایی معطّر بساز","dilara",None,"fa"],
142
  ["یک بار هم از جهنم بگویید.","changiz",None,"fa"],
143
+ ["یکی اسبی به عاریت خواست","changiz",None,"fa"],
144
  ],
145
  output_path=output_path,
146
  datasets=[audio_config],
 
175
  # Load all the dataset samples and split training and evaluation sets
176
  train_samples, eval_samples = load_tts_samples(
177
  config.datasets,
178
+ formatter=mozilla_with_speaker,
179
  eval_split=True,
180
  eval_split_max_size=config.eval_split_max_size,
181
  eval_split_size=config.eval_split_size,